Netdev List
 help / color / mirror / Atom feed
* [PATCH 03/10] net/macb: change debugging messages
From: Nicolas Ferre @ 2012-09-05  8:19 UTC (permalink / raw)
  To: netdev
  Cc: linux-arm-kernel, davem, havard, nicolas.ferre, plagnioj, jamie,
	linux-kernel, patrice.vilchez
In-Reply-To: <cover.1346775479.git.nicolas.ferre@atmel.com>

From: Havard Skinnemoen <havard@skinnemoen.net>

Convert some noisy netdev_dbg() statements to netdev_vdbg(). Defining
DEBUG will no longer fill up the logs; VERBOSE_DEBUG still does.
Add one more verbose debug for ISR status.

Signed-off-by: Havard Skinnemoen <havard@skinnemoen.net>
[nicolas.ferre@atmel.com: split patch in topics, add ISR status]
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 drivers/net/ethernet/cadence/macb.c |   22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 26ca01e..2228dfc 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -313,7 +313,7 @@ static void macb_tx(struct macb *bp)
 	status = macb_readl(bp, TSR);
 	macb_writel(bp, TSR, status);
 
-	netdev_dbg(bp->dev, "macb_tx status = %02lx\n", (unsigned long)status);
+	netdev_vdbg(bp->dev, "macb_tx status = %02lx\n", (unsigned long)status);
 
 	if (status & (MACB_BIT(UND) | MACB_BIT(TSR_RLE))) {
 		int i;
@@ -380,7 +380,7 @@ static void macb_tx(struct macb *bp)
 		if (!(bufstat & MACB_BIT(TX_USED)))
 			break;
 
-		netdev_dbg(bp->dev, "skb %u (data %p) TX complete\n",
+		netdev_vdbg(bp->dev, "skb %u (data %p) TX complete\n",
 			   tail, skb->data);
 		dma_unmap_single(&bp->pdev->dev, rp->mapping, skb->len,
 				 DMA_TO_DEVICE);
@@ -406,7 +406,7 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 
 	len = MACB_BFEXT(RX_FRMLEN, bp->rx_ring[last_frag].ctrl);
 
-	netdev_dbg(bp->dev, "macb_rx_frame frags %u - %u (len %u)\n",
+	netdev_vdbg(bp->dev, "macb_rx_frame frags %u - %u (len %u)\n",
 		   first_frag, last_frag, len);
 
 	skb = netdev_alloc_skb(bp->dev, len + RX_OFFSET);
@@ -453,7 +453,7 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 
 	bp->stats.rx_packets++;
 	bp->stats.rx_bytes += len;
-	netdev_dbg(bp->dev, "received skb of length %u, csum: %08x\n",
+	netdev_vdbg(bp->dev, "received skb of length %u, csum: %08x\n",
 		   skb->len, skb->csum);
 	netif_receive_skb(skb);
 
@@ -535,7 +535,7 @@ static int macb_poll(struct napi_struct *napi, int budget)
 
 	work_done = 0;
 
-	netdev_dbg(bp->dev, "poll: status = %08lx, budget = %d\n",
+	netdev_vdbg(bp->dev, "poll: status = %08lx, budget = %d\n",
 		   (unsigned long)status, budget);
 
 	work_done = macb_rx(bp, budget);
@@ -574,6 +574,8 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 			break;
 		}
 
+		netdev_vdbg(bp->dev, "isr = 0x%08lx\n", (unsigned long)status);
+
 		if (status & MACB_RX_INT_FLAGS) {
 			/*
 			 * There's no point taking any more interrupts
@@ -585,7 +587,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 			macb_writel(bp, IDR, MACB_RX_INT_FLAGS);
 
 			if (napi_schedule_prep(&bp->napi)) {
-				netdev_dbg(bp->dev, "scheduling RX softirq\n");
+				netdev_vdbg(bp->dev, "scheduling RX softirq\n");
 				__napi_schedule(&bp->napi);
 			}
 		}
@@ -647,8 +649,8 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	u32 ctrl;
 	unsigned long flags;
 
-#ifdef DEBUG
-	netdev_dbg(bp->dev,
+#if defined(DEBUG) && defined(VERBOSE_DEBUG)
+	netdev_vdbg(bp->dev,
 		   "start_xmit: len %u head %p data %p tail %p end %p\n",
 		   skb->len, skb->head, skb->data,
 		   skb_tail_pointer(skb), skb_end_pointer(skb));
@@ -670,12 +672,12 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	entry = bp->tx_head;
-	netdev_dbg(bp->dev, "Allocated ring entry %u\n", entry);
+	netdev_vdbg(bp->dev, "Allocated ring entry %u\n", entry);
 	mapping = dma_map_single(&bp->pdev->dev, skb->data,
 				 len, DMA_TO_DEVICE);
 	bp->tx_skb[entry].skb = skb;
 	bp->tx_skb[entry].mapping = mapping;
-	netdev_dbg(bp->dev, "Mapped skb data %p to DMA addr %08lx\n",
+	netdev_vdbg(bp->dev, "Mapped skb data %p to DMA addr %08lx\n",
 		   skb->data, (unsigned long)mapping);
 
 	ctrl = MACB_BF(TX_FRMLEN, len);
-- 
1.7.10

^ permalink raw reply related

* [PATCH 02/10] net/macb: memory barriers cleanup
From: Nicolas Ferre @ 2012-09-05  8:19 UTC (permalink / raw)
  To: netdev
  Cc: patrice.vilchez, nicolas.ferre, linux-kernel, havard, jamie,
	plagnioj, davem, linux-arm-kernel
In-Reply-To: <cover.1346775479.git.nicolas.ferre@atmel.com>

From: Havard Skinnemoen <havard@skinnemoen.net>

Remove a couple of unneeded barriers and document the remaining ones.

Signed-off-by: Havard Skinnemoen <havard@skinnemoen.net>
[nicolas.ferre@atmel.com: split patch in topics]
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 drivers/net/ethernet/cadence/macb.c |   18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 9a10f69..26ca01e 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -372,7 +372,9 @@ static void macb_tx(struct macb *bp)
 
 		BUG_ON(skb == NULL);
 
+		/* Make hw descriptor updates visible to CPU */
 		rmb();
+
 		bufstat = bp->tx_ring[tail].ctrl;
 
 		if (!(bufstat & MACB_BIT(TX_USED)))
@@ -415,7 +417,10 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 			if (frag == last_frag)
 				break;
 		}
+
+		/* Make descriptor updates visible to hardware */
 		wmb();
+
 		return 1;
 	}
 
@@ -436,12 +441,14 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 					       frag_len);
 		offset += RX_BUFFER_SIZE;
 		bp->rx_ring[frag].addr &= ~MACB_BIT(RX_USED);
-		wmb();
 
 		if (frag == last_frag)
 			break;
 	}
 
+	/* Make descriptor updates visible to hardware */
+	wmb();
+
 	skb->protocol = eth_type_trans(skb, bp->dev);
 
 	bp->stats.rx_packets++;
@@ -461,6 +468,8 @@ static void discard_partial_frame(struct macb *bp, unsigned int begin,
 
 	for (frag = begin; frag != end; frag = NEXT_RX(frag))
 		bp->rx_ring[frag].addr &= ~MACB_BIT(RX_USED);
+
+	/* Make descriptor updates visible to hardware */
 	wmb();
 
 	/*
@@ -479,7 +488,9 @@ static int macb_rx(struct macb *bp, int budget)
 	for (; budget > 0; tail = NEXT_RX(tail)) {
 		u32 addr, ctrl;
 
+		/* Make hw descriptor updates visible to CPU */
 		rmb();
+
 		addr = bp->rx_ring[tail].addr;
 		ctrl = bp->rx_ring[tail].ctrl;
 
@@ -674,6 +685,8 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	bp->tx_ring[entry].addr = mapping;
 	bp->tx_ring[entry].ctrl = ctrl;
+
+	/* Make newly initialized descriptor visible to hardware */
 	wmb();
 
 	entry = NEXT_TX(entry);
@@ -782,9 +795,6 @@ static void macb_init_rings(struct macb *bp)
 
 static void macb_reset_hw(struct macb *bp)
 {
-	/* Make sure we have the write buffer for ourselves */
-	wmb();
-
 	/*
 	 * Disable RX and TX (XXX: Should we halt the transmission
 	 * more gracefully?)
-- 
1.7.10

^ permalink raw reply related

* [PATCH 01/10] net/macb: Add support for Gigabit Ethernet mode
From: Nicolas Ferre @ 2012-09-05  8:19 UTC (permalink / raw)
  To: netdev
  Cc: linux-arm-kernel, davem, havard, nicolas.ferre, plagnioj, jamie,
	linux-kernel, patrice.vilchez
In-Reply-To: <cover.1346775479.git.nicolas.ferre@atmel.com>

From: Patrice Vilchez <patrice.vilchez@atmel.com>

Add Gigabit Ethernet mode to GEM cadence IP and enable RGMII connection.

Signed-off-by: Patrice Vilchez <patrice.vilchez@atmel.com>
Signed-off-by: Nicolas Ferre <nicolas.ferre@atmel.com>
---
 drivers/net/ethernet/cadence/macb.c |   15 ++++++++++++---
 drivers/net/ethernet/cadence/macb.h |    4 ++++
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 033064b..9a10f69 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -152,13 +152,17 @@ static void macb_handle_link_change(struct net_device *dev)
 
 			reg = macb_readl(bp, NCFGR);
 			reg &= ~(MACB_BIT(SPD) | MACB_BIT(FD));
+			if (macb_is_gem(bp))
+				reg &= ~GEM_BIT(GBE);
 
 			if (phydev->duplex)
 				reg |= MACB_BIT(FD);
 			if (phydev->speed == SPEED_100)
 				reg |= MACB_BIT(SPD);
+			if (phydev->speed == SPEED_1000)
+				reg |= GEM_BIT(GBE);
 
-			macb_writel(bp, NCFGR, reg);
+			macb_or_gem_writel(bp, NCFGR, reg);
 
 			bp->speed = phydev->speed;
 			bp->duplex = phydev->duplex;
@@ -216,7 +220,10 @@ static int macb_mii_probe(struct net_device *dev)
 	}
 
 	/* mask with MAC supported features */
-	phydev->supported &= PHY_BASIC_FEATURES;
+	if (macb_is_gem(bp))
+		phydev->supported &= PHY_GBIT_FEATURES;
+	else
+		phydev->supported &= PHY_BASIC_FEATURES;
 
 	phydev->advertising = phydev->supported;
 
@@ -1384,7 +1391,9 @@ static int __init macb_probe(struct platform_device *pdev)
 		bp->phy_interface = err;
 	}
 
-	if (bp->phy_interface == PHY_INTERFACE_MODE_RMII)
+	if (bp->phy_interface == PHY_INTERFACE_MODE_RGMII)
+		macb_or_gem_writel(bp, USRIO, GEM_BIT(RGMII));
+	else if (bp->phy_interface == PHY_INTERFACE_MODE_RMII)
 #if defined(CONFIG_ARCH_AT91)
 		macb_or_gem_writel(bp, USRIO, (MACB_BIT(RMII) |
 					       MACB_BIT(CLKEN)));
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 335e288..f69ceef 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -145,6 +145,8 @@
 #define MACB_IRXFCS_SIZE			1
 
 /* GEM specific NCFGR bitfields. */
+#define GEM_GBE_OFFSET				10
+#define GEM_GBE_SIZE				1
 #define GEM_CLK_OFFSET				18
 #define GEM_CLK_SIZE				3
 #define GEM_DBW_OFFSET				21
@@ -246,6 +248,8 @@
 /* Bitfields in USRIO (AT91) */
 #define MACB_RMII_OFFSET			0
 #define MACB_RMII_SIZE				1
+#define GEM_RGMII_OFFSET			0	/* GEM gigabit mode */
+#define GEM_RGMII_SIZE				1
 #define MACB_CLKEN_OFFSET			1
 #define MACB_CLKEN_SIZE				1
 
-- 
1.7.10

^ permalink raw reply related

* [PATCH 00/10] net/macb: driver enhancement concerning GEM support, ring logic and cleanup
From: Nicolas Ferre @ 2012-09-05  8:19 UTC (permalink / raw)
  To: netdev
  Cc: patrice.vilchez, nicolas.ferre, linux-kernel, havard, jamie,
	plagnioj, davem, linux-arm-kernel

This is an enhancement work that began several years ago. I try to catchup with
some performance improvement that has been implemented then by Havard.
The ring index logic and the TX error path modification are the biggest changes
but some cleanup/debugging have been added along the way.
The GEM revision will benefit from the Gigabit support.

The series has been tested on several Atmel AT91 SoC with the two MACB/GEM
flavors.

Havard Skinnemoen (5):
  net/macb: memory barriers cleanup
  net/macb: change debugging messages
  net/macb: Fix a race in macb_start_xmit()
  net/macb: clean up ring buffer logic
  net/macb: Offset first RX buffer by two bytes

Nicolas Ferre (4):
  net/macb: better manage tx errors
  net/macb: tx status is more than 8 bits now
  net/macb: macb_get_drvinfo: add GEM/MACB suffix to differentiate
    revision
  net/macb: ethtool interface: add register dump feature

Patrice Vilchez (1):
  net/macb: Add support for Gigabit Ethernet mode

 drivers/net/ethernet/cadence/macb.c |  408 ++++++++++++++++++++++++-----------
 drivers/net/ethernet/cadence/macb.h |   29 ++-
 2 files changed, 304 insertions(+), 133 deletions(-)

-- 
1.7.10

^ permalink raw reply

* Re: [PATCH v7 1/1] ieee802154: MRF24J40 driver
From: Alexander Smirnov @ 2012-09-05  8:05 UTC (permalink / raw)
  To: David Miller
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-zigbee-devel-5NWGOfrQmneRv+LV9MX5uipxlwaOVQ5f,
	alan-yzvJWuRpmD1zbRFIqnYvSA
In-Reply-To: <20120904.144630.82568755842479462.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>

Dear colleagues,

2012/9/4 David Miller <davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>:
> From: Alan Ott <alan-yzvJWuRpmD1zbRFIqnYvSA@public.gmane.org>
> Date: Sun,  2 Sep 2012 21:44:13 -0400
>
>> Driver for the Microchip MRF24J40 802.15.4 WPAN module.

I wan on vacation so had no possibility to review the code.
Alan, thank you for the contribution!

>>
>> Signed-off-by: Alan Ott <alan-yzvJWuRpmD1zbRFIqnYvSA@public.gmane.org>
>
> Applied to net-next, thanks.

Thanks David.

Alex

------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/

^ permalink raw reply

* Re: [Bug 47021] New: kernel panic with l2tpv3 & mtu > 1500
From: a1 @ 2012-09-05  7:45 UTC (permalink / raw)
  To: Stephen Hemminger; +Cc: Eric Dumazet, netdev
In-Reply-To: <20120904125458.7d97ec38@nehalam.linuxnetplumber.net>

Thanks, it definitely helped, no more panics now...

jn

>>>
>>
>> Seems following patch is needed, not sure if it helps
>>
>> [PATCH] l2tp: fix a typo in l2tp_eth_dev_recv()
>>
>> While investigating l2tp bug, I hit a bug in eth_type_trans(),
>> because not enough bytes were pulled in skb head.
>>
>> Signed-off-by: Eric Dumazet <edumazet@google.com>
>> ---
>>  net/l2tp/l2tp_eth.c |    2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
>> index f9ee74d..3bfb34a 100644
>> --- a/net/l2tp/l2tp_eth.c
>> +++ b/net/l2tp/l2tp_eth.c
>> @@ -153,7 +153,7 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb,
>>  		print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, skb->data, length);
>>  	}
>>  
>> -	if (!pskb_may_pull(skb, sizeof(ETH_HLEN)))
>> +	if (!pskb_may_pull(skb, ETH_HLEN))
>>  		goto error;
> 
> I guess nobody ever looked inside this code. That seems like an obvious bug.
> 

^ permalink raw reply

* Re: [PATCH 1/4] net: mvneta: driver for Marvell Armada 370/XP network unit
From: Thomas Petazzoni @ 2012-09-05  7:32 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: Lior Amsalem, Ike Pan, Nadav Haklai, Ian Molton,
	Lennert Buytenhek, David Marlin, Rami Rosen, Yehuda Yitschak,
	Jani Monoses, Tawfik Bayouk, Dan Frazier, Eran Ben-Avi, Li Li,
	Leif Lindholm, Sebastian Hesselbarth, Jason Cooper, Arnd Bergmann,
	Jon Masters, Ben Dooks, Gregory Clement, linux-arm-kernel,
	Chris Van Hoof, Nicolas Pitre, netdev
In-Reply-To: <20120904183125.GB14683@lunn.ch>

Hello Andrew,

Le Tue, 4 Sep 2012 20:31:25 +0200,
Andrew Lunn <andrew@lunn.ch> a écrit :

> I've used Marvell switch chipsets, which have a phy polling unit,
> PPU. This sounds very similar. You can do a lot with the PPU, but when
> you want to configure subsets of auto-negotiation rates/duplex modes,
> or fixed speeds/duplex modes, the PPU could not do it. You had to
> disable the PPU and configure the PHY directly.
> 
> I see you have some of the ethtools API calls implemented, but not the
> ones needed for auto-neg and rates/duplex mode configurations. Does
> the neta PPU support this, or will you need to export the MDIO bus for
> these sorts of configuration options?

There's nothing named 'PPU' in the datasheet for Armada XP/370, but the
Ethernet controller registers indeed allow to change the
auto-negotiation, duplex and speed without talking to the PHY.

In addition to the GMAC_STATUS register (offset 0x2C10) that the driver
currently uses to find out the state of the link (up/down, speed,
duplex), there is a "Port Auto-Negotiation Configuration
Register" (offset 0x2C0C), which allows to set:

 * Manual duplex or auto duplex detection
 * If manual duplex, choose full/half
 * Manual or automatic detection of flow control
 * If manual flow control, decide the flow control value
 * Manual or automatic detection of speed
 * If manual speed, set the speed value

And a few other things. So indeed, we can configure the PHY parameters
without having to talk to the PHY directly.

Best regards,

Thomas
-- 
Thomas Petazzoni, Free Electrons
Kernel, drivers, real-time and embedded Linux
development, consulting, training and support.
http://free-electrons.com

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply

* Re: [PATCH] usbnet: drop unneeded check for NULL
From: Oliver Neukum @ 2012-09-05  6:24 UTC (permalink / raw)
  To: Richard Cochran; +Cc: David Miller, netdev
In-Reply-To: <20120905044712.GA2284@netboy.at.omicron.at>

On Wednesday 05 September 2012 06:47:12 Richard Cochran wrote:
> and so I think the problem that the test addresses is still present,
> or am I missing something?

No,

you are right. Thank you.

Dave, for now, please don't apply this patch. In the long run, this crap
in cdc-ncm needs to go. I am starting rewriting this driver right now.

	Regards
		Oliver

^ permalink raw reply

* [PATCH] ath6kl: use list_move_tail instead of list_del/list_add_tail
From: Wei Yongjun @ 2012-09-05  7:07 UTC (permalink / raw)
  To: kvalo, linville; +Cc: yongjun_wei, linux-wireless, netdev

From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>

Using list_move_tail() instead of list_del() + list_add_tail().

spatch with a semantic match is used to found this problem.
(http://coccinelle.lip6.fr/)

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
---
 drivers/net/wireless/ath/ath6kl/htc_pipe.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/ath/ath6kl/htc_pipe.c b/drivers/net/wireless/ath/ath6kl/htc_pipe.c
index f9626c7..ba6bd49 100644
--- a/drivers/net/wireless/ath/ath6kl/htc_pipe.c
+++ b/drivers/net/wireless/ath/ath6kl/htc_pipe.c
@@ -374,9 +374,8 @@ static enum htc_send_queue_result htc_try_send(struct htc_target *target,
 				packet = list_first_entry(txq,
 							  struct htc_packet,
 							  list);
-				list_del(&packet->list);
-				/* insert into local queue */
-				list_add_tail(&packet->list, &send_queue);
+				/* move to local queue */
+				list_move_tail(&packet->list, &send_queue);
 			}
 
 			/*
@@ -399,11 +398,10 @@ static enum htc_send_queue_result htc_try_send(struct htc_target *target,
 					 * for cleanup */
 				} else {
 					/* callback wants to keep this packet,
-					 * remove from caller's queue */
-					list_del(&packet->list);
-					/* put it in the send queue */
-					list_add_tail(&packet->list,
-						      &send_queue);
+					 * move from caller's queue to the send
+					 * queue */
+					list_move_tail(&packet->list,
+						       &send_queue);
 				}
 
 			}

^ permalink raw reply related

* [PATCH] bnx2x: use list_move_tail instead of list_del/list_add_tail
From: Wei Yongjun @ 2012-09-05  7:06 UTC (permalink / raw)
  To: eilong; +Cc: yongjun_wei, netdev

From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>

Using list_move_tail() instead of list_del() + list_add_tail().

spatch with a semantic match is used to found this problem.
(http://coccinelle.lip6.fr/)

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index 62f754b..5a5fbf5 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -229,8 +229,7 @@ static inline int bnx2x_exe_queue_step(struct bnx2x *bp,
 			 */
 			list_add_tail(&spacer.link, &o->pending_comp);
 			mb();
-			list_del(&elem->link);
-			list_add_tail(&elem->link, &o->pending_comp);
+			list_move_tail(&elem->link, &o->pending_comp);
 			list_del(&spacer.link);
 		} else
 			break;

^ permalink raw reply related

* Re: Commit "ipconfig wait for carrier" makes boot hang for 2 mins if no carrier
From: Joakim Tjernlund @ 2012-09-05  7:04 UTC (permalink / raw)
  To: Micha Nelissen; +Cc: netdev
In-Reply-To: <50465791.40805@neli.hopto.org>

Micha Nelissen <micha@neli.hopto.org> wrote on 2012/09/04 21:33:37:
>
> Joakim Tjernlund wrote:
> >>  Why not set the IP address then in your rootfs yourself?  Micha
> >
> > I could ask you the same question, why do you need to have nfs in kernel?
>
> Because that's where my root filesystem is? The IP autoconfiguration
> code exists for this purpose.

This is not the only purpose.

>
> > The answer is probably the same, it is much easier to
> > manage our IP config in one place for our embedded system.
>
> You retrieve the kernel via TFTP or so when booting?

Yes, but mostly not. This really doesn't matter

>
> > I don't understand why you need 2 minutes timeout for carrier either?
>
> Just a safe value.
>
> > The wait should be conditional on NFS root or not so that non NFS roots
> > can skip this stage altogether.
>
> Feel free to submit a patch :-)

Your patch broke other use cases so my patch would just revert or change the tmo
to 2 secs or so.
Or you could clean up your stuff so it works for all and not just for you.

 Jocke

^ permalink raw reply

* [V2 PATCH 2/9] csiostor: Chelsio FCoE offload driver submission (sources part 2).
From: Naresh Kumar Inna @ 2012-09-05 12:33 UTC (permalink / raw)
  To: JBottomley, linux-scsi, dm, leedom; +Cc: netdev, naresh, chethan
In-Reply-To: <1346848442-4573-1-git-send-email-naresh@chelsio.com>

This patch contains code for driver initialization, driver resource
allocation and the Work Request module functionality. Driver initialization
includes module entry/exit points, registration with PCI, FC transport and
SCSI mid layer subsystems. The Work Request module provides services for
allocation of DMA queues, posting Work Requests on them and processing
completions.

Signed-off-by: Naresh Kumar Inna <naresh@chelsio.com>
---
V2: Removed module parameters.

 drivers/scsi/csiostor/csio_init.c | 1276 +++++++++++++++++++++++++++++
 drivers/scsi/csiostor/csio_wr.c   | 1632 +++++++++++++++++++++++++++++++++++++
 2 files changed, 2908 insertions(+), 0 deletions(-)
 create mode 100644 drivers/scsi/csiostor/csio_init.c
 create mode 100644 drivers/scsi/csiostor/csio_wr.c

diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c
new file mode 100644
index 0000000..383f0dc
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_init.c
@@ -0,0 +1,1276 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/kdebug.h>
+#include <linux/version.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/string.h>
+#include <linux/export.h>
+
+#include "csio_init.h"
+#include "csio_defs.h"
+
+#define CSIO_MIN_MEMPOOL_SZ	64
+
+static struct dentry *csio_debugfs_root;
+
+static struct scsi_transport_template *csio_fcoe_transport;
+static struct scsi_transport_template *csio_fcoe_transport_vport;
+
+/*
+ * debugfs support
+ */
+static int
+csio_mem_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+static ssize_t
+csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+	loff_t pos = *ppos;
+	loff_t avail = file->f_path.dentry->d_inode->i_size;
+	unsigned int mem = (uintptr_t)file->private_data & 3;
+	struct csio_hw *hw = file->private_data - mem;
+
+	if (pos < 0)
+		return -EINVAL;
+	if (pos >= avail)
+		return 0;
+	if (count > avail - pos)
+		count = avail - pos;
+
+	while (count) {
+		size_t len;
+		int ret, ofst;
+		__be32 data[16];
+
+		if (mem == MEM_MC)
+			ret = csio_hw_mc_read(hw, pos, data, NULL);
+		else
+			ret = csio_hw_edc_read(hw, mem, pos, data, NULL);
+		if (ret)
+			return ret;
+
+		ofst = pos % sizeof(data);
+		len = min(count, sizeof(data) - ofst);
+		if (copy_to_user(buf, (u8 *)data + ofst, len))
+			return -EFAULT;
+
+		buf += len;
+		pos += len;
+		count -= len;
+	}
+	count = pos - *ppos;
+	*ppos = pos;
+	return count;
+}
+
+static const struct file_operations csio_mem_debugfs_fops = {
+	.owner   = THIS_MODULE,
+	.open    = csio_mem_open,
+	.read    = csio_mem_read,
+	.llseek  = default_llseek,
+};
+
+static void __devinit
+csio_add_debugfs_mem(struct csio_hw *hw, const char *name,
+		     unsigned int idx, unsigned int size_mb)
+{
+	struct dentry *de;
+
+	de = debugfs_create_file(name, S_IRUSR, hw->debugfs_root,
+				 (void *)hw + idx, &csio_mem_debugfs_fops);
+	if (de && de->d_inode)
+		de->d_inode->i_size = size_mb << 20;
+}
+
+static int __devinit
+csio_setup_debugfs(struct csio_hw *hw)
+{
+	int i;
+
+	if (IS_ERR_OR_NULL(hw->debugfs_root))
+		return -1;
+
+	i = csio_rd_reg32(hw, MA_TARGET_MEM_ENABLE);
+	if (i & EDRAM0_ENABLE)
+		csio_add_debugfs_mem(hw, "edc0", MEM_EDC0, 5);
+	if (i & EDRAM1_ENABLE)
+		csio_add_debugfs_mem(hw, "edc1", MEM_EDC1, 5);
+	if (i & EXT_MEM_ENABLE)
+		csio_add_debugfs_mem(hw, "mc", MEM_MC,
+		      EXT_MEM_SIZE_GET(csio_rd_reg32(hw, MA_EXT_MEMORY_BAR)));
+	return 0;
+}
+
+/*
+ * csio_dfs_create - Creates debug filesystem and proc fs for the given HW.
+ *
+ */
+static int
+csio_dfs_create(struct csio_hw *hw)
+{
+	if (csio_debugfs_root) {
+		hw->debugfs_root = debugfs_create_dir(pci_name(hw->pdev),
+							csio_debugfs_root);
+		csio_setup_debugfs(hw);
+	}
+
+	return 0;
+}
+
+/*
+ * csio_dfs_destroy - Deletes debugfs and procfs entries for the given HW.
+ *
+ */
+static int
+csio_dfs_destroy(struct csio_hw *hw)
+{
+	if (hw->debugfs_root)
+		debugfs_remove_recursive(hw->debugfs_root);
+
+	return 0;
+}
+
+/*
+ * csio_dfs_init - Debug filesystem initialization.
+ *
+ * This is function is called during driver load to initialize debugfs, procfs
+ * used for debugging.
+ */
+static int
+csio_dfs_init(void)
+{
+	csio_debugfs_root = debugfs_create_dir(KBUILD_MODNAME, NULL);
+	if (!csio_debugfs_root)
+		pr_warn("Could not create debugfs entry, continuing\n");
+
+	return 0;
+}
+
+/*
+ * csio_dfs_exit - Cleans up debugfs and  procfs created during driver load.
+ * Function that gets called in the unload path.
+ */
+static void
+csio_dfs_exit(void)
+{
+	debugfs_remove(csio_debugfs_root);
+}
+
+/*
+ * csio_pci_init - PCI initialization.
+ * @pdev: PCI device.
+ * @bars: Bitmask of bars to be requested.
+ *
+ * Initializes the PCI function by enabling MMIO, setting bus
+ * mastership and setting DMA mask.
+ */
+static int
+csio_pci_init(struct pci_dev *pdev, int *bars)
+{
+	int rv = -ENODEV;
+
+	*bars = pci_select_bars(pdev, IORESOURCE_MEM);
+
+	if (pci_enable_device_mem(pdev))
+		goto err;
+
+	if (pci_request_selected_regions(pdev, *bars, KBUILD_MODNAME))
+		goto err_disable_device;
+
+	pci_set_master(pdev);
+	pci_try_set_mwi(pdev);
+
+	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
+		pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+	} else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32))) {
+		pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+	} else {
+		dev_err(&pdev->dev, "No suitable DMA available.\n");
+		goto err_release_regions;
+	}
+
+	return 0;
+
+err_release_regions:
+	pci_release_selected_regions(pdev, *bars);
+err_disable_device:
+	pci_disable_device(pdev);
+err:
+	return rv;
+
+}
+
+/*
+ * csio_pci_exit - PCI unitialization.
+ * @pdev: PCI device.
+ * @bars: Bars to be released.
+ *
+ */
+static void
+csio_pci_exit(struct pci_dev *pdev, int *bars)
+{
+	pci_release_selected_regions(pdev, *bars);
+	pci_disable_device(pdev);
+}
+
+/*
+ * csio_hw_init_workers - Initialize the HW module's worker threads.
+ * @hw: HW module.
+ *
+ */
+static void
+csio_hw_init_workers(struct csio_hw *hw)
+{
+	INIT_WORK(&hw->evtq_work, csio_evtq_worker);
+}
+
+static void
+csio_hw_exit_workers(struct csio_hw *hw)
+{
+	cancel_work_sync(&hw->evtq_work);
+	flush_scheduled_work();
+}
+
+static int
+csio_create_queues(struct csio_hw *hw)
+{
+	int i, j;
+	struct csio_mgmtm *mgmtm = csio_hw_to_mgmtm(hw);
+	int rv;
+	struct csio_scsi_cpu_info *info;
+
+	if (hw->flags & CSIO_HWF_Q_FW_ALLOCED)
+		return 0;
+
+	if (hw->intr_mode != CSIO_IM_MSIX) {
+		rv = csio_wr_iq_create(hw, NULL, hw->intr_iq_idx,
+					0, hw->pport[0].portid, false, NULL);
+		if (rv != 0) {
+			csio_err(hw, " Forward Interrupt IQ failed!: %d\n", rv);
+			return rv;
+		}
+	}
+
+	/* FW event queue */
+	rv = csio_wr_iq_create(hw, NULL, hw->fwevt_iq_idx,
+			       csio_get_fwevt_intr_idx(hw),
+			       hw->pport[0].portid, true, NULL);
+	if (rv != 0) {
+		csio_err(hw, "FW event IQ config failed!: %d\n", rv);
+		return rv;
+	}
+
+	/* Create mgmt queue */
+	rv = csio_wr_eq_create(hw, NULL, mgmtm->eq_idx,
+			mgmtm->iq_idx, hw->pport[0].portid, NULL);
+
+	if (rv != 0) {
+		csio_err(hw, "Mgmt EQ create failed!: %d\n", rv);
+		goto err;
+	}
+
+	/* Create SCSI queues */
+	for (i = 0; i < hw->num_pports; i++) {
+		info = &hw->scsi_cpu_info[i];
+
+		for (j = 0; j < info->max_cpus; j++) {
+			struct csio_scsi_qset *sqset = &hw->sqset[i][j];
+
+			rv = csio_wr_iq_create(hw, NULL, sqset->iq_idx,
+					       sqset->intr_idx, i, false, NULL);
+			if (rv != 0) {
+				csio_err(hw,
+				   "SCSI module IQ config failed [%d][%d]:%d\n",
+				   i, j, rv);
+				goto err;
+			}
+			rv = csio_wr_eq_create(hw, NULL, sqset->eq_idx,
+					       sqset->iq_idx, i, NULL);
+			if (rv != 0) {
+				csio_err(hw,
+				   "SCSI module EQ config failed [%d][%d]:%d\n",
+				   i, j, rv);
+				goto err;
+			}
+		} /* for all CPUs */
+	} /* For all ports */
+
+	hw->flags |= CSIO_HWF_Q_FW_ALLOCED;
+	return 0;
+err:
+	csio_wr_destroy_queues(hw, true);
+	return -EINVAL;
+}
+
+/*
+ * csio_config_queues - Configure the DMA queues.
+ * @hw: HW module.
+ *
+ * Allocates memory for queues are registers them with FW.
+ */
+int
+csio_config_queues(struct csio_hw *hw)
+{
+	int i, j, idx, k = 0;
+	int rv;
+	struct csio_scsi_qset *sqset;
+	struct csio_mgmtm *mgmtm = csio_hw_to_mgmtm(hw);
+	struct csio_scsi_qset *orig;
+	struct csio_scsi_cpu_info *info;
+
+	if (hw->flags & CSIO_HWF_Q_MEM_ALLOCED)
+		return csio_create_queues(hw);
+
+	/* Calculate number of SCSI queues for MSIX we would like */
+	hw->num_scsi_msix_cpus = num_online_cpus();
+	hw->num_sqsets = num_online_cpus() * hw->num_pports;
+
+	if (hw->num_sqsets > CSIO_MAX_SCSI_QSETS) {
+		hw->num_sqsets = CSIO_MAX_SCSI_QSETS;
+		hw->num_scsi_msix_cpus = CSIO_MAX_SCSI_CPU;
+	}
+
+	/* Initialize max_cpus, may get reduced during msix allocations */
+	for (i = 0; i < hw->num_pports; i++)
+		hw->scsi_cpu_info[i].max_cpus = hw->num_scsi_msix_cpus;
+
+	csio_dbg(hw, "nsqsets:%d scpus:%d\n",
+		    hw->num_sqsets, hw->num_scsi_msix_cpus);
+
+	csio_intr_enable(hw);
+
+	if (hw->intr_mode != CSIO_IM_MSIX) {
+
+		/* Allocate Forward interrupt iq. */
+		hw->intr_iq_idx = csio_wr_alloc_q(hw, CSIO_INTR_IQSIZE,
+						CSIO_INTR_WRSIZE, CSIO_INGRESS,
+						(void *)hw, 0, 0, NULL);
+		if (hw->intr_iq_idx == -1) {
+			csio_err(hw,
+				 "Forward interrupt queue creation failed\n");
+			goto intr_disable;
+		}
+	}
+
+	/* Allocate the FW evt queue */
+	hw->fwevt_iq_idx = csio_wr_alloc_q(hw, CSIO_FWEVT_IQSIZE,
+					   CSIO_FWEVT_WRSIZE,
+					   CSIO_INGRESS, (void *)hw,
+					   CSIO_FWEVT_FLBUFS, 0,
+					   csio_fwevt_intx_handler);
+	if (hw->fwevt_iq_idx == -1) {
+		csio_err(hw, "FW evt queue creation failed\n");
+		goto intr_disable;
+	}
+
+	/* Allocate the mgmt queue */
+	mgmtm->eq_idx = csio_wr_alloc_q(hw, CSIO_MGMT_EQSIZE,
+				      CSIO_MGMT_EQ_WRSIZE,
+				      CSIO_EGRESS, (void *)hw, 0, 0, NULL);
+	if (mgmtm->eq_idx == -1) {
+		csio_err(hw, "Failed to alloc egress queue for mgmt module\n");
+		goto intr_disable;
+	}
+
+	/* Use FW IQ for MGMT req completion */
+	mgmtm->iq_idx = hw->fwevt_iq_idx;
+
+	/* Allocate SCSI queues */
+	for (i = 0; i < hw->num_pports; i++) {
+		info = &hw->scsi_cpu_info[i];
+
+		for (j = 0; j < hw->num_scsi_msix_cpus; j++) {
+			sqset = &hw->sqset[i][j];
+
+			if (j >= info->max_cpus) {
+				k = j % info->max_cpus;
+				orig = &hw->sqset[i][k];
+				sqset->eq_idx = orig->eq_idx;
+				sqset->iq_idx = orig->iq_idx;
+				continue;
+			}
+
+			idx = csio_wr_alloc_q(hw, csio_scsi_eqsize, 0,
+					      CSIO_EGRESS, (void *)hw, 0, 0,
+					      NULL);
+			if (idx == -1) {
+				csio_err(hw, "EQ creation failed for idx:%d\n",
+					    idx);
+				goto intr_disable;
+			}
+
+			sqset->eq_idx = idx;
+
+			idx = csio_wr_alloc_q(hw, CSIO_SCSI_IQSIZE,
+					     CSIO_SCSI_IQ_WRSZ, CSIO_INGRESS,
+					     (void *)hw, 0, 0,
+					     csio_scsi_intx_handler);
+			if (idx == -1) {
+				csio_err(hw, "IQ creation failed for idx:%d\n",
+					    idx);
+				goto intr_disable;
+			}
+			sqset->iq_idx = idx;
+		} /* for all CPUs */
+	} /* For all ports */
+
+	hw->flags |= CSIO_HWF_Q_MEM_ALLOCED;
+
+	rv = csio_create_queues(hw);
+	if (rv != 0)
+		goto intr_disable;
+
+	/*
+	 * Now request IRQs for the vectors. In the event of a failure,
+	 * cleanup is handled internally by this function.
+	 */
+	rv = csio_request_irqs(hw);
+	if (rv != 0)
+		return -EINVAL;
+
+	return 0;
+
+intr_disable:
+	csio_intr_disable(hw, false);
+
+	return -EINVAL;
+}
+
+static int
+csio_resource_alloc(struct csio_hw *hw)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	int rv = -ENOMEM;
+
+	wrm->num_q = ((CSIO_MAX_SCSI_QSETS * 2) + CSIO_HW_NIQ +
+		       CSIO_HW_NEQ + CSIO_HW_NFLQ + CSIO_HW_NINTXQ);
+
+	hw->mb_mempool = mempool_create_kmalloc_pool(CSIO_MIN_MEMPOOL_SZ,
+						  sizeof(struct csio_mb));
+	if (!hw->mb_mempool)
+		goto err;
+
+	hw->rnode_mempool = mempool_create_kmalloc_pool(CSIO_MIN_MEMPOOL_SZ,
+						     sizeof(struct csio_rnode));
+	if (!hw->rnode_mempool)
+		goto err_free_mb_mempool;
+
+	hw->scsi_pci_pool = pci_pool_create("csio_scsi_pci_pool", hw->pdev,
+					    CSIO_SCSI_RSP_LEN, 8, 0);
+	if (!hw->scsi_pci_pool)
+		goto err_free_rn_pool;
+
+	return 0;
+
+err_free_rn_pool:
+	mempool_destroy(hw->rnode_mempool);
+	hw->rnode_mempool = NULL;
+err_free_mb_mempool:
+	mempool_destroy(hw->mb_mempool);
+	hw->mb_mempool = NULL;
+err:
+	return rv;
+}
+
+static void
+csio_resource_free(struct csio_hw *hw)
+{
+	pci_pool_destroy(hw->scsi_pci_pool);
+	hw->scsi_pci_pool = NULL;
+	mempool_destroy(hw->rnode_mempool);
+	hw->rnode_mempool = NULL;
+	mempool_destroy(hw->mb_mempool);
+	hw->mb_mempool = NULL;
+}
+
+/*
+ * csio_hw_alloc - Allocate and initialize the HW module.
+ * @pdev: PCI device.
+ *
+ * Allocates HW structure, DMA, memory resources, maps BARS to
+ * host memory and initializes HW module.
+ */
+static struct csio_hw * __devinit
+csio_hw_alloc(struct pci_dev *pdev)
+{
+	struct csio_hw *hw;
+
+	hw = kzalloc(sizeof(struct csio_hw), GFP_KERNEL);
+	if (!hw)
+		goto err;
+
+	hw->pdev = pdev;
+	strncpy(hw->drv_version, CSIO_DRV_VERSION, 32);
+
+	/* memory pool/DMA pool allocation */
+	if (csio_resource_alloc(hw))
+		goto err_free_hw;
+
+	/* Get the start address of registers from BAR 0 */
+	hw->regstart = ioremap_nocache(pci_resource_start(pdev, 0),
+				       pci_resource_len(pdev, 0));
+	if (!hw->regstart) {
+		csio_err(hw, "Could not map BAR 0, regstart = %p\n",
+			 hw->regstart);
+		goto err_resource_free;
+	}
+
+	csio_hw_init_workers(hw);
+
+	if (csio_hw_init(hw))
+		goto err_unmap_bar;
+
+	csio_dfs_create(hw);
+
+	csio_dbg(hw, "hw:%p\n", hw);
+
+	return hw;
+
+err_unmap_bar:
+	csio_hw_exit_workers(hw);
+	iounmap(hw->regstart);
+err_resource_free:
+	csio_resource_free(hw);
+err_free_hw:
+	kfree(hw);
+err:
+	return NULL;
+}
+
+/*
+ * csio_hw_free - Uninitialize and free the HW module.
+ * @hw: The HW module
+ *
+ * Disable interrupts, uninit the HW module, free resources, free hw.
+ */
+static void
+csio_hw_free(struct csio_hw *hw)
+{
+	csio_intr_disable(hw, true);
+	csio_hw_exit_workers(hw);
+	csio_hw_exit(hw);
+	iounmap(hw->regstart);
+	csio_dfs_destroy(hw);
+	csio_resource_free(hw);
+	kfree(hw);
+}
+
+/**
+ * csio_shost_init - Create and initialize the lnode module.
+ * @hw:		The HW module.
+ * @dev:	The device associated with this invocation.
+ * @probe:	Called from probe context or not?
+ * @os_pln:	Parent lnode if any.
+ *
+ * Allocates lnode structure via scsi_host_alloc, initializes
+ * shost, initializes lnode module and registers with SCSI ML
+ * via scsi_host_add. This function is shared between physical and
+ * virtual node ports.
+ */
+struct csio_lnode *
+csio_shost_init(struct csio_hw *hw, struct device *dev,
+		  bool probe, struct csio_lnode *pln)
+{
+	struct Scsi_Host  *shost = NULL;
+	struct csio_lnode *ln;
+
+	csio_fcoe_shost_template.cmd_per_lun = csio_lun_qdepth;
+	csio_fcoe_shost_vport_template.cmd_per_lun = csio_lun_qdepth;
+
+	/*
+	 * hw->pdev is the physical port's PCI dev structure,
+	 * which will be different from the NPIV dev structure.
+	 */
+	if (dev == &hw->pdev->dev)
+		shost = scsi_host_alloc(
+				&csio_fcoe_shost_template,
+				sizeof(struct csio_lnode));
+	else
+		shost = scsi_host_alloc(
+				&csio_fcoe_shost_vport_template,
+				sizeof(struct csio_lnode));
+
+	if (!shost)
+		goto err;
+
+	ln = shost_priv(shost);
+	memset(ln, 0, sizeof(struct csio_lnode));
+
+	/* Link common lnode to this lnode */
+	ln->dev_num = (shost->host_no << 16);
+
+	shost->can_queue = CSIO_MAX_QUEUE;
+	shost->this_id = -1;
+	shost->unique_id = shost->host_no;
+	shost->max_cmd_len = 16; /* Max CDB length supported */
+	shost->max_id = min_t(uint32_t, csio_fcoe_rnodes,
+			      hw->fres_info.max_ssns);
+	shost->max_lun = CSIO_MAX_LUN;
+	if (dev == &hw->pdev->dev)
+		shost->transportt = csio_fcoe_transport;
+	else
+		shost->transportt = csio_fcoe_transport_vport;
+
+	/* root lnode */
+	if (!hw->rln)
+		hw->rln = ln;
+
+	/* Other initialization here: Common, Transport specific */
+	if (csio_lnode_init(ln, hw, pln))
+		goto err_shost_put;
+
+	if (scsi_add_host(shost, dev))
+		goto err_lnode_exit;
+
+	return ln;
+
+err_lnode_exit:
+	csio_lnode_exit(ln);
+err_shost_put:
+	scsi_host_put(shost);
+err:
+	return NULL;
+}
+
+/**
+ * csio_shost_exit - De-instantiate the shost.
+ * @ln:		The lnode module corresponding to the shost.
+ *
+ */
+void
+csio_shost_exit(struct csio_lnode *ln)
+{
+	struct Scsi_Host *shost = csio_ln_to_shost(ln);
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	/* Inform transport */
+	fc_remove_host(shost);
+
+	/* Inform SCSI ML */
+	scsi_remove_host(shost);
+
+	/* Flush all the events, so that any rnode removal events
+	 * already queued are all handled, before we remove the lnode.
+	 */
+	spin_lock_irq(&hw->lock);
+	csio_evtq_flush(hw);
+	spin_unlock_irq(&hw->lock);
+
+	csio_lnode_exit(ln);
+	scsi_host_put(shost);
+}
+
+struct csio_lnode *
+csio_lnode_alloc(struct csio_hw *hw)
+{
+	return csio_shost_init(hw, &hw->pdev->dev, false, NULL);
+}
+
+void
+csio_lnodes_block_request(struct csio_hw *hw)
+{
+	struct Scsi_Host  *shost;
+	struct csio_lnode *sln;
+	struct csio_lnode *ln;
+	struct list_head *cur_ln, *cur_cln;
+	struct csio_lnode **lnode_list;
+	int cur_cnt = 0, ii;
+
+	lnode_list = kzalloc((sizeof(struct csio_lnode *) * hw->num_lns),
+			GFP_KERNEL);
+	if (!lnode_list) {
+		csio_err(hw, "Failed to allocate lnodes_list");
+		return;
+	}
+
+	spin_lock_irq(&hw->lock);
+	/* Traverse sibling lnodes */
+	list_for_each(cur_ln, &hw->sln_head) {
+		sln = (struct csio_lnode *) cur_ln;
+		lnode_list[cur_cnt++] = sln;
+
+		/* Traverse children lnodes */
+		list_for_each(cur_cln, &sln->cln_head)
+			lnode_list[cur_cnt++] = (struct csio_lnode *) cur_cln;
+	}
+	spin_unlock_irq(&hw->lock);
+
+	for (ii = 0; ii < cur_cnt; ii++) {
+		csio_dbg(hw, "Blocking IOs on lnode: %p\n", lnode_list[ii]);
+		ln = lnode_list[ii];
+		shost = csio_ln_to_shost(ln);
+		scsi_block_requests(shost);
+
+	}
+	kfree(lnode_list);
+}
+
+void
+csio_lnodes_unblock_request(struct csio_hw *hw)
+{
+	struct csio_lnode *ln;
+	struct Scsi_Host  *shost;
+	struct csio_lnode *sln;
+	struct list_head *cur_ln, *cur_cln;
+	struct csio_lnode **lnode_list;
+	int cur_cnt = 0, ii;
+
+	lnode_list = kzalloc((sizeof(struct csio_lnode *) * hw->num_lns),
+			GFP_KERNEL);
+	if (!lnode_list) {
+		csio_err(hw, "Failed to allocate lnodes_list");
+		return;
+	}
+
+	spin_lock_irq(&hw->lock);
+	/* Traverse sibling lnodes */
+	list_for_each(cur_ln, &hw->sln_head) {
+		sln = (struct csio_lnode *) cur_ln;
+		lnode_list[cur_cnt++] = sln;
+
+		/* Traverse children lnodes */
+		list_for_each(cur_cln, &sln->cln_head)
+			lnode_list[cur_cnt++] = (struct csio_lnode *) cur_cln;
+	}
+	spin_unlock_irq(&hw->lock);
+
+	for (ii = 0; ii < cur_cnt; ii++) {
+		csio_dbg(hw, "unblocking IOs on lnode: %p\n", lnode_list[ii]);
+		ln = lnode_list[ii];
+		shost = csio_ln_to_shost(ln);
+		scsi_unblock_requests(shost);
+	}
+	kfree(lnode_list);
+}
+
+void
+csio_lnodes_block_by_port(struct csio_hw *hw, uint8_t portid)
+{
+	struct csio_lnode *ln;
+	struct Scsi_Host  *shost;
+	struct csio_lnode *sln;
+	struct list_head *cur_ln, *cur_cln;
+	struct csio_lnode **lnode_list;
+	int cur_cnt = 0, ii;
+
+	lnode_list = kzalloc((sizeof(struct csio_lnode *) * hw->num_lns),
+			GFP_KERNEL);
+	if (!lnode_list) {
+		csio_err(hw, "Failed to allocate lnodes_list");
+		return;
+	}
+
+	spin_lock_irq(&hw->lock);
+	/* Traverse sibling lnodes */
+	list_for_each(cur_ln, &hw->sln_head) {
+		sln = (struct csio_lnode *) cur_ln;
+		if (sln->portid != portid)
+			continue;
+
+		lnode_list[cur_cnt++] = sln;
+
+		/* Traverse children lnodes */
+		list_for_each(cur_cln, &sln->cln_head)
+			lnode_list[cur_cnt++] = (struct csio_lnode *) cur_cln;
+	}
+	spin_unlock_irq(&hw->lock);
+
+	for (ii = 0; ii < cur_cnt; ii++) {
+		csio_dbg(hw, "Blocking IOs on lnode: %p\n", lnode_list[ii]);
+		ln = lnode_list[ii];
+		shost = csio_ln_to_shost(ln);
+		scsi_block_requests(shost);
+	}
+	kfree(lnode_list);
+}
+
+void
+csio_lnodes_unblock_by_port(struct csio_hw *hw, uint8_t portid)
+{
+	struct csio_lnode *ln;
+	struct Scsi_Host  *shost;
+	struct csio_lnode *sln;
+	struct list_head *cur_ln, *cur_cln;
+	struct csio_lnode **lnode_list;
+	int cur_cnt = 0, ii;
+
+	lnode_list = kzalloc((sizeof(struct csio_lnode *) * hw->num_lns),
+			GFP_KERNEL);
+	if (!lnode_list) {
+		csio_err(hw, "Failed to allocate lnodes_list");
+		return;
+	}
+
+	spin_lock_irq(&hw->lock);
+	/* Traverse sibling lnodes */
+	list_for_each(cur_ln, &hw->sln_head) {
+		sln = (struct csio_lnode *) cur_ln;
+		if (sln->portid != portid)
+			continue;
+		lnode_list[cur_cnt++] = sln;
+
+		/* Traverse children lnodes */
+		list_for_each(cur_cln, &sln->cln_head)
+			lnode_list[cur_cnt++] = (struct csio_lnode *) cur_cln;
+	}
+	spin_unlock_irq(&hw->lock);
+
+	for (ii = 0; ii < cur_cnt; ii++) {
+		csio_dbg(hw, "unblocking IOs on lnode: %p\n", lnode_list[ii]);
+		ln = lnode_list[ii];
+		shost = csio_ln_to_shost(ln);
+		scsi_unblock_requests(shost);
+	}
+	kfree(lnode_list);
+}
+
+void
+csio_lnodes_exit(struct csio_hw *hw, bool npiv)
+{
+	struct csio_lnode *sln;
+	struct csio_lnode *ln;
+	struct list_head *cur_ln, *cur_cln;
+	struct csio_lnode **lnode_list;
+	int cur_cnt = 0, ii;
+
+	lnode_list = kzalloc((sizeof(struct csio_lnode *) * hw->num_lns),
+			GFP_KERNEL);
+	if (!lnode_list) {
+		csio_err(hw, "lnodes_exit: Failed to allocate lnodes_list.\n");
+		return;
+	}
+
+	/* Get all child lnodes(NPIV ports) */
+	spin_lock_irq(&hw->lock);
+	list_for_each(cur_ln, &hw->sln_head) {
+		sln = (struct csio_lnode *) cur_ln;
+
+		/* Traverse children lnodes */
+		list_for_each(cur_cln, &sln->cln_head)
+			lnode_list[cur_cnt++] = (struct csio_lnode *) cur_cln;
+	}
+	spin_unlock_irq(&hw->lock);
+
+	/* Delete NPIV lnodes */
+	for (ii = 0; ii < cur_cnt; ii++) {
+		csio_dbg(hw, "Deleting child lnode: %p\n", lnode_list[ii]);
+		ln = lnode_list[ii];
+		fc_vport_terminate(ln->fc_vport);
+	}
+
+	/* Delete only npiv lnodes */
+	if (npiv)
+		goto free_lnodes;
+
+	cur_cnt = 0;
+	/* Get all physical lnodes */
+	spin_lock_irq(&hw->lock);
+	/* Traverse sibling lnodes */
+	list_for_each(cur_ln, &hw->sln_head) {
+		sln = (struct csio_lnode *) cur_ln;
+		lnode_list[cur_cnt++] = sln;
+	}
+	spin_unlock_irq(&hw->lock);
+
+	/* Delete physical lnodes */
+	for (ii = 0; ii < cur_cnt; ii++) {
+		csio_dbg(hw, "Deleting parent lnode: %p\n", lnode_list[ii]);
+		csio_shost_exit(lnode_list[ii]);
+	}
+
+free_lnodes:
+	kfree(lnode_list);
+}
+
+/*
+ * csio_lnode_init_post: Set lnode attributes after starting HW.
+ * @ln: lnode.
+ *
+ */
+static void
+csio_lnode_init_post(struct csio_lnode *ln)
+{
+	struct Scsi_Host  *shost = csio_ln_to_shost(ln);
+
+	csio_fchost_attr_init(ln);
+
+	scsi_scan_host(shost);
+}
+
+/*
+ * csio_probe_one - Instantiate this function.
+ * @pdev: PCI device
+ * @id: Device ID
+ *
+ * This is the .probe() callback of the driver. This function:
+ * - Initializes the PCI function by enabling MMIO, setting bus
+ *   mastership and setting DMA mask.
+ * - Allocates HW structure, DMA, memory resources, maps BARS to
+ *   host memory and initializes HW module.
+ * - Allocates lnode structure via scsi_host_alloc, initializes
+ *   shost, initialized lnode module and registers with SCSI ML
+ *   via scsi_host_add.
+ * - Enables interrupts, and starts the chip by kicking off the
+ *   HW state machine.
+ * - Once hardware is ready, initiated scan of the host via
+ *   scsi_scan_host.
+ */
+static int __devinit
+csio_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	int rv;
+	int bars;
+	int i;
+	struct csio_hw *hw;
+	struct csio_lnode *ln;
+
+	rv = csio_pci_init(pdev, &bars);
+	if (rv)
+		goto err;
+
+	hw = csio_hw_alloc(pdev);
+	if (!hw) {
+		rv = -ENODEV;
+		goto err_pci_exit;
+	}
+
+	pci_set_drvdata(pdev, hw);
+
+	if (csio_hw_start(hw) != 0) {
+		dev_err(&pdev->dev,
+			"Failed to start FW, continuing in debug mode.\n");
+		return 0;
+	}
+
+	sprintf(hw->fwrev_str, "%u.%u.%u.%u\n",
+		    FW_HDR_FW_VER_MAJOR_GET(hw->fwrev),
+		    FW_HDR_FW_VER_MINOR_GET(hw->fwrev),
+		    FW_HDR_FW_VER_MICRO_GET(hw->fwrev),
+		    FW_HDR_FW_VER_BUILD_GET(hw->fwrev));
+
+	for (i = 0; i < hw->num_pports; i++) {
+		ln = csio_shost_init(hw, &pdev->dev, true, NULL);
+		if (!ln) {
+			rv = -ENODEV;
+			break;
+		}
+		/* Initialize portid */
+		ln->portid = hw->pport[i].portid;
+
+		spin_lock_irq(&hw->lock);
+		if (csio_lnode_start(ln) != 0)
+			rv = -ENODEV;
+		spin_unlock_irq(&hw->lock);
+
+		if (rv)
+			break;
+
+		csio_lnode_init_post(ln);
+	}
+
+	if (rv)
+		goto err_lnode_exit;
+
+	return 0;
+
+err_lnode_exit:
+	csio_lnodes_block_request(hw);
+	spin_lock_irq(&hw->lock);
+	csio_hw_stop(hw);
+	spin_unlock_irq(&hw->lock);
+	csio_lnodes_unblock_request(hw);
+	pci_set_drvdata(hw->pdev, NULL);
+	csio_lnodes_exit(hw, 0);
+	csio_hw_free(hw);
+err_pci_exit:
+	csio_pci_exit(pdev, &bars);
+err:
+	dev_err(&pdev->dev, "probe of device failed: %d\n", rv);
+	return rv;
+}
+
+/*
+ * csio_remove_one - Remove one instance of the driver at this PCI function.
+ * @pdev: PCI device
+ *
+ * Used during hotplug operation.
+ */
+static void __devexit
+csio_remove_one(struct pci_dev *pdev)
+{
+	struct csio_hw *hw = pci_get_drvdata(pdev);
+	int bars = pci_select_bars(pdev, IORESOURCE_MEM);
+
+	csio_lnodes_block_request(hw);
+	spin_lock_irq(&hw->lock);
+
+	/* Stops lnode, Rnode s/m
+	 * Quiesce IOs.
+	 * All sessions with remote ports are unregistered.
+	 */
+	csio_hw_stop(hw);
+	spin_unlock_irq(&hw->lock);
+	csio_lnodes_unblock_request(hw);
+
+	csio_lnodes_exit(hw, 0);
+	csio_hw_free(hw);
+	pci_set_drvdata(pdev, NULL);
+	csio_pci_exit(pdev, &bars);
+}
+
+/*
+ * csio_pci_error_detected - PCI error was detected
+ * @pdev: PCI device
+ *
+ */
+static pci_ers_result_t
+csio_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+	struct csio_hw *hw = pci_get_drvdata(pdev);
+
+	csio_lnodes_block_request(hw);
+	spin_lock_irq(&hw->lock);
+
+	/* Post PCI error detected evt to HW s/m
+	 * HW s/m handles this evt by quiescing IOs, unregisters rports
+	 * and finally takes the device to offline.
+	 */
+	csio_post_event(&hw->sm, CSIO_HWE_PCIERR_DETECTED);
+	spin_unlock_irq(&hw->lock);
+	csio_lnodes_unblock_request(hw);
+	csio_lnodes_exit(hw, 0);
+	csio_intr_disable(hw, true);
+	pci_disable_device(pdev);
+	return state == pci_channel_io_perm_failure ?
+		PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_NEED_RESET;
+}
+
+/*
+ * csio_pci_slot_reset - PCI slot has been reset.
+ * @pdev: PCI device
+ *
+ */
+static pci_ers_result_t
+csio_pci_slot_reset(struct pci_dev *pdev)
+{
+	struct csio_hw *hw = pci_get_drvdata(pdev);
+
+	if (pci_enable_device(pdev)) {
+		dev_err(&pdev->dev, "cannot re-enable device in slot reset\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	pci_set_master(pdev);
+	pci_restore_state(pdev);
+	pci_save_state(pdev);
+	pci_cleanup_aer_uncorrect_error_status(pdev);
+
+	/* Bring HW s/m to ready state.
+	 * but don't resume IOs.
+	 */
+	spin_lock_irq(&hw->lock);
+	csio_post_event(&hw->sm, CSIO_HWE_PCIERR_SLOT_RESET);
+	if (!csio_is_hw_ready(hw)) {
+		spin_unlock_irq(&hw->lock);
+		dev_err(&pdev->dev, "Can't initialize HW when in slot reset\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+	spin_unlock_irq(&hw->lock);
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/*
+ * csio_pci_resume - Resume normal operations
+ * @pdev: PCI device
+ *
+ */
+static void
+csio_pci_resume(struct pci_dev *pdev)
+{
+	struct csio_hw *hw = pci_get_drvdata(pdev);
+	struct csio_lnode *ln;
+	int rv = 0;
+	int i;
+
+	/* Bring the LINK UP and Resume IO */
+
+	for (i = 0; i < hw->num_pports; i++) {
+		ln = csio_shost_init(hw, &pdev->dev, true, NULL);
+		if (!ln) {
+			rv = -ENODEV;
+			break;
+		}
+		/* Initialize portid */
+		ln->portid = hw->pport[i].portid;
+
+		spin_lock_irq(&hw->lock);
+		if (csio_lnode_start(ln) != 0)
+			rv = -ENODEV;
+		spin_unlock_irq(&hw->lock);
+
+		if (rv)
+			break;
+
+		csio_lnode_init_post(ln);
+	}
+
+	if (rv)
+		goto err_resume_exit;
+
+	return;
+
+err_resume_exit:
+	csio_lnodes_block_request(hw);
+	spin_lock_irq(&hw->lock);
+	csio_hw_stop(hw);
+	spin_unlock_irq(&hw->lock);
+	csio_lnodes_unblock_request(hw);
+	csio_lnodes_exit(hw, 0);
+	csio_hw_free(hw);
+	dev_err(&pdev->dev, "resume of device failed: %d\n", rv);
+}
+
+static struct pci_error_handlers csio_err_handler = {
+	.error_detected = csio_pci_error_detected,
+	.slot_reset	= csio_pci_slot_reset,
+	.resume		= csio_pci_resume,
+};
+
+static DEFINE_PCI_DEVICE_TABLE(csio_pci_tbl) = {
+	CSIO_DEVICE(CSIO_DEVID_T440DBG_FCOE, 0),	/* T440DBG FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T420CR_FCOE, 0),		/* T420CR FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T422CR_FCOE, 0),		/* T422CR FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T440CR_FCOE, 0),		/* T440CR FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T420BCH_FCOE, 0),	/* T420BCH FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T440BCH_FCOE, 0),	/* T440BCH FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T440CH_FCOE, 0),		/* T440CH FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T420SO_FCOE, 0),		/* T420SO FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T420CX_FCOE, 0),		/* T420CX FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T420BT_FCOE, 0),		/* T420BT FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T404BT_FCOE, 0),		/* T404BT FCOE */
+	CSIO_DEVICE(CSIO_DEVID_B420_FCOE, 0),		/* B420 FCOE */
+	CSIO_DEVICE(CSIO_DEVID_B404_FCOE, 0),		/* B404 FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T480CR_FCOE, 0),		/* T480 CR FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T440LPCR_FCOE, 0),	/* T440 LP-CR FCOE */
+	CSIO_DEVICE(CSIO_DEVID_PE10K, 0),		/* PE10K FCOE */
+	CSIO_DEVICE(CSIO_DEVID_PE10K_PF1, 0),	/* PE10K FCOE on PF1 */
+	{ 0, 0, 0, 0, 0, 0, 0 }
+};
+
+
+static struct pci_driver csio_pci_driver = {
+	.name		= KBUILD_MODNAME,
+	.driver		= {
+		.owner	= THIS_MODULE,
+	},
+	.id_table	= csio_pci_tbl,
+	.probe		= csio_probe_one,
+	.remove		= csio_remove_one,
+	.err_handler	= &csio_err_handler,
+};
+
+/*
+ * csio_init - Chelsio storage driver initialization function.
+ *
+ */
+static int __init
+csio_init(void)
+{
+	int rv = -ENOMEM;
+
+	pr_info("%s %s\n", CSIO_DRV_DESC, CSIO_DRV_VERSION);
+
+	csio_dfs_init();
+
+	csio_fcoe_transport = fc_attach_transport(&csio_fc_transport_funcs);
+	if (!csio_fcoe_transport)
+		goto err;
+
+	csio_fcoe_transport_vport =
+			fc_attach_transport(&csio_fc_transport_vport_funcs);
+	if (!csio_fcoe_transport_vport)
+		goto err_vport;
+
+	rv = pci_register_driver(&csio_pci_driver);
+	if (rv)
+		goto err_pci;
+
+	return 0;
+
+err_pci:
+	fc_release_transport(csio_fcoe_transport_vport);
+err_vport:
+	fc_release_transport(csio_fcoe_transport);
+err:
+	csio_dfs_exit();
+	return rv;
+}
+
+/*
+ * csio_exit - Chelsio storage driver uninitialization .
+ *
+ * Function that gets called in the unload path.
+ */
+static void __exit
+csio_exit(void)
+{
+	pci_unregister_driver(&csio_pci_driver);
+	csio_dfs_exit();
+	fc_release_transport(csio_fcoe_transport_vport);
+	fc_release_transport(csio_fcoe_transport);
+}
+
+module_init(csio_init);
+module_exit(csio_exit);
+MODULE_AUTHOR(CSIO_DRV_AUTHOR);
+MODULE_DESCRIPTION(CSIO_DRV_DESC);
+MODULE_LICENSE(CSIO_DRV_LICENSE);
+MODULE_DEVICE_TABLE(pci, csio_pci_tbl);
+MODULE_VERSION(CSIO_DRV_VERSION);
+MODULE_FIRMWARE(CSIO_FW_FNAME);
diff --git a/drivers/scsi/csiostor/csio_wr.c b/drivers/scsi/csiostor/csio_wr.c
new file mode 100644
index 0000000..329c6df
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_wr.c
@@ -0,0 +1,1632 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/compiler.h>
+#include <linux/slab.h>
+#include <asm/page.h>
+#include <linux/cache.h>
+
+#include "csio_hw.h"
+#include "csio_wr.h"
+#include "csio_mb.h"
+#include "csio_defs.h"
+
+int csio_intr_coalesce_cnt;		/* value:SGE_INGRESS_RX_THRESHOLD[0] */
+static int csio_sge_thresh_reg;		/* SGE_INGRESS_RX_THRESHOLD[0] */
+
+int csio_intr_coalesce_time = 10;	/* value:SGE_TIMER_VALUE_1 */
+static int csio_sge_timer_reg = 1;
+
+#define CSIO_SET_FLBUF_SIZE(_hw, _reg, _val)				\
+	csio_wr_reg32((_hw), (_val), SGE_FL_BUFFER_SIZE##_reg)
+
+static void
+csio_get_flbuf_size(struct csio_hw *hw, struct csio_sge *sge, uint32_t reg)
+{
+	sge->sge_fl_buf_size[reg] = csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE0 +
+							reg * sizeof(uint32_t));
+}
+
+/* Free list buffer size */
+static inline uint32_t
+csio_wr_fl_bufsz(struct csio_sge *sge, struct csio_dma_buf *buf)
+{
+	return sge->sge_fl_buf_size[buf->paddr & 0xF];
+}
+
+/* Size of the egress queue status page */
+static inline uint32_t
+csio_wr_qstat_pgsz(struct csio_hw *hw)
+{
+	return (hw->wrm.sge.sge_control & EGRSTATUSPAGESIZE(1)) ?  128 : 64;
+}
+
+/* Ring freelist doorbell */
+static inline void
+csio_wr_ring_fldb(struct csio_hw *hw, struct csio_q *flq)
+{
+	/*
+	 * Ring the doorbell only when we have atleast CSIO_QCREDIT_SZ
+	 * number of bytes in the freelist queue. This translates to atleast
+	 * 8 freelist buffer pointers (since each pointer is 8 bytes).
+	 */
+	if (flq->inc_idx >= 8) {
+		csio_wr_reg32(hw, DBPRIO(1) | QID(flq->un.fl.flid) |
+			      PIDX(flq->inc_idx / 8),
+			      MYPF_REG(SGE_PF_KDOORBELL));
+		flq->inc_idx &= 7;
+	}
+}
+
+/* Write a 0 cidx increment value to enable SGE interrupts for this queue */
+static void
+csio_wr_sge_intr_enable(struct csio_hw *hw, uint16_t iqid)
+{
+	csio_wr_reg32(hw, CIDXINC(0)		|
+			  INGRESSQID(iqid)	|
+			  TIMERREG(X_TIMERREG_RESTART_COUNTER),
+			  MYPF_REG(SGE_PF_GTS));
+}
+
+/*
+ * csio_wr_fill_fl - Populate the FL buffers of a FL queue.
+ * @hw: HW module.
+ * @flq: Freelist queue.
+ *
+ * Fill up freelist buffer entries with buffers of size specified
+ * in the size register.
+ *
+ */
+static int
+csio_wr_fill_fl(struct csio_hw *hw, struct csio_q *flq)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_sge *sge = &wrm->sge;
+	__be64 *d = (__be64 *)(flq->vstart);
+	struct csio_dma_buf *buf = &flq->un.fl.bufs[0];
+	uint64_t paddr;
+	int sreg = flq->un.fl.sreg;
+	int n = flq->credits;
+
+	while (n--) {
+		buf->len = sge->sge_fl_buf_size[sreg];
+		buf->vaddr = pci_alloc_consistent(hw->pdev, buf->len,
+						  &buf->paddr);
+		if (!buf->vaddr) {
+			csio_err(hw, "Could only fill %d buffers!\n", n + 1);
+			return -ENOMEM;
+		}
+
+		paddr = buf->paddr | (sreg & 0xF);
+
+		*d++ = cpu_to_be64(paddr);
+		buf++;
+	}
+
+	return 0;
+}
+
+/*
+ * csio_wr_update_fl -
+ * @hw: HW module.
+ * @flq: Freelist queue.
+ *
+ *
+ */
+static inline void
+csio_wr_update_fl(struct csio_hw *hw, struct csio_q *flq, uint16_t n)
+{
+
+	flq->inc_idx += n;
+	flq->pidx += n;
+	if (unlikely(flq->pidx >= flq->credits))
+		flq->pidx -= (uint16_t)flq->credits;
+
+	CSIO_INC_STATS(flq, n_flq_refill);
+}
+
+/*
+ * csio_wr_alloc_q - Allocate a WR queue and initialize it.
+ * @hw: HW module
+ * @qsize: Size of the queue in bytes
+ * @wrsize: Since of WR in this queue, if fixed.
+ * @type: Type of queue (Ingress/Egress/Freelist)
+ * @owner: Module that owns this queue.
+ * @nflb: Number of freelist buffers for FL.
+ * @sreg: What is the FL buffer size register?
+ * @iq_int_handler: Ingress queue handler in INTx mode.
+ *
+ * This function allocates and sets up a queue for the caller
+ * of size qsize, aligned at the required boundary. This is subject to
+ * be free entries being available in the queue array. If one is found,
+ * it is initialized with the allocated queue, marked as being used (owner),
+ * and a handle returned to the caller in form of the queue's index
+ * into the q_arr array.
+ * If user has indicated a freelist (by specifying nflb > 0), create
+ * another queue (with its own index into q_arr) for the freelist. Allocate
+ * memory for DMA buffer metadata (vaddr, len etc). Save off the freelist
+ * idx in the ingress queue's flq.idx. This is how a Freelist is associated
+ * with its owning ingress queue.
+ */
+int
+csio_wr_alloc_q(struct csio_hw *hw, uint32_t qsize, uint32_t wrsize,
+		uint16_t type, void *owner, uint32_t nflb, int sreg,
+		iq_handler_t iq_intx_handler)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_q	*q, *flq;
+	int		free_idx = wrm->free_qidx;
+	int		ret_idx = free_idx;
+	uint32_t	qsz;
+	int flq_idx;
+
+	if (free_idx >= wrm->num_q) {
+		csio_err(hw, "No more free queues.\n");
+		return -1;
+	}
+
+	switch (type) {
+	case CSIO_EGRESS:
+		qsz = ALIGN(qsize, CSIO_QCREDIT_SZ) + csio_wr_qstat_pgsz(hw);
+		break;
+	case CSIO_INGRESS:
+		switch (wrsize) {
+		case 16:
+		case 32:
+		case 64:
+		case 128:
+			break;
+		default:
+			csio_err(hw, "Invalid Ingress queue WR size:%d\n",
+				    wrsize);
+			return -1;
+		}
+
+		/*
+		 * Number of elements must be a multiple of 16
+		 * So this includes status page size
+		 */
+		qsz = ALIGN(qsize/wrsize, 16) * wrsize;
+
+		break;
+	case CSIO_FREELIST:
+		qsz = ALIGN(qsize/wrsize, 8) * wrsize + csio_wr_qstat_pgsz(hw);
+		break;
+	default:
+		csio_err(hw, "Invalid queue type: 0x%x\n", type);
+		return -1;
+	}
+
+	q = wrm->q_arr[free_idx];
+
+	q->vstart = pci_alloc_consistent(hw->pdev, qsz, &q->pstart);
+	if (!q->vstart) {
+		csio_err(hw,
+			 "Failed to allocate DMA memory for "
+			 "queue at id: %d size: %d\n", free_idx, qsize);
+		return -1;
+	}
+
+	/*
+	 * We need to zero out the contents, importantly for ingress,
+	 * since we start with a generatiom bit of 1 for ingress.
+	 */
+	memset(q->vstart, 0, qsz);
+
+	q->type		= type;
+	q->owner	= owner;
+	q->pidx		= q->cidx = q->inc_idx = 0;
+	q->size		= qsz;
+	q->wr_sz	= wrsize;	/* If using fixed size WRs */
+
+	wrm->free_qidx++;
+
+	if (type == CSIO_INGRESS) {
+		/* Since queue area is set to zero */
+		q->un.iq.genbit	= 1;
+
+		/*
+		 * Ingress queue status page size is always the size of
+		 * the ingress queue entry.
+		 */
+		q->credits	= (qsz - q->wr_sz) / q->wr_sz;
+		q->vwrap	= (void *)((uintptr_t)(q->vstart) + qsz
+							- q->wr_sz);
+
+		/* Allocate memory for FL if requested */
+		if (nflb > 0) {
+			flq_idx = csio_wr_alloc_q(hw, nflb * sizeof(__be64),
+						  sizeof(__be64), CSIO_FREELIST,
+						  owner, 0, sreg, NULL);
+			if (flq_idx == -1) {
+				csio_err(hw,
+					 "Failed to allocate FL queue"
+					 " for IQ idx:%d\n", free_idx);
+				return -1;
+			}
+
+			/* Associate the new FL with the Ingress quue */
+			q->un.iq.flq_idx = flq_idx;
+
+			flq = wrm->q_arr[q->un.iq.flq_idx];
+			flq->un.fl.bufs = kzalloc(flq->credits *
+						  sizeof(struct csio_dma_buf),
+						  GFP_KERNEL);
+			if (!flq->un.fl.bufs) {
+				csio_err(hw,
+					 "Failed to allocate FL queue bufs"
+					 " for IQ idx:%d\n", free_idx);
+				return -1;
+			}
+
+			flq->un.fl.packen = 0;
+			flq->un.fl.offset = 0;
+			flq->un.fl.sreg = sreg;
+
+			/* Fill up the free list buffers */
+			if (csio_wr_fill_fl(hw, flq))
+				return -1;
+
+			/*
+			 * Make sure in a FLQ, atleast 1 credit (8 FL buffers)
+			 * remains unpopulated,otherwise HW thinks
+			 * FLQ is empty.
+			 */
+			flq->pidx = flq->inc_idx = flq->credits - 8;
+		} else {
+			q->un.iq.flq_idx = -1;
+		}
+
+		/* Associate the IQ INTx handler. */
+		q->un.iq.iq_intx_handler = iq_intx_handler;
+
+		csio_q_iqid(hw, ret_idx) = CSIO_MAX_QID;
+
+	} else if (type == CSIO_EGRESS) {
+		q->credits = (qsz - csio_wr_qstat_pgsz(hw)) / CSIO_QCREDIT_SZ;
+		q->vwrap   = (void *)((uintptr_t)(q->vstart) + qsz
+						- csio_wr_qstat_pgsz(hw));
+		csio_q_eqid(hw, ret_idx) = CSIO_MAX_QID;
+	} else { /* Freelist */
+		q->credits = (qsz - csio_wr_qstat_pgsz(hw)) / sizeof(__be64);
+		q->vwrap   = (void *)((uintptr_t)(q->vstart) + qsz
+						- csio_wr_qstat_pgsz(hw));
+		csio_q_flid(hw, ret_idx) = CSIO_MAX_QID;
+	}
+
+	return ret_idx;
+}
+
+/*
+ * csio_wr_iq_create_rsp - Response handler for IQ creation.
+ * @hw: The HW module.
+ * @mbp: Mailbox.
+ * @iq_idx: Ingress queue that got created.
+ *
+ * Handle FW_IQ_CMD mailbox completion. Save off the assigned IQ/FL ids.
+ */
+static int
+csio_wr_iq_create_rsp(struct csio_hw *hw, struct csio_mb *mbp, int iq_idx)
+{
+	struct csio_iq_params iqp;
+	enum fw_retval retval;
+	uint32_t iq_id;
+	int flq_idx;
+
+	memset(&iqp, 0, sizeof(struct csio_iq_params));
+
+	csio_mb_iq_alloc_write_rsp(hw, mbp, &retval, &iqp);
+
+	if (retval != FW_SUCCESS) {
+		csio_err(hw, "IQ cmd returned 0x%x!\n", retval);
+		mempool_free(mbp, hw->mb_mempool);
+		return -EINVAL;
+	}
+
+	csio_q_iqid(hw, iq_idx)		= iqp.iqid;
+	csio_q_physiqid(hw, iq_idx)	= iqp.physiqid;
+	csio_q_pidx(hw, iq_idx)		= csio_q_cidx(hw, iq_idx) = 0;
+	csio_q_inc_idx(hw, iq_idx)	= 0;
+
+	/* Actual iq-id. */
+	iq_id = iqp.iqid - hw->wrm.fw_iq_start;
+
+	/* Set the iq-id to iq map table. */
+	if (iq_id >= CSIO_MAX_IQ) {
+		csio_err(hw,
+			 "Exceeding MAX_IQ(%d) supported!"
+			 " iqid:%d rel_iqid:%d FW iq_start:%d\n",
+			 CSIO_MAX_IQ, iq_id, iqp.iqid, hw->wrm.fw_iq_start);
+		mempool_free(mbp, hw->mb_mempool);
+		return -EINVAL;
+	}
+	csio_q_set_intr_map(hw, iq_idx, iq_id);
+
+	/*
+	 * During FW_IQ_CMD, FW sets interrupt_sent bit to 1 in the SGE
+	 * ingress context of this queue. This will block interrupts to
+	 * this queue until the next GTS write. Therefore, we do a
+	 * 0-cidx increment GTS write for this queue just to clear the
+	 * interrupt_sent bit. This will re-enable interrupts to this
+	 * queue.
+	 */
+	csio_wr_sge_intr_enable(hw, iqp.physiqid);
+
+	flq_idx = csio_q_iq_flq_idx(hw, iq_idx);
+	if (flq_idx != -1) {
+		struct csio_q *flq = hw->wrm.q_arr[flq_idx];
+
+		csio_q_flid(hw, flq_idx) = iqp.fl0id;
+		csio_q_cidx(hw, flq_idx) = 0;
+		csio_q_pidx(hw, flq_idx)    = csio_q_credits(hw, flq_idx) - 8;
+		csio_q_inc_idx(hw, flq_idx) = csio_q_credits(hw, flq_idx) - 8;
+
+		/* Now update SGE about the buffers allocated during init */
+		csio_wr_ring_fldb(hw, flq);
+	}
+
+	mempool_free(mbp, hw->mb_mempool);
+
+	return 0;
+}
+
+/*
+ * csio_wr_iq_create - Configure an Ingress queue with FW.
+ * @hw: The HW module.
+ * @priv: Private data object.
+ * @iq_idx: Ingress queue index in the WR module.
+ * @vec: MSIX vector.
+ * @portid: PCIE Channel to be associated with this queue.
+ * @async: Is this a FW asynchronous message handling queue?
+ * @cbfn: Completion callback.
+ *
+ * This API configures an ingress queue with FW by issuing a FW_IQ_CMD mailbox
+ * with alloc/write bits set.
+ */
+int
+csio_wr_iq_create(struct csio_hw *hw, void *priv, int iq_idx,
+		  uint32_t vec, uint8_t portid, bool async,
+		  void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct csio_mb  *mbp;
+	struct csio_iq_params iqp;
+	int flq_idx;
+
+	memset(&iqp, 0, sizeof(struct csio_iq_params));
+	csio_q_portid(hw, iq_idx) = portid;
+
+	mbp = mempool_alloc(hw->mb_mempool, GFP_ATOMIC);
+	if (!mbp) {
+		csio_err(hw, "IQ command out of memory!\n");
+		return -ENOMEM;
+	}
+
+	switch (hw->intr_mode) {
+	case CSIO_IM_INTX:
+	case CSIO_IM_MSI:
+		/* For interrupt forwarding queue only */
+		if (hw->intr_iq_idx == iq_idx)
+			iqp.iqandst	= X_INTERRUPTDESTINATION_PCIE;
+		else
+			iqp.iqandst	= X_INTERRUPTDESTINATION_IQ;
+		iqp.iqandstindex	=
+			csio_q_physiqid(hw, hw->intr_iq_idx);
+		break;
+	case CSIO_IM_MSIX:
+		iqp.iqandst		= X_INTERRUPTDESTINATION_PCIE;
+		iqp.iqandstindex	= (uint16_t)vec;
+		break;
+	case CSIO_IM_NONE:
+		mempool_free(mbp, hw->mb_mempool);
+		return -EINVAL;
+	}
+
+	/* Pass in the ingress queue cmd parameters */
+	iqp.pfn			= hw->pfn;
+	iqp.vfn			= 0;
+	iqp.iq_start		= 1;
+	iqp.viid		= 0;
+	iqp.type		= FW_IQ_TYPE_FL_INT_CAP;
+	iqp.iqasynch		= async;
+	if (csio_intr_coalesce_cnt)
+		iqp.iqanus	= X_UPDATESCHEDULING_COUNTER_OPTTIMER;
+	else
+		iqp.iqanus	= X_UPDATESCHEDULING_TIMER;
+	iqp.iqanud		= X_UPDATEDELIVERY_INTERRUPT;
+	iqp.iqpciech		= portid;
+	iqp.iqintcntthresh	= (uint8_t)csio_sge_thresh_reg;
+
+	switch (csio_q_wr_sz(hw, iq_idx)) {
+	case 16:
+		iqp.iqesize = 0; break;
+	case 32:
+		iqp.iqesize = 1; break;
+	case 64:
+		iqp.iqesize = 2; break;
+	case 128:
+		iqp.iqesize = 3; break;
+	}
+
+	iqp.iqsize		= csio_q_size(hw, iq_idx) /
+						csio_q_wr_sz(hw, iq_idx);
+	iqp.iqaddr		= csio_q_pstart(hw, iq_idx);
+
+	flq_idx = csio_q_iq_flq_idx(hw, iq_idx);
+	if (flq_idx != -1) {
+		struct csio_q *flq = hw->wrm.q_arr[flq_idx];
+
+		iqp.fl0paden	= 1;
+		iqp.fl0packen	= flq->un.fl.packen ? 1 : 0;
+		iqp.fl0fbmin	= X_FETCHBURSTMIN_64B;
+		iqp.fl0fbmax	= X_FETCHBURSTMAX_512B;
+		iqp.fl0size	= csio_q_size(hw, flq_idx) / CSIO_QCREDIT_SZ;
+		iqp.fl0addr	= csio_q_pstart(hw, flq_idx);
+	}
+
+	csio_mb_iq_alloc_write(hw, mbp, priv, CSIO_MB_DEFAULT_TMO, &iqp, cbfn);
+
+	if (csio_mb_issue(hw, mbp)) {
+		csio_err(hw, "Issue of IQ cmd failed!\n");
+		mempool_free(mbp, hw->mb_mempool);
+		return -EINVAL;
+	}
+
+	if (cbfn != NULL)
+		return 0;
+
+	return csio_wr_iq_create_rsp(hw, mbp, iq_idx);
+}
+
+/*
+ * csio_wr_eq_create_rsp - Response handler for EQ creation.
+ * @hw: The HW module.
+ * @mbp: Mailbox.
+ * @eq_idx: Egress queue that got created.
+ *
+ * Handle FW_EQ_OFLD_CMD mailbox completion. Save off the assigned EQ ids.
+ */
+static int
+csio_wr_eq_cfg_rsp(struct csio_hw *hw, struct csio_mb *mbp, int eq_idx)
+{
+	struct csio_eq_params eqp;
+	enum fw_retval retval;
+
+	memset(&eqp, 0, sizeof(struct csio_eq_params));
+
+	csio_mb_eq_ofld_alloc_write_rsp(hw, mbp, &retval, &eqp);
+
+	if (retval != FW_SUCCESS) {
+		csio_err(hw, "EQ OFLD cmd returned 0x%x!\n", retval);
+		mempool_free(mbp, hw->mb_mempool);
+		return -EINVAL;
+	}
+
+	csio_q_eqid(hw, eq_idx)	= (uint16_t)eqp.eqid;
+	csio_q_physeqid(hw, eq_idx) = (uint16_t)eqp.physeqid;
+	csio_q_pidx(hw, eq_idx)	= csio_q_cidx(hw, eq_idx) = 0;
+	csio_q_inc_idx(hw, eq_idx) = 0;
+
+	mempool_free(mbp, hw->mb_mempool);
+
+	return 0;
+}
+
+/*
+ * csio_wr_eq_create - Configure an Egress queue with FW.
+ * @hw: HW module.
+ * @priv: Private data.
+ * @eq_idx: Egress queue index in the WR module.
+ * @iq_idx: Associated ingress queue index.
+ * @cbfn: Completion callback.
+ *
+ * This API configures a offload egress queue with FW by issuing a
+ * FW_EQ_OFLD_CMD  (with alloc + write ) mailbox.
+ */
+int
+csio_wr_eq_create(struct csio_hw *hw, void *priv, int eq_idx,
+		  int iq_idx, uint8_t portid,
+		  void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct csio_mb  *mbp;
+	struct csio_eq_params eqp;
+
+	memset(&eqp, 0, sizeof(struct csio_eq_params));
+
+	mbp = mempool_alloc(hw->mb_mempool, GFP_ATOMIC);
+	if (!mbp) {
+		csio_err(hw, "EQ command out of memory!\n");
+		return -ENOMEM;
+	}
+
+	eqp.pfn			= hw->pfn;
+	eqp.vfn			= 0;
+	eqp.eqstart		= 1;
+	eqp.hostfcmode		= X_HOSTFCMODE_STATUS_PAGE;
+	eqp.iqid		= csio_q_iqid(hw, iq_idx);
+	eqp.fbmin		= X_FETCHBURSTMIN_64B;
+	eqp.fbmax		= X_FETCHBURSTMAX_512B;
+	eqp.cidxfthresh		= 0;
+	eqp.pciechn		= portid;
+	eqp.eqsize		= csio_q_size(hw, eq_idx) / CSIO_QCREDIT_SZ;
+	eqp.eqaddr		= csio_q_pstart(hw, eq_idx);
+
+	csio_mb_eq_ofld_alloc_write(hw, mbp, priv, CSIO_MB_DEFAULT_TMO,
+				    &eqp, cbfn);
+
+	if (csio_mb_issue(hw, mbp)) {
+		csio_err(hw, "Issue of EQ OFLD cmd failed!\n");
+		mempool_free(mbp, hw->mb_mempool);
+		return -EINVAL;
+	}
+
+	if (cbfn != NULL)
+		return 0;
+
+	return csio_wr_eq_cfg_rsp(hw, mbp, eq_idx);
+}
+
+/*
+ * csio_wr_iq_destroy_rsp - Response handler for IQ removal.
+ * @hw: The HW module.
+ * @mbp: Mailbox.
+ * @iq_idx: Ingress queue that was freed.
+ *
+ * Handle FW_IQ_CMD (free) mailbox completion.
+ */
+static int
+csio_wr_iq_destroy_rsp(struct csio_hw *hw, struct csio_mb *mbp, int iq_idx)
+{
+	enum fw_retval retval = csio_mb_fw_retval(mbp);
+	int rv = 0;
+
+	if (retval != FW_SUCCESS)
+		rv = -EINVAL;
+
+	mempool_free(mbp, hw->mb_mempool);
+
+	return rv;
+}
+
+/*
+ * csio_wr_iq_destroy - Free an ingress queue.
+ * @hw: The HW module.
+ * @priv: Private data object.
+ * @iq_idx: Ingress queue index to destroy
+ * @cbfn: Completion callback.
+ *
+ * This API frees an ingress queue by issuing the FW_IQ_CMD
+ * with the free bit set.
+ */
+static int
+csio_wr_iq_destroy(struct csio_hw *hw, void *priv, int iq_idx,
+		   void (*cbfn)(struct csio_hw *, struct csio_mb *))
+{
+	int rv = 0;
+	struct csio_mb  *mbp;
+	struct csio_iq_params iqp;
+	int flq_idx;
+
+	memset(&iqp, 0, sizeof(struct csio_iq_params));
+
+	mbp = mempool_alloc(hw->mb_mempool, GFP_ATOMIC);
+	if (!mbp)
+		return -ENOMEM;
+
+	iqp.pfn		= hw->pfn;
+	iqp.vfn		= 0;
+	iqp.iqid	= csio_q_iqid(hw, iq_idx);
+	iqp.type	= FW_IQ_TYPE_FL_INT_CAP;
+
+	flq_idx = csio_q_iq_flq_idx(hw, iq_idx);
+	if (flq_idx != -1)
+		iqp.fl0id = csio_q_flid(hw, flq_idx);
+	else
+		iqp.fl0id = 0xFFFF;
+
+	iqp.fl1id = 0xFFFF;
+
+	csio_mb_iq_free(hw, mbp, priv, CSIO_MB_DEFAULT_TMO, &iqp, cbfn);
+
+	rv = csio_mb_issue(hw, mbp);
+	if (rv != 0) {
+		mempool_free(mbp, hw->mb_mempool);
+		return rv;
+	}
+
+	if (cbfn != NULL)
+		return 0;
+
+	return csio_wr_iq_destroy_rsp(hw, mbp, iq_idx);
+}
+
+/*
+ * csio_wr_eq_destroy_rsp - Response handler for OFLD EQ creation.
+ * @hw: The HW module.
+ * @mbp: Mailbox.
+ * @eq_idx: Egress queue that was freed.
+ *
+ * Handle FW_OFLD_EQ_CMD (free) mailbox completion.
+ */
+static int
+csio_wr_eq_destroy_rsp(struct csio_hw *hw, struct csio_mb *mbp, int eq_idx)
+{
+	enum fw_retval retval = csio_mb_fw_retval(mbp);
+	int rv = 0;
+
+	if (retval != FW_SUCCESS)
+		rv = -EINVAL;
+
+	mempool_free(mbp, hw->mb_mempool);
+
+	return rv;
+}
+
+/*
+ * csio_wr_eq_destroy - Free an Egress queue.
+ * @hw: The HW module.
+ * @priv: Private data object.
+ * @eq_idx: Egress queue index to destroy
+ * @cbfn: Completion callback.
+ *
+ * This API frees an Egress queue by issuing the FW_EQ_OFLD_CMD
+ * with the free bit set.
+ */
+static int
+csio_wr_eq_destroy(struct csio_hw *hw, void *priv, int eq_idx,
+		   void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	int rv = 0;
+	struct csio_mb  *mbp;
+	struct csio_eq_params eqp;
+
+	memset(&eqp, 0, sizeof(struct csio_eq_params));
+
+	mbp = mempool_alloc(hw->mb_mempool, GFP_ATOMIC);
+	if (!mbp)
+		return -ENOMEM;
+
+	eqp.pfn		= hw->pfn;
+	eqp.vfn		= 0;
+	eqp.eqid	= csio_q_eqid(hw, eq_idx);
+
+	csio_mb_eq_ofld_free(hw, mbp, priv, CSIO_MB_DEFAULT_TMO, &eqp, cbfn);
+
+	rv = csio_mb_issue(hw, mbp);
+	if (rv != 0) {
+		mempool_free(mbp, hw->mb_mempool);
+		return rv;
+	}
+
+	if (cbfn != NULL)
+		return 0;
+
+	return csio_wr_eq_destroy_rsp(hw, mbp, eq_idx);
+}
+
+/*
+ * csio_wr_cleanup_eq_stpg - Cleanup Egress queue status page
+ * @hw: HW module
+ * @qidx: Egress queue index
+ *
+ * Cleanup the Egress queue status page.
+ */
+static void
+csio_wr_cleanup_eq_stpg(struct csio_hw *hw, int qidx)
+{
+	struct csio_q	*q = csio_hw_to_wrm(hw)->q_arr[qidx];
+	struct csio_qstatus_page *stp = (struct csio_qstatus_page *)q->vwrap;
+
+	memset(stp, 0, sizeof(*stp));
+}
+
+/*
+ * csio_wr_cleanup_iq_ftr - Cleanup Footer entries in IQ
+ * @hw: HW module
+ * @qidx: Ingress queue index
+ *
+ * Cleanup the footer entries in the given ingress queue,
+ * set to 1 the internal copy of genbit.
+ */
+static void
+csio_wr_cleanup_iq_ftr(struct csio_hw *hw, int qidx)
+{
+	struct csio_wrm *wrm	= csio_hw_to_wrm(hw);
+	struct csio_q	*q	= wrm->q_arr[qidx];
+	void *wr;
+	struct csio_iqwr_footer *ftr;
+	uint32_t i = 0;
+
+	/* set to 1 since we are just about zero out genbit */
+	q->un.iq.genbit = 1;
+
+	for (i = 0; i < q->credits; i++) {
+		/* Get the WR */
+		wr = (void *)((uintptr_t)q->vstart +
+					   (i * q->wr_sz));
+		/* Get the footer */
+		ftr = (struct csio_iqwr_footer *)((uintptr_t)wr +
+					  (q->wr_sz - sizeof(*ftr)));
+		/* Zero out footer */
+		memset(ftr, 0, sizeof(*ftr));
+	}
+}
+
+int
+csio_wr_destroy_queues(struct csio_hw *hw, bool cmd)
+{
+	int i, flq_idx;
+	struct csio_q *q;
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	int rv;
+
+	for (i = 0; i < wrm->free_qidx; i++) {
+		q = wrm->q_arr[i];
+
+		switch (q->type) {
+		case CSIO_EGRESS:
+			if (csio_q_eqid(hw, i) != CSIO_MAX_QID) {
+				csio_wr_cleanup_eq_stpg(hw, i);
+				if (!cmd) {
+					csio_q_eqid(hw, i) = CSIO_MAX_QID;
+					continue;
+				}
+
+				rv = csio_wr_eq_destroy(hw, NULL, i, NULL);
+				if ((rv == -EBUSY) || (rv == -ETIMEDOUT))
+					cmd = false;
+
+				csio_q_eqid(hw, i) = CSIO_MAX_QID;
+			}
+		case CSIO_INGRESS:
+			if (csio_q_iqid(hw, i) != CSIO_MAX_QID) {
+				csio_wr_cleanup_iq_ftr(hw, i);
+				if (!cmd) {
+					csio_q_iqid(hw, i) = CSIO_MAX_QID;
+					flq_idx = csio_q_iq_flq_idx(hw, i);
+					if (flq_idx != -1)
+						csio_q_flid(hw, flq_idx) =
+								CSIO_MAX_QID;
+					continue;
+				}
+
+				rv = csio_wr_iq_destroy(hw, NULL, i, NULL);
+				if ((rv == -EBUSY) || (rv == -ETIMEDOUT))
+					cmd = false;
+
+				csio_q_iqid(hw, i) = CSIO_MAX_QID;
+				flq_idx = csio_q_iq_flq_idx(hw, i);
+				if (flq_idx != -1)
+					csio_q_flid(hw, flq_idx) = CSIO_MAX_QID;
+			}
+		default:
+			break;
+		}
+	}
+
+	hw->flags &= ~CSIO_HWF_Q_FW_ALLOCED;
+
+	return 0;
+}
+
+/*
+ * csio_wr_get - Get requested size of WR entry/entries from queue.
+ * @hw: HW module.
+ * @qidx: Index of queue.
+ * @size: Cumulative size of Work request(s).
+ * @wrp: Work request pair.
+ *
+ * If requested credits are available, return the start address of the
+ * work request in the work request pair. Set pidx accordingly and
+ * return.
+ *
+ * NOTE about WR pair:
+ * ==================
+ * A WR can start towards the end of a queue, and then continue at the
+ * beginning, since the queue is considered to be circular. This will
+ * require a pair of address/size to be passed back to the caller -
+ * hence Work request pair format.
+ */
+int
+csio_wr_get(struct csio_hw *hw, int qidx, uint32_t size,
+	    struct csio_wr_pair *wrp)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_q *q = wrm->q_arr[qidx];
+	void *cwr = (void *)((uintptr_t)(q->vstart) +
+						(q->pidx * CSIO_QCREDIT_SZ));
+	struct csio_qstatus_page *stp = (struct csio_qstatus_page *)q->vwrap;
+	uint16_t cidx = q->cidx = ntohs(stp->cidx);
+	uint16_t pidx = q->pidx;
+	uint32_t req_sz	= ALIGN(size, CSIO_QCREDIT_SZ);
+	int req_credits	= req_sz / CSIO_QCREDIT_SZ;
+	int credits;
+
+	CSIO_DB_ASSERT(q->owner != NULL);
+	CSIO_DB_ASSERT((qidx >= 0) && (qidx < wrm->free_qidx));
+	CSIO_DB_ASSERT(cidx <= q->credits);
+
+	/* Calculate credits */
+	if (pidx > cidx) {
+		credits = q->credits - (pidx - cidx) - 1;
+	} else if (cidx > pidx) {
+		credits = cidx - pidx - 1;
+	} else {
+		/* cidx == pidx, empty queue */
+		credits = q->credits;
+		CSIO_INC_STATS(q, n_qempty);
+	}
+
+	/*
+	 * Check if we have enough credits.
+	 * credits = 1 implies queue is full.
+	 */
+	if (!credits || (req_credits > credits)) {
+		CSIO_INC_STATS(q, n_qfull);
+		return -EBUSY;
+	}
+
+	/*
+	 * If we are here, we have enough credits to satisfy the
+	 * request. Check if we are near the end of q, and if WR spills over.
+	 * If it does, use the first addr/size to cover the queue until
+	 * the end. Fit the remainder portion of the request at the top
+	 * of queue and return it in the second addr/len. Set pidx
+	 * accordingly.
+	 */
+	if (unlikely(((uintptr_t)cwr + req_sz) > (uintptr_t)(q->vwrap))) {
+		wrp->addr1 = cwr;
+		wrp->size1 = (uint32_t)((uintptr_t)q->vwrap - (uintptr_t)cwr);
+		wrp->addr2 = q->vstart;
+		wrp->size2 = req_sz - wrp->size1;
+		q->pidx	= (uint16_t)(ALIGN(wrp->size2, CSIO_QCREDIT_SZ) /
+							CSIO_QCREDIT_SZ);
+		CSIO_INC_STATS(q, n_qwrap);
+		CSIO_INC_STATS(q, n_eq_wr_split);
+	} else {
+		wrp->addr1 = cwr;
+		wrp->size1 = req_sz;
+		wrp->addr2 = NULL;
+		wrp->size2 = 0;
+		q->pidx	+= (uint16_t)req_credits;
+
+		/* We are the end of queue, roll back pidx to top of queue */
+		if (unlikely(q->pidx == q->credits)) {
+			q->pidx = 0;
+			CSIO_INC_STATS(q, n_qwrap);
+		}
+	}
+
+	q->inc_idx = (uint16_t)req_credits;
+
+	CSIO_INC_STATS(q, n_tot_reqs);
+
+	return 0;
+}
+
+/*
+ * csio_wr_copy_to_wrp - Copies given data into WR.
+ * @data_buf - Data buffer
+ * @wrp - Work request pair.
+ * @wr_off - Work request offset.
+ * @data_len - Data length.
+ *
+ * Copies the given data in Work Request. Work request pair(wrp) specifies
+ * address information of Work request.
+ * Returns: none
+ */
+void
+csio_wr_copy_to_wrp(void *data_buf, struct csio_wr_pair *wrp,
+		   uint32_t wr_off, uint32_t data_len)
+{
+	uint32_t nbytes;
+
+	/* Number of space available in buffer addr1 of WRP */
+	nbytes = ((wrp->size1 - wr_off) >= data_len) ?
+					data_len : (wrp->size1 - wr_off);
+
+	memcpy((uint8_t *) wrp->addr1 + wr_off, data_buf, nbytes);
+	data_len -= nbytes;
+
+	/* Write the remaining data from the begining of circular buffer */
+	if (data_len) {
+		CSIO_DB_ASSERT(data_len <= wrp->size2);
+		CSIO_DB_ASSERT(wrp->addr2 != NULL);
+		memcpy(wrp->addr2, (uint8_t *) data_buf + nbytes, data_len);
+	}
+}
+
+/*
+ * csio_wr_issue - Notify chip of Work request.
+ * @hw: HW module.
+ * @qidx: Index of queue.
+ * @prio: 0: Low priority, 1: High priority
+ *
+ * Rings the SGE Doorbell by writing the current producer index of the passed
+ * in queue into the register.
+ *
+ */
+int
+csio_wr_issue(struct csio_hw *hw, int qidx, bool prio)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_q *q = wrm->q_arr[qidx];
+
+	CSIO_DB_ASSERT((qidx >= 0) && (qidx < wrm->free_qidx));
+
+	wmb();
+	/* Ring SGE Doorbell writing q->pidx into it */
+	csio_wr_reg32(hw, DBPRIO(prio) | QID(q->un.eq.physeqid) |
+		      PIDX(q->inc_idx), MYPF_REG(SGE_PF_KDOORBELL));
+	q->inc_idx = 0;
+
+	return 0;
+}
+
+static inline uint32_t
+csio_wr_avail_qcredits(struct csio_q *q)
+{
+	if (q->pidx > q->cidx)
+		return q->pidx - q->cidx;
+	else if (q->cidx > q->pidx)
+		return q->credits - (q->cidx - q->pidx);
+	else
+		return 0;	/* cidx == pidx, empty queue */
+}
+
+/*
+ * csio_wr_inval_flq_buf - Invalidate a free list buffer entry.
+ * @hw: HW module.
+ * @flq: The freelist queue.
+ *
+ * Invalidate the driver's version of a freelist buffer entry,
+ * without freeing the associated the DMA memory. The entry
+ * to be invalidated is picked up from the current Free list
+ * queue cidx.
+ *
+ */
+static inline void
+csio_wr_inval_flq_buf(struct csio_hw *hw, struct csio_q *flq)
+{
+	flq->cidx++;
+	if (flq->cidx == flq->credits) {
+		flq->cidx = 0;
+		CSIO_INC_STATS(flq, n_qwrap);
+	}
+}
+
+/*
+ * csio_wr_process_fl - Process a freelist completion.
+ * @hw: HW module.
+ * @q: The ingress queue attached to the Freelist.
+ * @wr: The freelist completion WR in the ingress queue.
+ * @len_to_qid: The lower 32-bits of the first flit of the RSP footer
+ * @iq_handler: Caller's handler for this completion.
+ * @priv: Private pointer of caller
+ *
+ */
+static inline void
+csio_wr_process_fl(struct csio_hw *hw, struct csio_q *q,
+		   void *wr, uint32_t len_to_qid,
+		   void (*iq_handler)(struct csio_hw *, void *,
+				      uint32_t, struct csio_fl_dma_buf *,
+				      void *),
+		   void *priv)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_sge *sge = &wrm->sge;
+	struct csio_fl_dma_buf flb;
+	struct csio_dma_buf *buf, *fbuf;
+	uint32_t bufsz, len, lastlen = 0;
+	int n;
+	struct csio_q *flq = hw->wrm.q_arr[q->un.iq.flq_idx];
+
+	CSIO_DB_ASSERT(flq != NULL);
+
+	len = len_to_qid;
+
+	if (len & IQWRF_NEWBUF) {
+		if (flq->un.fl.offset > 0) {
+			csio_wr_inval_flq_buf(hw, flq);
+			flq->un.fl.offset = 0;
+		}
+		len = IQWRF_LEN_GET(len);
+	}
+
+	CSIO_DB_ASSERT(len != 0);
+
+	flb.totlen = len;
+
+	/* Consume all freelist buffers used for len bytes */
+	for (n = 0, fbuf = flb.flbufs; ; n++, fbuf++) {
+		buf = &flq->un.fl.bufs[flq->cidx];
+		bufsz = csio_wr_fl_bufsz(sge, buf);
+
+		fbuf->paddr	= buf->paddr;
+		fbuf->vaddr	= buf->vaddr;
+
+		flb.offset	= flq->un.fl.offset;
+		lastlen		= min(bufsz, len);
+		fbuf->len	= lastlen;
+
+		len -= lastlen;
+		if (!len)
+			break;
+		csio_wr_inval_flq_buf(hw, flq);
+	}
+
+	flb.defer_free = flq->un.fl.packen ? 0 : 1;
+
+	iq_handler(hw, wr, q->wr_sz - sizeof(struct csio_iqwr_footer),
+		   &flb, priv);
+
+	if (flq->un.fl.packen)
+		flq->un.fl.offset += ALIGN(lastlen, sge->csio_fl_align);
+	else
+		csio_wr_inval_flq_buf(hw, flq);
+
+}
+
+/*
+ * csio_is_new_iqwr - Is this a new Ingress queue entry ?
+ * @q: Ingress quueue.
+ * @ftr: Ingress queue WR SGE footer.
+ *
+ * The entry is new if our generation bit matches the corresponding
+ * bit in the footer of the current WR.
+ */
+static inline bool
+csio_is_new_iqwr(struct csio_q *q, struct csio_iqwr_footer *ftr)
+{
+	return (q->un.iq.genbit == (ftr->u.type_gen >> IQWRF_GEN_SHIFT));
+}
+
+/*
+ * csio_wr_process_iq - Process elements in Ingress queue.
+ * @hw:  HW pointer
+ * @qidx: Index of queue
+ * @iq_handler: Handler for this queue
+ * @priv: Caller's private pointer
+ *
+ * This routine walks through every entry of the ingress queue, calling
+ * the provided iq_handler with the entry, until the generation bit
+ * flips.
+ */
+int
+csio_wr_process_iq(struct csio_hw *hw, struct csio_q *q,
+		   void (*iq_handler)(struct csio_hw *, void *,
+				      uint32_t, struct csio_fl_dma_buf *,
+				      void *),
+		   void *priv)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	void *wr = (void *)((uintptr_t)q->vstart + (q->cidx * q->wr_sz));
+	struct csio_iqwr_footer *ftr;
+	uint32_t wr_type, fw_qid, qid;
+	struct csio_q *q_completed;
+	struct csio_q *flq = csio_iq_has_fl(q) ?
+					wrm->q_arr[q->un.iq.flq_idx] : NULL;
+	int rv = 0;
+
+	/* Get the footer */
+	ftr = (struct csio_iqwr_footer *)((uintptr_t)wr +
+					  (q->wr_sz - sizeof(*ftr)));
+
+	/*
+	 * When q wrapped around last time, driver should have inverted
+	 * ic.genbit as well.
+	 */
+	while (csio_is_new_iqwr(q, ftr)) {
+
+		CSIO_DB_ASSERT(((uintptr_t)wr + q->wr_sz) <=
+						(uintptr_t)q->vwrap);
+		rmb();
+		wr_type = IQWRF_TYPE_GET(ftr->u.type_gen);
+
+		switch (wr_type) {
+		case X_RSPD_TYPE_CPL:
+			/* Subtract footer from WR len */
+			iq_handler(hw, wr, q->wr_sz - sizeof(*ftr), NULL, priv);
+			break;
+		case X_RSPD_TYPE_FLBUF:
+			csio_wr_process_fl(hw, q, wr,
+					   ntohl(ftr->pldbuflen_qid),
+					   iq_handler, priv);
+			break;
+		case X_RSPD_TYPE_INTR:
+			fw_qid = ntohl(ftr->pldbuflen_qid);
+			qid = fw_qid - wrm->fw_iq_start;
+			q_completed = hw->wrm.intr_map[qid];
+
+			if (unlikely(qid ==
+					csio_q_physiqid(hw, hw->intr_iq_idx))) {
+				/*
+				 * We are already in the Forward Interrupt
+				 * Interrupt Queue Service! Do-not service
+				 * again!
+				 *
+				 */
+			} else {
+				CSIO_DB_ASSERT(q_completed);
+				CSIO_DB_ASSERT(
+					q_completed->un.iq.iq_intx_handler);
+
+				/* Call the queue handler. */
+				q_completed->un.iq.iq_intx_handler(hw, NULL,
+						0, NULL, (void *)q_completed);
+			}
+			break;
+		default:
+			csio_warn(hw, "Unknown resp type 0x%x received\n",
+				 wr_type);
+			CSIO_INC_STATS(q, n_rsp_unknown);
+			break;
+		}
+
+		/*
+		 * Ingress *always* has fixed size WR entries. Therefore,
+		 * there should always be complete WRs towards the end of
+		 * queue.
+		 */
+		if (((uintptr_t)wr + q->wr_sz) == (uintptr_t)q->vwrap) {
+
+			/* Roll over to start of queue */
+			q->cidx = 0;
+			wr	= q->vstart;
+
+			/* Toggle genbit */
+			q->un.iq.genbit ^= 0x1;
+
+			CSIO_INC_STATS(q, n_qwrap);
+		} else {
+			q->cidx++;
+			wr	= (void *)((uintptr_t)(q->vstart) +
+					   (q->cidx * q->wr_sz));
+		}
+
+		ftr = (struct csio_iqwr_footer *)((uintptr_t)wr +
+						  (q->wr_sz - sizeof(*ftr)));
+		q->inc_idx++;
+
+	} /* while (q->un.iq.genbit == hdr->genbit) */
+
+	/*
+	 * We need to re-arm SGE interrupts in case we got a stray interrupt,
+	 * especially in msix mode. With INTx, this may be a common occurence.
+	 */
+	if (unlikely(!q->inc_idx)) {
+		CSIO_INC_STATS(q, n_stray_comp);
+		rv = -EINVAL;
+		goto restart;
+	}
+
+	/* Replenish free list buffers if pending falls below low water mark */
+	if (flq) {
+		uint32_t avail  = csio_wr_avail_qcredits(flq);
+		if (avail <= 16) {
+			/* Make sure in FLQ, atleast 1 credit (8 FL buffers)
+			 * remains unpopulated otherwise HW thinks
+			 * FLQ is empty.
+			 */
+			csio_wr_update_fl(hw, flq, (flq->credits - 8) - avail);
+			csio_wr_ring_fldb(hw, flq);
+		}
+	}
+
+restart:
+	/* Now inform SGE about our incremental index value */
+	csio_wr_reg32(hw, CIDXINC(q->inc_idx)		|
+			  INGRESSQID(q->un.iq.physiqid)	|
+			  TIMERREG(csio_sge_timer_reg),
+			  MYPF_REG(SGE_PF_GTS));
+	q->stats.n_tot_rsps += q->inc_idx;
+
+	q->inc_idx = 0;
+
+	return rv;
+}
+
+int
+csio_wr_process_iq_idx(struct csio_hw *hw, int qidx,
+		   void (*iq_handler)(struct csio_hw *, void *,
+				      uint32_t, struct csio_fl_dma_buf *,
+				      void *),
+		   void *priv)
+{
+	struct csio_wrm *wrm	= csio_hw_to_wrm(hw);
+	struct csio_q	*iq	= wrm->q_arr[qidx];
+
+	return csio_wr_process_iq(hw, iq, iq_handler, priv);
+}
+
+static int
+csio_closest_timer(struct csio_sge *s, int time)
+{
+	int i, delta, match = 0, min_delta = INT_MAX;
+
+	for (i = 0; i < ARRAY_SIZE(s->timer_val); i++) {
+		delta = time - s->timer_val[i];
+		if (delta < 0)
+			delta = -delta;
+		if (delta < min_delta) {
+			min_delta = delta;
+			match = i;
+		}
+	}
+	return match;
+}
+
+static int
+csio_closest_thresh(struct csio_sge *s, int cnt)
+{
+	int i, delta, match = 0, min_delta = INT_MAX;
+
+	for (i = 0; i < ARRAY_SIZE(s->counter_val); i++) {
+		delta = cnt - s->counter_val[i];
+		if (delta < 0)
+			delta = -delta;
+		if (delta < min_delta) {
+			min_delta = delta;
+			match = i;
+		}
+	}
+	return match;
+}
+
+static void
+csio_wr_fixup_host_params(struct csio_hw *hw)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_sge *sge = &wrm->sge;
+	uint32_t clsz = L1_CACHE_BYTES;
+	uint32_t s_hps = PAGE_SHIFT - 10;
+	uint32_t ingpad = 0;
+	uint32_t stat_len = clsz > 64 ? 128 : 64;
+
+	csio_wr_reg32(hw, HOSTPAGESIZEPF0(s_hps) | HOSTPAGESIZEPF1(s_hps) |
+		      HOSTPAGESIZEPF2(s_hps) | HOSTPAGESIZEPF3(s_hps) |
+		      HOSTPAGESIZEPF4(s_hps) | HOSTPAGESIZEPF5(s_hps) |
+		      HOSTPAGESIZEPF6(s_hps) | HOSTPAGESIZEPF7(s_hps),
+		      SGE_HOST_PAGE_SIZE);
+
+	sge->csio_fl_align = clsz < 32 ? 32 : clsz;
+	ingpad = ilog2(sge->csio_fl_align) - 5;
+
+	csio_set_reg_field(hw, SGE_CONTROL, INGPADBOUNDARY_MASK |
+					    EGRSTATUSPAGESIZE(1),
+			   INGPADBOUNDARY(ingpad) |
+			   EGRSTATUSPAGESIZE(stat_len != 64));
+
+	/* FL BUFFER SIZE#0 is Page size i,e already aligned to cache line */
+	csio_wr_reg32(hw, PAGE_SIZE, SGE_FL_BUFFER_SIZE0);
+	csio_wr_reg32(hw,
+		      (csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE2) +
+		      sge->csio_fl_align - 1) & ~(sge->csio_fl_align - 1),
+		      SGE_FL_BUFFER_SIZE2);
+	csio_wr_reg32(hw,
+		      (csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE3) +
+		      sge->csio_fl_align - 1) & ~(sge->csio_fl_align - 1),
+		      SGE_FL_BUFFER_SIZE3);
+
+	csio_wr_reg32(hw, HPZ0(PAGE_SHIFT - 12), ULP_RX_TDDP_PSZ);
+
+	/* default value of rx_dma_offset of the NIC driver */
+	csio_set_reg_field(hw, SGE_CONTROL, PKTSHIFT_MASK,
+			   PKTSHIFT(CSIO_SGE_RX_DMA_OFFSET));
+}
+
+static void
+csio_init_intr_coalesce_parms(struct csio_hw *hw)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_sge *sge = &wrm->sge;
+
+	csio_sge_thresh_reg = csio_closest_thresh(sge, csio_intr_coalesce_cnt);
+	if (csio_intr_coalesce_cnt) {
+		csio_sge_thresh_reg = 0;
+		csio_sge_timer_reg = X_TIMERREG_RESTART_COUNTER;
+		return;
+	}
+
+	csio_sge_timer_reg = csio_closest_timer(sge, csio_intr_coalesce_time);
+}
+
+/*
+ * csio_wr_get_sge - Get SGE register values.
+ * @hw: HW module.
+ *
+ * Used by non-master functions and by master-functions relying on config file.
+ */
+static void
+csio_wr_get_sge(struct csio_hw *hw)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_sge *sge = &wrm->sge;
+	uint32_t ingpad;
+	int i;
+	u32 timer_value_0_and_1, timer_value_2_and_3, timer_value_4_and_5;
+	u32 ingress_rx_threshold;
+
+	sge->sge_control = csio_rd_reg32(hw, SGE_CONTROL);
+
+	ingpad = INGPADBOUNDARY_GET(sge->sge_control);
+
+	switch (ingpad) {
+	case X_INGPCIEBOUNDARY_32B:
+		sge->csio_fl_align = 32; break;
+	case X_INGPCIEBOUNDARY_64B:
+		sge->csio_fl_align = 64; break;
+	case X_INGPCIEBOUNDARY_128B:
+		sge->csio_fl_align = 128; break;
+	case X_INGPCIEBOUNDARY_256B:
+		sge->csio_fl_align = 256; break;
+	case X_INGPCIEBOUNDARY_512B:
+		sge->csio_fl_align = 512; break;
+	case X_INGPCIEBOUNDARY_1024B:
+		sge->csio_fl_align = 1024; break;
+	case X_INGPCIEBOUNDARY_2048B:
+		sge->csio_fl_align = 2048; break;
+	case X_INGPCIEBOUNDARY_4096B:
+		sge->csio_fl_align = 4096; break;
+	}
+
+	for (i = 0; i < CSIO_SGE_FL_SIZE_REGS; i++)
+		csio_get_flbuf_size(hw, sge, i);
+
+	timer_value_0_and_1 = csio_rd_reg32(hw, SGE_TIMER_VALUE_0_AND_1);
+	timer_value_2_and_3 = csio_rd_reg32(hw, SGE_TIMER_VALUE_2_AND_3);
+	timer_value_4_and_5 = csio_rd_reg32(hw, SGE_TIMER_VALUE_4_AND_5);
+
+	sge->timer_val[0] = (uint16_t)csio_core_ticks_to_us(hw,
+					TIMERVALUE0_GET(timer_value_0_and_1));
+	sge->timer_val[1] = (uint16_t)csio_core_ticks_to_us(hw,
+					TIMERVALUE1_GET(timer_value_0_and_1));
+	sge->timer_val[2] = (uint16_t)csio_core_ticks_to_us(hw,
+					TIMERVALUE2_GET(timer_value_2_and_3));
+	sge->timer_val[3] = (uint16_t)csio_core_ticks_to_us(hw,
+					TIMERVALUE3_GET(timer_value_2_and_3));
+	sge->timer_val[4] = (uint16_t)csio_core_ticks_to_us(hw,
+					TIMERVALUE4_GET(timer_value_4_and_5));
+	sge->timer_val[5] = (uint16_t)csio_core_ticks_to_us(hw,
+					TIMERVALUE5_GET(timer_value_4_and_5));
+
+	ingress_rx_threshold = csio_rd_reg32(hw, SGE_INGRESS_RX_THRESHOLD);
+	sge->counter_val[0] = THRESHOLD_0_GET(ingress_rx_threshold);
+	sge->counter_val[1] = THRESHOLD_1_GET(ingress_rx_threshold);
+	sge->counter_val[2] = THRESHOLD_2_GET(ingress_rx_threshold);
+	sge->counter_val[3] = THRESHOLD_3_GET(ingress_rx_threshold);
+
+	csio_init_intr_coalesce_parms(hw);
+}
+
+/*
+ * csio_wr_set_sge - Initialize SGE registers
+ * @hw: HW module.
+ *
+ * Used by Master function to initialize SGE registers in the absence
+ * of a config file.
+ */
+static void
+csio_wr_set_sge(struct csio_hw *hw)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_sge *sge = &wrm->sge;
+	int i;
+
+	/*
+	 * Set up our basic SGE mode to deliver CPL messages to our Ingress
+	 * Queue and Packet Date to the Free List.
+	 */
+	csio_set_reg_field(hw, SGE_CONTROL, RXPKTCPLMODE, RXPKTCPLMODE);
+
+	sge->sge_control = csio_rd_reg32(hw, SGE_CONTROL);
+
+	/* sge->csio_fl_align is set up by csio_wr_fixup_host_params(). */
+
+	/*
+	 * Set up to drop DOORBELL writes when the DOORBELL FIFO overflows
+	 * and generate an interrupt when this occurs so we can recover.
+	 */
+	csio_set_reg_field(hw, SGE_DBFIFO_STATUS,
+			   HP_INT_THRESH(HP_INT_THRESH_MASK) |
+			   LP_INT_THRESH(LP_INT_THRESH_MASK),
+			   HP_INT_THRESH(CSIO_SGE_DBFIFO_INT_THRESH) |
+			   LP_INT_THRESH(CSIO_SGE_DBFIFO_INT_THRESH));
+	csio_set_reg_field(hw, SGE_DOORBELL_CONTROL, ENABLE_DROP,
+			   ENABLE_DROP);
+
+	/* SGE_FL_BUFFER_SIZE0 is set up by csio_wr_fixup_host_params(). */
+
+	CSIO_SET_FLBUF_SIZE(hw, 1, CSIO_SGE_FLBUF_SIZE1);
+	CSIO_SET_FLBUF_SIZE(hw, 2, CSIO_SGE_FLBUF_SIZE2);
+	CSIO_SET_FLBUF_SIZE(hw, 3, CSIO_SGE_FLBUF_SIZE3);
+	CSIO_SET_FLBUF_SIZE(hw, 4, CSIO_SGE_FLBUF_SIZE4);
+	CSIO_SET_FLBUF_SIZE(hw, 5, CSIO_SGE_FLBUF_SIZE5);
+	CSIO_SET_FLBUF_SIZE(hw, 6, CSIO_SGE_FLBUF_SIZE6);
+	CSIO_SET_FLBUF_SIZE(hw, 7, CSIO_SGE_FLBUF_SIZE7);
+	CSIO_SET_FLBUF_SIZE(hw, 8, CSIO_SGE_FLBUF_SIZE8);
+
+	for (i = 0; i < CSIO_SGE_FL_SIZE_REGS; i++)
+		csio_get_flbuf_size(hw, sge, i);
+
+	/* Initialize interrupt coalescing attributes */
+	sge->timer_val[0] = CSIO_SGE_TIMER_VAL_0;
+	sge->timer_val[1] = CSIO_SGE_TIMER_VAL_1;
+	sge->timer_val[2] = CSIO_SGE_TIMER_VAL_2;
+	sge->timer_val[3] = CSIO_SGE_TIMER_VAL_3;
+	sge->timer_val[4] = CSIO_SGE_TIMER_VAL_4;
+	sge->timer_val[5] = CSIO_SGE_TIMER_VAL_5;
+
+	sge->counter_val[0] = CSIO_SGE_INT_CNT_VAL_0;
+	sge->counter_val[1] = CSIO_SGE_INT_CNT_VAL_1;
+	sge->counter_val[2] = CSIO_SGE_INT_CNT_VAL_2;
+	sge->counter_val[3] = CSIO_SGE_INT_CNT_VAL_3;
+
+	csio_wr_reg32(hw, THRESHOLD_0(sge->counter_val[0]) |
+		      THRESHOLD_1(sge->counter_val[1]) |
+		      THRESHOLD_2(sge->counter_val[2]) |
+		      THRESHOLD_3(sge->counter_val[3]),
+		      SGE_INGRESS_RX_THRESHOLD);
+
+	csio_wr_reg32(hw,
+		   TIMERVALUE0(csio_us_to_core_ticks(hw, sge->timer_val[0])) |
+		   TIMERVALUE1(csio_us_to_core_ticks(hw, sge->timer_val[1])),
+		   SGE_TIMER_VALUE_0_AND_1);
+
+	csio_wr_reg32(hw,
+		   TIMERVALUE2(csio_us_to_core_ticks(hw, sge->timer_val[2])) |
+		   TIMERVALUE3(csio_us_to_core_ticks(hw, sge->timer_val[3])),
+		   SGE_TIMER_VALUE_2_AND_3);
+
+	csio_wr_reg32(hw,
+		   TIMERVALUE4(csio_us_to_core_ticks(hw, sge->timer_val[4])) |
+		   TIMERVALUE5(csio_us_to_core_ticks(hw, sge->timer_val[5])),
+		   SGE_TIMER_VALUE_4_AND_5);
+
+	csio_init_intr_coalesce_parms(hw);
+}
+
+void
+csio_wr_sge_init(struct csio_hw *hw)
+{
+	/*
+	 * If we are master:
+	 *    - If we plan to use the config file, we need to fixup some
+	 *      host specific registers, and read the rest of the SGE
+	 *      configuration.
+	 *    - If we dont plan to use the config file, we need to initialize
+	 *      SGE entirely, including fixing the host specific registers.
+	 * If we arent the master, we are only allowed to read and work off of
+	 *      the already initialized SGE values.
+	 *
+	 * Therefore, before calling this function, we assume that the master-
+	 * ship of the card, and whether to use config file or not, have
+	 * already been decided. In other words, CSIO_HWF_USING_SOFT_PARAMS and
+	 * CSIO_HWF_MASTER should be set/unset.
+	 */
+	if (csio_is_hw_master(hw)) {
+		csio_wr_fixup_host_params(hw);
+
+		if (hw->flags & CSIO_HWF_USING_SOFT_PARAMS)
+			csio_wr_get_sge(hw);
+		else
+			csio_wr_set_sge(hw);
+	} else
+		csio_wr_get_sge(hw);
+}
+
+/*
+ * csio_wrm_init - Initialize Work request module.
+ * @wrm: WR module
+ * @hw: HW pointer
+ *
+ * Allocates memory for an array of queue pointers starting at q_arr.
+ */
+int
+csio_wrm_init(struct csio_wrm *wrm, struct csio_hw *hw)
+{
+	int i;
+
+	if (!wrm->num_q) {
+		csio_err(hw, "Num queues is not set\n");
+		return -EINVAL;
+	}
+
+	wrm->q_arr = kzalloc(sizeof(struct csio_q *) * wrm->num_q, GFP_KERNEL);
+	if (!wrm->q_arr)
+		goto err;
+
+	for (i = 0; i < wrm->num_q; i++) {
+		wrm->q_arr[i] = kzalloc(sizeof(struct csio_q), GFP_KERNEL);
+		if (!wrm->q_arr[i]) {
+			while (--i >= 0)
+				kfree(wrm->q_arr[i]);
+			goto err_free_arr;
+		}
+	}
+	wrm->free_qidx	= 0;
+
+	return 0;
+
+err_free_arr:
+	kfree(wrm->q_arr);
+err:
+	return -ENOMEM;
+}
+
+/*
+ * csio_wrm_exit - Initialize Work request module.
+ * @wrm: WR module
+ * @hw: HW module
+ *
+ * Uninitialize WR module. Free q_arr and pointers in it.
+ * We have the additional job of freeing the DMA memory associated
+ * with the queues.
+ */
+void
+csio_wrm_exit(struct csio_wrm *wrm, struct csio_hw *hw)
+{
+	int i;
+	uint32_t j;
+	struct csio_q *q;
+	struct csio_dma_buf *buf;
+
+	for (i = 0; i < wrm->num_q; i++) {
+		q = wrm->q_arr[i];
+
+		if (wrm->free_qidx && (i < wrm->free_qidx)) {
+			if (q->type == CSIO_FREELIST) {
+				if (!q->un.fl.bufs)
+					continue;
+				for (j = 0; j < q->credits; j++) {
+					buf = &q->un.fl.bufs[j];
+					if (!buf->vaddr)
+						continue;
+					pci_free_consistent(hw->pdev, buf->len,
+							    buf->vaddr,
+							    buf->paddr);
+				}
+				kfree(q->un.fl.bufs);
+			}
+			pci_free_consistent(hw->pdev, q->size,
+					    q->vstart, q->pstart);
+		}
+		kfree(q);
+	}
+
+	hw->flags &= ~CSIO_HWF_Q_MEM_ALLOCED;
+
+	kfree(wrm->q_arr);
+}
-- 
1.7.1

^ permalink raw reply related

* [V2 PATCH 0/9] csiostor: Chelsio FCoE offload driver submission
From: Naresh Kumar Inna @ 2012-09-05 12:33 UTC (permalink / raw)
  To: JBottomley, linux-scsi, dm, leedom; +Cc: netdev, naresh, chethan

This is the initial submission of the Chelsio FCoE offload driver (csiostor)
to the upstream kernel. This driver currently supports FCoE offload
functionality over Chelsio T4-based 10Gb Converged Network Adapters.

The following patches contain the driver sources for csiostor driver and
updates to firmware/hardware header files shared between csiostor,
cxgb4 (Chelsio T4-based NIC driver) and cxgb4vf (Chelsio T4-based Virtual
Function NIC driver). The csiostor driver is dependent on these
header updates. These patches have been generated against scsi 'misc' branch.

csiostor is a low level SCSI driver that interfaces with PCI, SCSI midlayer and
FC transport subsystems. This driver claims the FCoE PCIe function on
Chelsio Converged Network Adapters. It relies on firmware events for slow path
operations like discovery, thereby offloading session management. The driver
programs firmware via Work Request interfaces for fast path I/O offload
features.

V2 patches address review comments from Nicholas Bellinger, Robert Love,
David Miller and Joe Perches. These changes are listed in the individual
patch emails. The common header updates resulted in changes to cxgb4vf as well,
which are included in this series.

Here is the brief description of patches:
[V2 PATCH 1/9]: Hardware interface, Makefile and Kconfig changes.
[V2 PATCH 2/9]: Driver initialization and Work Request services.
[V2 PATCH 3/9]: FC transport interfaces and mailbox services.
[V2 PATCH 4/9]: Local and remote port state tracking functionality.
[V2 PATCH 5/9]: Interrupt handling and fast path I/O functionality.
[V2 PATCH 6/9]: Header files part 1.
[V2 PATCH 7/9]: Header files part 2.
[V2 PATCH 8/9]: Updates to header files shared between cxgb4, cxgb4vf and
		csiostor.
[V2 PATCH 9/9]: Header file compatibility fixes to cxgb4vf.

Naresh Kumar Inna (9):
  csiostor: Chelsio FCoE offload driver submission (sources part 1).
  csiostor: Chelsio FCoE offload driver submission (sources part 2).
  csiostor: Chelsio FCoE offload driver submission (sources part 3).
  csiostor: Chelsio FCoE offload driver submission (sources part 4).
  csiostor: Chelsio FCoE offload driver submission (sources part 5).
  csiostor: Chelsio FCoE offload driver submission (headers part 1).
  csiostor: Chelsio FCoE offload driver submission (headers part 2).
  cxgb4: Chelsio FCoE offload driver submission (cxgb4 common header
    updates).
  cxgb4vf: Chelsio FCoE offload driver submission (header compatibility
    fixes).

 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |    2 +-
 drivers/net/ethernet/chelsio/cxgb4/sge.c        |   10 +-
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c      |   16 +-
 drivers/net/ethernet/chelsio/cxgb4/t4_msg.h     |    1 +
 drivers/net/ethernet/chelsio/cxgb4/t4_regs.h    |   69 +-
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h   |  104 +-
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c      |   11 +-
 drivers/scsi/Kconfig                            |    1 +
 drivers/scsi/Makefile                           |    1 +
 drivers/scsi/csiostor/Kconfig                   |   20 +
 drivers/scsi/csiostor/Makefile                  |   11 +
 drivers/scsi/csiostor/csio_attr.c               |  809 +++++
 drivers/scsi/csiostor/csio_defs.h               |  113 +
 drivers/scsi/csiostor/csio_hw.c                 | 4395 +++++++++++++++++++++++
 drivers/scsi/csiostor/csio_hw.h                 |  666 ++++
 drivers/scsi/csiostor/csio_init.c               | 1276 +++++++
 drivers/scsi/csiostor/csio_init.h               |  158 +
 drivers/scsi/csiostor/csio_isr.c                |  624 ++++
 drivers/scsi/csiostor/csio_lnode.c              | 2148 +++++++++++
 drivers/scsi/csiostor/csio_lnode.h              |  255 ++
 drivers/scsi/csiostor/csio_mb.c                 | 1769 +++++++++
 drivers/scsi/csiostor/csio_mb.h                 |  278 ++
 drivers/scsi/csiostor/csio_rnode.c              |  889 +++++
 drivers/scsi/csiostor/csio_rnode.h              |  141 +
 drivers/scsi/csiostor/csio_scsi.c               | 2561 +++++++++++++
 drivers/scsi/csiostor/csio_scsi.h               |  342 ++
 drivers/scsi/csiostor/csio_wr.c                 | 1632 +++++++++
 drivers/scsi/csiostor/csio_wr.h                 |  512 +++
 drivers/scsi/csiostor/t4fw_api_stor.h           |  578 +++
 29 files changed, 19355 insertions(+), 37 deletions(-)
 create mode 100644 drivers/scsi/csiostor/Kconfig
 create mode 100644 drivers/scsi/csiostor/Makefile
 create mode 100644 drivers/scsi/csiostor/csio_attr.c
 create mode 100644 drivers/scsi/csiostor/csio_defs.h
 create mode 100644 drivers/scsi/csiostor/csio_hw.c
 create mode 100644 drivers/scsi/csiostor/csio_hw.h
 create mode 100644 drivers/scsi/csiostor/csio_init.c
 create mode 100644 drivers/scsi/csiostor/csio_init.h
 create mode 100644 drivers/scsi/csiostor/csio_isr.c
 create mode 100644 drivers/scsi/csiostor/csio_lnode.c
 create mode 100644 drivers/scsi/csiostor/csio_lnode.h
 create mode 100644 drivers/scsi/csiostor/csio_mb.c
 create mode 100644 drivers/scsi/csiostor/csio_mb.h
 create mode 100644 drivers/scsi/csiostor/csio_rnode.c
 create mode 100644 drivers/scsi/csiostor/csio_rnode.h
 create mode 100644 drivers/scsi/csiostor/csio_scsi.c
 create mode 100644 drivers/scsi/csiostor/csio_scsi.h
 create mode 100644 drivers/scsi/csiostor/csio_wr.c
 create mode 100644 drivers/scsi/csiostor/csio_wr.h
 create mode 100644 drivers/scsi/csiostor/t4fw_api_stor.h

^ permalink raw reply

* [V2 PATCH 9/9] cxgb4vf: Chelsio FCoE offload driver submission (header compatibility fixes).
From: Naresh Kumar Inna @ 2012-09-05 12:34 UTC (permalink / raw)
  To: JBottomley, linux-scsi, dm, leedom; +Cc: netdev, naresh, chethan
In-Reply-To: <1346848442-4573-1-git-send-email-naresh@chelsio.com>

This patch contains minor fixes to make cxgb4vf driver work with the updates to
shared firmware/hardware header files.

Signed-off-by: Naresh Kumar Inna <naresh@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c |   11 ++++++-----
 1 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index 8877fbf..33fc1ca 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -536,7 +536,7 @@ static inline void ring_fl_db(struct adapter *adapter, struct sge_fl *fl)
 	if (fl->pend_cred >= FL_PER_EQ_UNIT) {
 		wmb();
 		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
-			     DBPRIO |
+			     DBPRIO(1) |
 			     QID(fl->cntxt_id) |
 			     PIDX(fl->pend_cred / FL_PER_EQ_UNIT));
 		fl->pend_cred %= FL_PER_EQ_UNIT;
@@ -952,7 +952,7 @@ static inline void ring_tx_db(struct adapter *adapter, struct sge_txq *tq,
 	 * Warn if we write doorbells with the wrong priority and write
 	 * descriptors before telling HW.
 	 */
-	WARN_ON((QID(tq->cntxt_id) | PIDX(n)) & DBPRIO);
+	WARN_ON((QID(tq->cntxt_id) | PIDX(n)) & DBPRIO(1));
 	wmb();
 	t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
 		     QID(tq->cntxt_id) | PIDX(n));
@@ -2126,8 +2126,8 @@ int t4vf_sge_alloc_rxq(struct adapter *adapter, struct sge_rspq *rspq,
 		cmd.iqns_to_fl0congen =
 			cpu_to_be32(
 				FW_IQ_CMD_FL0HOSTFCMODE(SGE_HOSTFCMODE_NONE) |
-				FW_IQ_CMD_FL0PACKEN |
-				FW_IQ_CMD_FL0PADEN);
+				FW_IQ_CMD_FL0PACKEN(1) |
+				FW_IQ_CMD_FL0PADEN(1));
 		cmd.fl0dcaen_to_fl0cidxfthresh =
 			cpu_to_be16(
 				FW_IQ_CMD_FL0FBMIN(SGE_FETCHBURSTMIN_64B) |
@@ -2431,7 +2431,8 @@ int t4vf_sge_init(struct adapter *adapter)
 	 */
 	if (fl1)
 		FL_PG_ORDER = ilog2(fl1) - PAGE_SHIFT;
-	STAT_LEN = ((sge_params->sge_control & EGRSTATUSPAGESIZE) ? 128 : 64);
+	STAT_LEN = ((sge_params->sge_control & EGRSTATUSPAGESIZE(1)) ?
+		    128 : 64);
 	PKTSHIFT = PKTSHIFT_GET(sge_params->sge_control);
 	FL_ALIGN = 1 << (INGPADBOUNDARY_GET(sge_params->sge_control) +
 			 SGE_INGPADBOUNDARY_SHIFT);
-- 
1.7.1

^ permalink raw reply related

* [V2 PATCH 6/9] csiostor: Chelsio FCoE offload driver submission (headers part 1).
From: Naresh Kumar Inna @ 2012-09-05 12:33 UTC (permalink / raw)
  To: JBottomley, linux-scsi, dm, leedom; +Cc: netdev, naresh, chethan
In-Reply-To: <1346848442-4573-1-git-send-email-naresh@chelsio.com>

This patch contains the first set of the header files for csiostor driver.

Signed-off-by: Naresh Kumar Inna <naresh@chelsio.com>
---
V2:
- Removed csio_fcoe_proto.h, using defines from include/scsi/fc instead.
- Removed driver-specific return values, using errno values instead.
- Retained CSIO_INC_STATS, since it is useful in multiple places and
  the name of the structure has been standardized to make use of this macro.
- Removed csio_deq_from_head(), replaced it inline with calls from list.h.
- Removed csio_deq_from_tail().
- Replaced state machine macros with static functions.
- Capitalizing macros with CPP keys.

 drivers/scsi/csiostor/csio_defs.h     |  113 ++++++
 drivers/scsi/csiostor/csio_hw.h       |  666 +++++++++++++++++++++++++++++++++
 drivers/scsi/csiostor/csio_init.h     |  158 ++++++++
 drivers/scsi/csiostor/t4fw_api_stor.h |  578 ++++++++++++++++++++++++++++
 4 files changed, 1515 insertions(+), 0 deletions(-)
 create mode 100644 drivers/scsi/csiostor/csio_defs.h
 create mode 100644 drivers/scsi/csiostor/csio_hw.h
 create mode 100644 drivers/scsi/csiostor/csio_init.h
 create mode 100644 drivers/scsi/csiostor/t4fw_api_stor.h

diff --git a/drivers/scsi/csiostor/csio_defs.h b/drivers/scsi/csiostor/csio_defs.h
new file mode 100644
index 0000000..5a82f63
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_defs.h
@@ -0,0 +1,113 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CSIO_DEFS_H__
+#define __CSIO_DEFS_H__
+
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+#include <linux/bug.h>
+#include <linux/pci.h>
+#include <linux/jiffies.h>
+
+#define CSIO_ROUNDUP(__v, __r)		(((__v) + (__r) - 1) / (__r))
+#define CSIO_INVALID_IDX		0xFFFFFFFF
+#define CSIO_INC_STATS(elem, val)	((elem)->stats.val++)
+#define CSIO_DEC_STATS(elem, val)	((elem)->stats.val--)
+#define CSIO_VALID_WWN(__n)		((*__n >> 4) == 0x5 ? true : false)
+#define CSIO_DID_MASK			0xFFFFFF
+#define CSIO_WORD_TO_BYTE		4
+
+static inline int
+csio_list_deleted(struct list_head *list)
+{
+	return ((list->next == list) && (list->prev == list));
+}
+
+#define csio_list_next(elem)	(((struct list_head *)(elem))->next)
+#define csio_list_prev(elem)	(((struct list_head *)(elem))->prev)
+
+/* State machine */
+typedef void (*csio_sm_state_t)(void *, uint32_t);
+
+struct csio_sm {
+	struct list_head	sm_list;
+	csio_sm_state_t		sm_state;
+};
+
+static inline void
+csio_set_state(void *smp, void *state)
+{
+	((struct csio_sm *)smp)->sm_state = (csio_sm_state_t)state;
+}
+
+static inline void
+csio_init_state(struct csio_sm *smp, void *state)
+{
+	csio_set_state(smp, state);
+}
+
+static inline void
+csio_post_event(void *smp, uint32_t evt)
+{
+	((struct csio_sm *)smp)->sm_state(smp, evt);
+}
+
+static inline csio_sm_state_t
+csio_get_state(void *smp)
+{
+	return ((struct csio_sm *)smp)->sm_state;
+}
+
+static inline bool
+csio_match_state(void *smp, void *state)
+{
+	return (csio_get_state(smp) == (csio_sm_state_t)state);
+}
+
+#define	CSIO_ASSERT(cond)						\
+do {									\
+	if (unlikely(!((cond))))					\
+		BUG();                                                  \
+} while (0)
+
+#ifdef __CSIO_DEBUG__
+#define CSIO_DB_ASSERT(__c)		CSIO_ASSERT((__c))
+#else
+#define CSIO_DB_ASSERT(__c)
+#endif
+
+#endif /* ifndef __CSIO_DEFS_H__ */
diff --git a/drivers/scsi/csiostor/csio_hw.h b/drivers/scsi/csiostor/csio_hw.h
new file mode 100644
index 0000000..a58fdc9
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_hw.h
@@ -0,0 +1,666 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CSIO_HW_H__
+#define __CSIO_HW_H__
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/workqueue.h>
+#include <linux/compiler.h>
+#include <linux/cdev.h>
+#include <linux/list.h>
+#include <linux/mempool.h>
+#include <linux/io.h>
+#include <linux/spinlock_types.h>
+#include <scsi/scsi_transport_fc.h>
+
+#include "csio_wr.h"
+#include "csio_mb.h"
+#include "csio_scsi.h"
+#include "csio_defs.h"
+#include "t4_regs.h"
+#include "t4_msg.h"
+
+/*
+ * An error value used by host. Should not clash with FW defined return values.
+ */
+#define	FW_HOSTERROR			255
+
+#define CSIO_FW_FNAME		"cxgb4/t4fw.bin"
+#define CSIO_CF_FNAME		"cxgb4/t4-config.txt"
+
+#define FW_VERSION_MAJOR	1
+#define FW_VERSION_MINOR	2
+#define FW_VERSION_MICRO	8
+
+#define CSIO_HW_NAME		"Chelsio FCoE Adapter"
+#define CSIO_MAX_PFN		8
+#define CSIO_MAX_PPORTS		4
+
+#define CSIO_MAX_LUN		0xFFFF
+#define CSIO_MAX_QUEUE		2048
+#define CSIO_MAX_CMD_PER_LUN	32
+#define CSIO_MAX_DDP_BUF_SIZE	(1024 * 1024)
+#define CSIO_MAX_SECTOR_SIZE	128
+
+/* Interrupts */
+#define CSIO_EXTRA_MSI_IQS	2	/* Extra iqs for INTX/MSI mode
+					 * (Forward intr iq + fw iq) */
+#define CSIO_EXTRA_VECS		2	/* non-data + FW evt */
+#define CSIO_MAX_SCSI_CPU	128
+#define CSIO_MAX_SCSI_QSETS	(CSIO_MAX_SCSI_CPU * CSIO_MAX_PPORTS)
+#define CSIO_MAX_MSIX_VECS	(CSIO_MAX_SCSI_QSETS + CSIO_EXTRA_VECS)
+
+/* Queues */
+enum {
+	CSIO_INTR_WRSIZE = 128,
+	CSIO_INTR_IQSIZE = ((CSIO_MAX_MSIX_VECS + 1) * CSIO_INTR_WRSIZE),
+	CSIO_FWEVT_WRSIZE = 128,
+	CSIO_FWEVT_IQLEN = 128,
+	CSIO_FWEVT_FLBUFS = 64,
+	CSIO_FWEVT_IQSIZE = (CSIO_FWEVT_WRSIZE * CSIO_FWEVT_IQLEN),
+	CSIO_HW_NIQ = 1,
+	CSIO_HW_NFLQ = 1,
+	CSIO_HW_NEQ = 1,
+	CSIO_HW_NINTXQ = 1,
+};
+
+struct csio_msix_entries {
+	unsigned short	vector;		/* Vector assigned by pci_enable_msix */
+	void		*dev_id;	/* Priv object associated w/ this msix*/
+	char		desc[24];	/* Description of this vector */
+};
+
+struct csio_scsi_qset {
+	int		iq_idx;		/* Ingress index */
+	int		eq_idx;		/* Egress index */
+	uint32_t	intr_idx;	/* MSIX Vector index */
+};
+
+struct csio_scsi_cpu_info {
+	int16_t	max_cpus;
+};
+
+extern int csio_dbg_level;
+extern int csio_force_master;
+extern unsigned int csio_port_mask;
+extern int csio_msi;
+
+#define CSIO_VENDOR_ID				0x1425
+#define CSIO_ASIC_DEVID_PROTO_MASK		0xFF00
+#define CSIO_ASIC_DEVID_TYPE_MASK		0x00FF
+#define CSIO_FPGA				0xA000
+#define CSIO_T4_FCOE_ASIC			0x4600
+
+#define CSIO_GLBL_INTR_MASK		(CIM | MPS | PL | PCIE | MC | EDC0 | \
+					 EDC1 | LE | TP | MA | PM_TX | PM_RX | \
+					 ULP_RX | CPL_SWITCH | SGE | \
+					 ULP_TX | SF)
+
+/*
+ * Hard parameters used to initialize the card in the absence of a
+ * configuration file.
+ */
+enum {
+	/* General */
+	CSIO_SGE_DBFIFO_INT_THRESH	= 10,
+
+	CSIO_SGE_RX_DMA_OFFSET		= 2,
+
+	CSIO_SGE_FLBUF_SIZE1		= 65536,
+	CSIO_SGE_FLBUF_SIZE2		= 1536,
+	CSIO_SGE_FLBUF_SIZE3		= 9024,
+	CSIO_SGE_FLBUF_SIZE4		= 9216,
+	CSIO_SGE_FLBUF_SIZE5		= 2048,
+	CSIO_SGE_FLBUF_SIZE6		= 128,
+	CSIO_SGE_FLBUF_SIZE7		= 8192,
+	CSIO_SGE_FLBUF_SIZE8		= 16384,
+
+	CSIO_SGE_TIMER_VAL_0		= 5,
+	CSIO_SGE_TIMER_VAL_1		= 10,
+	CSIO_SGE_TIMER_VAL_2		= 20,
+	CSIO_SGE_TIMER_VAL_3		= 50,
+	CSIO_SGE_TIMER_VAL_4		= 100,
+	CSIO_SGE_TIMER_VAL_5		= 200,
+
+	CSIO_SGE_INT_CNT_VAL_0		= 1,
+	CSIO_SGE_INT_CNT_VAL_1		= 4,
+	CSIO_SGE_INT_CNT_VAL_2		= 8,
+	CSIO_SGE_INT_CNT_VAL_3		= 16,
+
+	/* Storage specific - used by FW_PFVF_CMD */
+	CSIO_WX_CAPS			= FW_CMD_CAP_PF, /* w/x all */
+	CSIO_R_CAPS			= FW_CMD_CAP_PF, /* r all */
+	CSIO_NVI			= 4,
+	CSIO_NIQ_FLINT			= 34,
+	CSIO_NETH_CTRL			= 32,
+	CSIO_NEQ			= 66,
+	CSIO_NEXACTF			= 32,
+	CSIO_CMASK			= FW_PFVF_CMD_CMASK_MASK,
+	CSIO_PMASK			= FW_PFVF_CMD_PMASK_MASK,
+};
+
+/* Slowpath events */
+enum csio_evt {
+	CSIO_EVT_FW  = 0,	/* FW event */
+	CSIO_EVT_MBX,		/* MBX event */
+	CSIO_EVT_SCN,		/* State change notification */
+	CSIO_EVT_DEV_LOSS,	/* Device loss event */
+	CSIO_EVT_MAX,		/* Max supported event */
+};
+
+#define CSIO_EVT_MSG_SIZE	512
+#define CSIO_EVTQ_SIZE		512
+
+/* Event msg  */
+struct csio_evt_msg {
+	struct list_head	list;	/* evt queue*/
+	enum csio_evt		type;
+	uint8_t			data[CSIO_EVT_MSG_SIZE];
+};
+
+enum {
+	EEPROMVSIZE    = 32768, /* Serial EEPROM virtual address space size */
+	SERNUM_LEN     = 16,    /* Serial # length */
+	EC_LEN         = 16,    /* E/C length */
+	ID_LEN         = 16,    /* ID length */
+	TRACE_LEN      = 112,   /* length of trace data and mask */
+};
+
+enum {
+	SF_PAGE_SIZE = 256,           /* serial flash page size */
+	SF_SEC_SIZE = 64 * 1024,      /* serial flash sector size */
+	SF_SIZE = SF_SEC_SIZE * 16,   /* serial flash size */
+};
+
+enum { MEM_EDC0, MEM_EDC1, MEM_MC };
+
+enum {
+	MEMWIN0_APERTURE = 2048,
+	MEMWIN0_BASE     = 0x1b800,
+	MEMWIN1_APERTURE = 32768,
+	MEMWIN1_BASE     = 0x28000,
+	MEMWIN2_APERTURE = 65536,
+	MEMWIN2_BASE     = 0x30000,
+};
+
+/* serial flash and firmware constants */
+enum {
+	SF_ATTEMPTS = 10,             /* max retries for SF operations */
+
+	/* flash command opcodes */
+	SF_PROG_PAGE    = 2,          /* program page */
+	SF_WR_DISABLE   = 4,          /* disable writes */
+	SF_RD_STATUS    = 5,          /* read status register */
+	SF_WR_ENABLE    = 6,          /* enable writes */
+	SF_RD_DATA_FAST = 0xb,        /* read flash */
+	SF_RD_ID	= 0x9f,	      /* read ID */
+	SF_ERASE_SECTOR = 0xd8,       /* erase sector */
+
+	FW_START_SEC = 8,             /* first flash sector for FW */
+	FW_END_SEC = 15,              /* last flash sector for FW */
+	FW_IMG_START = FW_START_SEC * SF_SEC_SIZE,
+	FW_MAX_SIZE = (FW_END_SEC - FW_START_SEC + 1) * SF_SEC_SIZE,
+
+	FLASH_CFG_MAX_SIZE    = 0x10000 , /* max size of the flash config file*/
+	FLASH_CFG_OFFSET      = 0x1f0000,
+	FLASH_CFG_START_SEC   = FLASH_CFG_OFFSET / SF_SEC_SIZE,
+	FPGA_FLASH_CFG_OFFSET = 0xf0000 , /* if FPGA mode, then cfg file is
+					   * at 1MB - 64KB */
+	FPGA_FLASH_CFG_START_SEC  = FPGA_FLASH_CFG_OFFSET / SF_SEC_SIZE,
+};
+
+/*
+ * Flash layout.
+ */
+#define FLASH_START(start)	((start) * SF_SEC_SIZE)
+#define FLASH_MAX_SIZE(nsecs)	((nsecs) * SF_SEC_SIZE)
+
+enum {
+	/*
+	 * Location of firmware image in FLASH.
+	 */
+	FLASH_FW_START_SEC = 8,
+	FLASH_FW_NSECS = 8,
+	FLASH_FW_START = FLASH_START(FLASH_FW_START_SEC),
+	FLASH_FW_MAX_SIZE = FLASH_MAX_SIZE(FLASH_FW_NSECS),
+
+};
+
+#undef FLASH_START
+#undef FLASH_MAX_SIZE
+
+/* Management module */
+enum {
+	CSIO_MGMT_EQ_WRSIZE = 512,
+	CSIO_MGMT_IQ_WRSIZE = 128,
+	CSIO_MGMT_EQLEN = 64,
+	CSIO_MGMT_IQLEN = 64,
+};
+
+#define CSIO_MGMT_EQSIZE	(CSIO_MGMT_EQLEN * CSIO_MGMT_EQ_WRSIZE)
+#define CSIO_MGMT_IQSIZE	(CSIO_MGMT_IQLEN * CSIO_MGMT_IQ_WRSIZE)
+
+/* mgmt module stats */
+struct csio_mgmtm_stats {
+	uint32_t	n_abort_req;		/* Total abort request */
+	uint32_t	n_abort_rsp;		/* Total abort response */
+	uint32_t	n_close_req;		/* Total close request */
+	uint32_t	n_close_rsp;		/* Total close response */
+	uint32_t	n_err;			/* Total Errors */
+	uint32_t	n_drop;			/* Total request dropped */
+	uint32_t	n_active;		/* Count of active_q */
+	uint32_t	n_cbfn;			/* Count of cbfn_q */
+};
+
+/* MGMT module */
+struct csio_mgmtm {
+	struct	csio_hw		*hw;		/* Pointer to HW moduel */
+	int			eq_idx;		/* Egress queue index */
+	int			iq_idx;		/* Ingress queue index */
+	int			msi_vec;	/* MSI vector */
+	struct list_head	active_q;	/* Outstanding ELS/CT */
+	struct list_head	abort_q;	/* Outstanding abort req */
+	struct list_head	cbfn_q;		/* Completion queue */
+	struct list_head	mgmt_req_freelist; /* Free poll of reqs */
+						/* ELSCT request freelist*/
+	struct timer_list	mgmt_timer;	/* MGMT timer */
+	struct csio_mgmtm_stats stats;		/* ELS/CT stats */
+};
+
+struct csio_adap_desc {
+	char model_no[16];
+	char description[32];
+};
+
+struct pci_params {
+	uint16_t   vendor_id;
+	uint16_t   device_id;
+	uint32_t   vpd_cap_addr;
+	uint16_t   speed;
+	uint8_t    width;
+};
+
+/* User configurable hw parameters */
+struct csio_hw_params {
+	uint32_t		sf_size;		/* serial flash
+							 * size in bytes
+							 */
+	uint32_t		sf_nsec;		/* # of flash sectors */
+	struct pci_params	pci;
+	uint32_t		log_level;		/* Module-level for
+							 * debug log.
+							 */
+};
+
+struct csio_vpd {
+	uint32_t cclk;
+	uint8_t ec[EC_LEN + 1];
+	uint8_t sn[SERNUM_LEN + 1];
+	uint8_t id[ID_LEN + 1];
+};
+
+struct csio_pport {
+	uint16_t	pcap;
+	uint8_t		portid;
+	uint8_t		link_status;
+	uint16_t	link_speed;
+	uint8_t		mac[6];
+	uint8_t		mod_type;
+	uint8_t		rsvd1;
+	uint8_t		rsvd2;
+	uint8_t		rsvd3;
+};
+
+/* fcoe resource information */
+struct csio_fcoe_res_info {
+	uint16_t	e_d_tov;
+	uint16_t	r_a_tov_seq;
+	uint16_t	r_a_tov_els;
+	uint16_t	r_r_tov;
+	uint32_t	max_xchgs;
+	uint32_t	max_ssns;
+	uint32_t	used_xchgs;
+	uint32_t	used_ssns;
+	uint32_t	max_fcfs;
+	uint32_t	max_vnps;
+	uint32_t	used_fcfs;
+	uint32_t	used_vnps;
+};
+
+/* HW State machine Events */
+enum csio_hw_ev {
+	CSIO_HWE_CFG = (uint32_t)1, /* Starts off the State machine */
+	CSIO_HWE_INIT,	         /* Config done, start Init      */
+	CSIO_HWE_INIT_DONE,      /* Init Mailboxes sent, HW ready */
+	CSIO_HWE_FATAL,		 /* Fatal error during initialization */
+	CSIO_HWE_PCIERR_DETECTED,/* PCI error recovery detetced */
+	CSIO_HWE_PCIERR_SLOT_RESET, /* Slot reset after PCI recoviery */
+	CSIO_HWE_PCIERR_RESUME,  /* Resume after PCI error recovery */
+	CSIO_HWE_QUIESCED,	 /* HBA quiesced */
+	CSIO_HWE_HBA_RESET,      /* HBA reset requested */
+	CSIO_HWE_HBA_RESET_DONE, /* HBA reset completed */
+	CSIO_HWE_FW_DLOAD,       /* FW download requested */
+	CSIO_HWE_PCI_REMOVE,     /* PCI de-instantiation */
+	CSIO_HWE_SUSPEND,        /* HW suspend for Online(hot) replacement */
+	CSIO_HWE_RESUME,         /* HW resume for Online(hot) replacement */
+	CSIO_HWE_MAX,		 /* Max HW event */
+};
+
+/* hw stats */
+struct csio_hw_stats {
+	uint32_t	n_evt_activeq;	/* Number of event in active Q */
+	uint32_t	n_evt_freeq;	/* Number of event in free Q */
+	uint32_t	n_evt_drop;	/* Number of event droped */
+	uint32_t	n_evt_unexp;	/* Number of unexpected events */
+	uint32_t	n_pcich_offline;/* Number of pci channel offline */
+	uint32_t	n_lnlkup_miss;  /* Number of lnode lookup miss */
+	uint32_t	n_cpl_fw6_msg;	/* Number of cpl fw6 message*/
+	uint32_t	n_cpl_fw6_pld;	/* Number of cpl fw6 payload*/
+	uint32_t	n_cpl_unexp;	/* Number of unexpected cpl */
+	uint32_t	n_mbint_unexp;	/* Number of unexpected mbox */
+					/* interrupt */
+	uint32_t	n_plint_unexp;	/* Number of unexpected PL */
+					/* interrupt */
+	uint32_t	n_plint_cnt;	/* Number of PL interrupt */
+	uint32_t	n_int_stray;	/* Number of stray interrupt */
+	uint32_t	n_err;		/* Number of hw errors */
+	uint32_t	n_err_fatal;	/* Number of fatal errors */
+	uint32_t	n_err_nomem;	/* Number of memory alloc failure */
+	uint32_t	n_err_io;	/* Number of IO failure */
+	enum csio_hw_ev	n_evt_sm[CSIO_HWE_MAX];	/* Number of sm events */
+	uint64_t	n_reset_start;  /* Start time after the reset */
+	uint32_t	rsvd1;
+};
+
+/* Defines for hw->flags */
+#define CSIO_HWF_MASTER			0x00000001	/* This is the Master
+							 * function for the
+							 * card.
+							 */
+#define	CSIO_HWF_HW_INTR_ENABLED	0x00000002	/* Are HW Interrupt
+							 * enable bit set?
+							 */
+#define	CSIO_HWF_FWEVT_PENDING		0x00000004	/* FW events pending */
+#define	CSIO_HWF_Q_MEM_ALLOCED		0x00000008	/* Queues have been
+							 * allocated memory.
+							 */
+#define	CSIO_HWF_Q_FW_ALLOCED		0x00000010	/* Queues have been
+							 * allocated in FW.
+							 */
+#define CSIO_HWF_VPD_VALID		0x00000020	/* Valid VPD copied */
+#define CSIO_HWF_DEVID_CACHED		0X00000040	/* PCI vendor & device
+							 * id cached */
+#define	CSIO_HWF_FWEVT_STOP		0x00000080	/* Stop processing
+							 * FW events
+							 */
+#define CSIO_HWF_USING_SOFT_PARAMS	0x00000100      /* Using FW config
+							 * params
+							 */
+#define	CSIO_HWF_HOST_INTR_ENABLED	0x00000200	/* Are host interrupts
+							 * enabled?
+							 */
+
+#define csio_is_hw_intr_enabled(__hw)	\
+				((__hw)->flags & CSIO_HWF_HW_INTR_ENABLED)
+#define csio_is_host_intr_enabled(__hw)	\
+				((__hw)->flags & CSIO_HWF_HOST_INTR_ENABLED)
+#define csio_is_hw_master(__hw)		((__hw)->flags & CSIO_HWF_MASTER)
+#define csio_is_valid_vpd(__hw)		((__hw)->flags & CSIO_HWF_VPD_VALID)
+#define csio_is_dev_id_cached(__hw)	((__hw)->flags & CSIO_HWF_DEVID_CACHED)
+#define csio_valid_vpd_copied(__hw)	((__hw)->flags |= CSIO_HWF_VPD_VALID)
+#define csio_dev_id_cached(__hw)	((__hw)->flags |= CSIO_HWF_DEVID_CACHED)
+
+/* Defines for intr_mode */
+enum csio_intr_mode {
+	CSIO_IM_NONE = 0,
+	CSIO_IM_INTX = 1,
+	CSIO_IM_MSI  = 2,
+	CSIO_IM_MSIX = 3,
+};
+
+/* Master HW structure: One per function */
+struct csio_hw {
+	struct csio_sm		sm;			/* State machine: should
+							 * be the 1st member.
+							 */
+	spinlock_t		lock;			/* Lock for hw */
+
+	struct csio_scsim	scsim;			/* SCSI module*/
+	struct csio_wrm		wrm;			/* Work request module*/
+	struct pci_dev		*pdev;			/* PCI device */
+
+	void __iomem		*regstart;		/* Virtual address of
+							 * register map
+							 */
+	/* SCSI queue sets */
+	uint32_t		num_sqsets;		/* Number of SCSI
+							 * queue sets */
+	uint32_t		num_scsi_msix_cpus;	/* Number of CPUs that
+							 * will be used
+							 * for ingress
+							 * processing.
+							 */
+
+	struct csio_scsi_qset	sqset[CSIO_MAX_PPORTS][CSIO_MAX_SCSI_CPU];
+	struct csio_scsi_cpu_info scsi_cpu_info[CSIO_MAX_PPORTS];
+
+	uint32_t		evtflag;		/* Event flag  */
+	uint32_t		flags;			/* HW flags */
+
+	struct csio_mgmtm	mgmtm;			/* management module */
+	struct csio_mbm		mbm;			/* Mailbox module */
+
+	/* Lnodes */
+	uint32_t		num_lns;		/* Number of lnodes */
+	struct csio_lnode	*rln;			/* Root lnode */
+	struct list_head	sln_head;		/* Sibling node list
+							 * list
+							 */
+	int			intr_iq_idx;		/* Forward interrupt
+							 * queue.
+							 */
+	int			fwevt_iq_idx;		/* FW evt queue */
+	struct work_struct	evtq_work;		/* Worker thread for
+							 * HW events.
+							 */
+	struct list_head	evt_free_q;		/* freelist of evt
+							 * elements
+							 */
+	struct list_head	evt_active_q;		/* active evt queue*/
+
+	/* board related info */
+	char			name[32];
+	char			hw_ver[16];
+	char			model_desc[32];
+	char			drv_version[32];
+	char			fwrev_str[32];
+	uint32_t		optrom_ver;
+	uint32_t		fwrev;
+	uint32_t		tp_vers;
+	char			chip_ver;
+	uint32_t		cfg_finiver;
+	uint32_t		cfg_finicsum;
+	uint32_t		cfg_cfcsum;
+	uint8_t			cfg_csum_status;
+	uint8_t			cfg_store;
+	enum csio_dev_state	fw_state;
+	struct csio_vpd		vpd;
+
+	uint8_t			pfn;			/* Physical Function
+							 * number
+							 */
+	uint32_t		port_vec;		/* Port vector */
+	uint8_t			num_pports;		/* Number of physical
+							 * ports.
+							 */
+	uint8_t			rst_retries;		/* Reset retries */
+	uint8_t			cur_evt;		/* current s/m evt */
+	uint8_t			prev_evt;		/* Previous s/m evt */
+	uint32_t		dev_num;		/* device number */
+	struct csio_pport	pport[CSIO_MAX_PPORTS];	/* Ports (XGMACs) */
+	struct csio_hw_params	params;			/* Hw parameters */
+
+	struct pci_pool		*scsi_pci_pool;		/* PCI pool for SCSI */
+	mempool_t		*mb_mempool;		/* Mailbox memory pool*/
+	mempool_t		*rnode_mempool;		/* rnode memory pool */
+
+	/* Interrupt */
+	enum csio_intr_mode	intr_mode;		/* INTx, MSI, MSIX */
+	uint32_t		fwevt_intr_idx;		/* FW evt MSIX/interrupt
+							 * index
+							 */
+	uint32_t		nondata_intr_idx;	/* nondata MSIX/intr
+							 * idx
+							 */
+
+	uint8_t			cfg_neq;		/* FW configured no of
+							 * egress queues
+							 */
+	uint8_t			cfg_niq;		/* FW configured no of
+							 * iq queues.
+							 */
+
+	struct csio_fcoe_res_info  fres_info;		/* Fcoe resource info */
+
+	/* MSIX vectors */
+	struct csio_msix_entries msix_entries[CSIO_MAX_MSIX_VECS];
+
+	struct dentry		*debugfs_root;		/* Debug FS */
+	struct csio_hw_stats	stats;			/* Hw statistics */
+};
+
+/* Register access macros */
+#define csio_reg(_b, _r)		((_b) + (_r))
+
+#define	csio_rd_reg8(_h, _r)		readb(csio_reg((_h)->regstart, (_r)))
+#define	csio_rd_reg16(_h, _r)		readw(csio_reg((_h)->regstart, (_r)))
+#define	csio_rd_reg32(_h, _r)		readl(csio_reg((_h)->regstart, (_r)))
+#define	csio_rd_reg64(_h, _r)		readq(csio_reg((_h)->regstart, (_r)))
+
+#define	csio_wr_reg8(_h, _v, _r)	writeb((_v), \
+						csio_reg((_h)->regstart, (_r)))
+#define	csio_wr_reg16(_h, _v, _r)	writew((_v), \
+						csio_reg((_h)->regstart, (_r)))
+#define	csio_wr_reg32(_h, _v, _r)	writel((_v), \
+						csio_reg((_h)->regstart, (_r)))
+#define	csio_wr_reg64(_h, _v, _r)	writeq((_v), \
+						csio_reg((_h)->regstart, (_r)))
+
+void csio_set_reg_field(struct csio_hw *, uint32_t, uint32_t, uint32_t);
+
+/* Core clocks <==> uSecs */
+static inline uint32_t
+csio_core_ticks_to_us(struct csio_hw *hw, uint32_t ticks)
+{
+	/* add Core Clock / 2 to round ticks to nearest uS */
+	return (ticks * 1000 + hw->vpd.cclk/2) / hw->vpd.cclk;
+}
+
+static inline uint32_t
+csio_us_to_core_ticks(struct csio_hw *hw, uint32_t us)
+{
+	return (us * hw->vpd.cclk) / 1000;
+}
+
+/* Easy access macros */
+#define csio_hw_to_wrm(hw)		((struct csio_wrm *)(&(hw)->wrm))
+#define csio_hw_to_mbm(hw)		((struct csio_mbm *)(&(hw)->mbm))
+#define csio_hw_to_scsim(hw)		((struct csio_scsim *)(&(hw)->scsim))
+#define csio_hw_to_mgmtm(hw)		((struct csio_mgmtm *)(&(hw)->mgmtm))
+
+#define CSIO_PCI_BUS(hw)		((hw)->pdev->bus->number)
+#define CSIO_PCI_DEV(hw)		(PCI_SLOT((hw)->pdev->devfn))
+#define CSIO_PCI_FUNC(hw)		(PCI_FUNC((hw)->pdev->devfn))
+
+#define csio_set_fwevt_intr_idx(_h, _i)		((_h)->fwevt_intr_idx = (_i))
+#define csio_get_fwevt_intr_idx(_h)		((_h)->fwevt_intr_idx)
+#define csio_set_nondata_intr_idx(_h, _i)	((_h)->nondata_intr_idx = (_i))
+#define csio_get_nondata_intr_idx(_h)		((_h)->nondata_intr_idx)
+
+/* Printing/logging */
+#define CSIO_DEVID(__dev)		((__dev)->dev_num)
+#define CSIO_DEVID_LO(__dev)		(CSIO_DEVID((__dev)) & 0xFFFF)
+#define CSIO_DEVID_HI(__dev)		((CSIO_DEVID((__dev)) >> 16) & 0xFFFF)
+
+#define csio_info(__hw, __fmt, ...)					\
+			dev_info(&(__hw)->pdev->dev, __fmt, ##__VA_ARGS__)
+
+#define csio_fatal(__hw, __fmt, ...)					\
+			dev_crit(&(__hw)->pdev->dev, __fmt, ##__VA_ARGS__)
+
+#define csio_err(__hw, __fmt, ...)					\
+			dev_err(&(__hw)->pdev->dev, __fmt, ##__VA_ARGS__)
+
+#define csio_warn(__hw, __fmt, ...)					\
+			dev_warn(&(__hw)->pdev->dev, __fmt, ##__VA_ARGS__)
+
+#ifdef __CSIO_DEBUG__
+#define csio_dbg(__hw, __fmt, ...)					\
+			csio_info((__hw), __fmt, ##__VA_ARGS__);
+#else
+#define csio_dbg(__hw, __fmt, ...)
+#endif
+
+int csio_mgmt_req_lookup(struct csio_mgmtm *, struct csio_ioreq *);
+void csio_hw_intr_disable(struct csio_hw *);
+int csio_hw_slow_intr_handler(struct csio_hw *hw);
+int csio_hw_start(struct csio_hw *);
+int csio_hw_stop(struct csio_hw *);
+int csio_hw_reset(struct csio_hw *);
+int csio_is_hw_ready(struct csio_hw *);
+int csio_is_hw_removing(struct csio_hw *);
+
+int csio_fwevtq_handler(struct csio_hw *);
+void csio_evtq_worker(struct work_struct *);
+int csio_enqueue_evt(struct csio_hw *hw, enum csio_evt type,
+				void *evt_msg, uint16_t len);
+void csio_evtq_flush(struct csio_hw *hw);
+
+int csio_request_irqs(struct csio_hw *);
+void csio_intr_enable(struct csio_hw *);
+void csio_intr_disable(struct csio_hw *, bool);
+
+struct csio_lnode *csio_lnode_alloc(struct csio_hw *);
+int csio_config_queues(struct csio_hw *);
+
+int csio_hw_mc_read(struct csio_hw *, uint32_t,
+			      uint32_t *, uint64_t *);
+int csio_hw_edc_read(struct csio_hw *, int, uint32_t, uint32_t *,
+			       uint64_t *);
+int csio_hw_init(struct csio_hw *);
+void csio_hw_exit(struct csio_hw *);
+#endif /* ifndef __CSIO_HW_H__ */
diff --git a/drivers/scsi/csiostor/csio_init.h b/drivers/scsi/csiostor/csio_init.h
new file mode 100644
index 0000000..0838fd7
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_init.h
@@ -0,0 +1,158 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CSIO_INIT_H__
+#define __CSIO_INIT_H__
+
+#include <linux/pci.h>
+#include <linux/if_ether.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_transport_fc.h>
+
+#include "csio_scsi.h"
+#include "csio_lnode.h"
+#include "csio_rnode.h"
+#include "csio_hw.h"
+
+#define CSIO_DRV_AUTHOR			"Chelsio Communications"
+#define CSIO_DRV_LICENSE		"Dual BSD/GPL"
+#define CSIO_DRV_DESC			"Chelsio FCoE driver"
+#define CSIO_DRV_VERSION		"1.0.0"
+
+#define CSIO_DEVICE(devid, idx)					\
+{ PCI_VENDOR_ID_CHELSIO, (devid), PCI_ANY_ID, PCI_ANY_ID, 0, 0, (idx) }
+
+#define CSIO_IS_T4_FPGA(_dev)		(((_dev) == CSIO_DEVID_PE10K) ||\
+					 ((_dev) == CSIO_DEVID_PE10K_PF1))
+
+/* FCoE device IDs */
+#define CSIO_DEVID_PE10K		0xA000
+#define CSIO_DEVID_PE10K_PF1		0xA001
+#define CSIO_DEVID_T440DBG_FCOE		0x4600
+#define CSIO_DEVID_T420CR_FCOE		0x4601
+#define CSIO_DEVID_T422CR_FCOE		0x4602
+#define CSIO_DEVID_T440CR_FCOE		0x4603
+#define CSIO_DEVID_T420BCH_FCOE		0x4604
+#define CSIO_DEVID_T440BCH_FCOE		0x4605
+#define CSIO_DEVID_T440CH_FCOE		0x4606
+#define CSIO_DEVID_T420SO_FCOE		0x4607
+#define CSIO_DEVID_T420CX_FCOE		0x4608
+#define CSIO_DEVID_T420BT_FCOE		0x4609
+#define CSIO_DEVID_T404BT_FCOE		0x460A
+#define CSIO_DEVID_B420_FCOE		0x460B
+#define CSIO_DEVID_B404_FCOE		0x460C
+#define CSIO_DEVID_T480CR_FCOE		0x460D
+#define CSIO_DEVID_T440LPCR_FCOE	0x460E
+
+extern struct fc_function_template csio_fc_transport_funcs;
+extern struct fc_function_template csio_fc_transport_vport_funcs;
+
+void csio_fchost_attr_init(struct csio_lnode *);
+
+/* INTx handlers */
+void csio_scsi_intx_handler(struct csio_hw *, void *, uint32_t,
+			       struct csio_fl_dma_buf *, void *);
+
+void csio_fwevt_intx_handler(struct csio_hw *, void *, uint32_t,
+				struct csio_fl_dma_buf *, void *);
+
+/* Common os lnode APIs */
+void csio_lnodes_block_request(struct csio_hw *);
+void csio_lnodes_unblock_request(struct csio_hw *);
+void csio_lnodes_block_by_port(struct csio_hw *, uint8_t);
+void csio_lnodes_unblock_by_port(struct csio_hw *, uint8_t);
+
+struct csio_lnode *csio_shost_init(struct csio_hw *, struct device *, bool,
+					struct csio_lnode *);
+void csio_shost_exit(struct csio_lnode *);
+void csio_lnodes_exit(struct csio_hw *, bool);
+
+static inline struct Scsi_Host *
+csio_ln_to_shost(struct csio_lnode *ln)
+{
+	return container_of((void *)ln, struct Scsi_Host, hostdata[0]);
+}
+
+/* SCSI -- locking version of get/put ioreqs  */
+static inline struct csio_ioreq *
+csio_get_scsi_ioreq_lock(struct csio_hw *hw, struct csio_scsim *scsim)
+{
+	struct csio_ioreq *ioreq;
+	unsigned long flags;
+
+	spin_lock_irqsave(&scsim->freelist_lock, flags);
+	ioreq = csio_get_scsi_ioreq(scsim);
+	spin_unlock_irqrestore(&scsim->freelist_lock, flags);
+
+	return ioreq;
+}
+
+static inline void
+csio_put_scsi_ioreq_lock(struct csio_hw *hw, struct csio_scsim *scsim,
+			 struct csio_ioreq *ioreq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&scsim->freelist_lock, flags);
+	csio_put_scsi_ioreq(scsim, ioreq);
+	spin_unlock_irqrestore(&scsim->freelist_lock, flags);
+}
+
+/* Called in interrupt context */
+static inline void
+csio_put_scsi_ioreq_list_lock(struct csio_hw *hw, struct csio_scsim *scsim,
+			      struct list_head *reqlist, int n)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&scsim->freelist_lock, flags);
+	csio_put_scsi_ioreq_list(scsim, reqlist, n);
+	spin_unlock_irqrestore(&scsim->freelist_lock, flags);
+}
+
+/* Called in interrupt context */
+static inline void
+csio_put_scsi_ddp_list_lock(struct csio_hw *hw, struct csio_scsim *scsim,
+			      struct list_head *reqlist, int n)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&hw->lock, flags);
+	csio_put_scsi_ddp_list(scsim, reqlist, n);
+	spin_unlock_irqrestore(&hw->lock, flags);
+}
+
+#endif /* ifndef __CSIO_INIT_H__ */
diff --git a/drivers/scsi/csiostor/t4fw_api_stor.h b/drivers/scsi/csiostor/t4fw_api_stor.h
new file mode 100644
index 0000000..b96903a
--- /dev/null
+++ b/drivers/scsi/csiostor/t4fw_api_stor.h
@@ -0,0 +1,578 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _T4FW_API_STOR_H_
+#define _T4FW_API_STOR_H_
+
+
+/******************************************************************************
+ *   R E T U R N   V A L U E S
+ ********************************/
+
+enum fw_retval {
+	FW_SUCCESS		= 0,	/* completed sucessfully */
+	FW_EPERM		= 1,	/* operation not permitted */
+	FW_ENOENT		= 2,	/* no such file or directory */
+	FW_EIO			= 5,	/* input/output error; hw bad */
+	FW_ENOEXEC		= 8,	/* exec format error; inv microcode */
+	FW_EAGAIN		= 11,	/* try again */
+	FW_ENOMEM		= 12,	/* out of memory */
+	FW_EFAULT		= 14,	/* bad address; fw bad */
+	FW_EBUSY		= 16,	/* resource busy */
+	FW_EEXIST		= 17,	/* file exists */
+	FW_EINVAL		= 22,	/* invalid argument */
+	FW_ENOSPC		= 28,	/* no space left on device */
+	FW_ENOSYS		= 38,	/* functionality not implemented */
+	FW_EPROTO		= 71,	/* protocol error */
+	FW_EADDRINUSE		= 98,	/* address already in use */
+	FW_EADDRNOTAVAIL	= 99,	/* cannot assigned requested address */
+	FW_ENETDOWN		= 100,	/* network is down */
+	FW_ENETUNREACH		= 101,	/* network is unreachable */
+	FW_ENOBUFS		= 105,	/* no buffer space available */
+	FW_ETIMEDOUT		= 110,	/* timeout */
+	FW_EINPROGRESS		= 115,	/* fw internal */
+	FW_SCSI_ABORT_REQUESTED	= 128,	/* */
+	FW_SCSI_ABORT_TIMEDOUT	= 129,	/* */
+	FW_SCSI_ABORTED		= 130,	/* */
+	FW_SCSI_CLOSE_REQUESTED	= 131,	/* */
+	FW_ERR_LINK_DOWN	= 132,	/* */
+	FW_RDEV_NOT_READY	= 133,	/* */
+	FW_ERR_RDEV_LOST	= 134,	/* */
+	FW_ERR_RDEV_LOGO	= 135,	/* */
+	FW_FCOE_NO_XCHG		= 136,	/* */
+	FW_SCSI_RSP_ERR		= 137,	/* */
+	FW_ERR_RDEV_IMPL_LOGO	= 138,	/* */
+	FW_SCSI_UNDER_FLOW_ERR  = 139,	/* */
+	FW_SCSI_OVER_FLOW_ERR   = 140,	/* */
+	FW_SCSI_DDP_ERR		= 141,	/* DDP error*/
+	FW_SCSI_TASK_ERR	= 142,	/* No SCSI tasks available */
+};
+
+enum fw_fcoe_link_sub_op {
+	FCOE_LINK_DOWN	= 0x0,
+	FCOE_LINK_UP	= 0x1,
+	FCOE_LINK_COND	= 0x2,
+};
+
+enum fw_fcoe_link_status {
+	FCOE_LINKDOWN	= 0x0,
+	FCOE_LINKUP	= 0x1,
+};
+
+enum fw_ofld_prot {
+	PROT_FCOE	= 0x1,
+	PROT_ISCSI	= 0x2,
+};
+
+enum rport_type_fcoe {
+	FLOGI_VFPORT	= 0x1,		/* 0xfffffe */
+	FDISC_VFPORT	= 0x2,		/* 0xfffffe */
+	NS_VNPORT	= 0x3,		/* 0xfffffc */
+	REG_FC4_VNPORT	= 0x4,		/* any FC4 type VN_PORT */
+	REG_VNPORT	= 0x5,		/* 0xfffxxx - non FC4 port in switch */
+	FDMI_VNPORT	= 0x6,		/* 0xfffffa */
+	FAB_CTLR_VNPORT	= 0x7,		/* 0xfffffd */
+};
+
+enum event_cause_fcoe {
+	PLOGI_ACC_RCVD		= 0x01,
+	PLOGI_RJT_RCVD		= 0x02,
+	PLOGI_RCVD		= 0x03,
+	PLOGO_RCVD		= 0x04,
+	PRLI_ACC_RCVD		= 0x05,
+	PRLI_RJT_RCVD		= 0x06,
+	PRLI_RCVD		= 0x07,
+	PRLO_RCVD		= 0x08,
+	NPORT_ID_CHGD		= 0x09,
+	FLOGO_RCVD		= 0x0a,
+	CLR_VIRT_LNK_RCVD	= 0x0b,
+	FLOGI_ACC_RCVD		= 0x0c,
+	FLOGI_RJT_RCVD		= 0x0d,
+	FDISC_ACC_RCVD		= 0x0e,
+	FDISC_RJT_RCVD		= 0x0f,
+	FLOGI_TMO_MAX_RETRY	= 0x10,
+	IMPL_LOGO_ADISC_ACC	= 0x11,
+	IMPL_LOGO_ADISC_RJT	= 0x12,
+	IMPL_LOGO_ADISC_CNFLT	= 0x13,
+	PRLI_TMO		= 0x14,
+	ADISC_TMO		= 0x15,
+	RSCN_DEV_LOST		= 0x16,
+	SCR_ACC_RCVD		= 0x17,
+	ADISC_RJT_RCVD		= 0x18,
+	LOGO_SNT		= 0x19,
+	PROTO_ERR_IMPL_LOGO	= 0x1a,
+};
+
+enum fcoe_cmn_type {
+	FCOE_ELS,
+	FCOE_CT,
+	FCOE_SCSI_CMD,
+	FCOE_UNSOL_ELS,
+};
+
+enum fw_wr_stor_opcodes {
+	FW_RDEV_WR                     = 0x38,
+	FW_FCOE_ELS_CT_WR              = 0x30,
+	FW_SCSI_WRITE_WR               = 0x31,
+	FW_SCSI_READ_WR                = 0x32,
+	FW_SCSI_CMD_WR                 = 0x33,
+	FW_SCSI_ABRT_CLS_WR            = 0x34,
+};
+
+struct fw_rdev_wr {
+	__be32 op_to_immdlen;
+	__be32 alloc_to_len16;
+	__be64 cookie;
+	u8     protocol;
+	u8     event_cause;
+	u8     cur_state;
+	u8     prev_state;
+	__be32 flags_to_assoc_flowid;
+	union rdev_entry {
+		struct fcoe_rdev_entry {
+			__be32 flowid;
+			u8     protocol;
+			u8     event_cause;
+			u8     flags;
+			u8     rjt_reason;
+			u8     cur_login_st;
+			u8     prev_login_st;
+			__be16 rcv_fr_sz;
+			u8     rd_xfer_rdy_to_rport_type;
+			u8     vft_to_qos;
+			u8     org_proc_assoc_to_acc_rsp_code;
+			u8     enh_disc_to_tgt;
+			u8     wwnn[8];
+			u8     wwpn[8];
+			__be16 iqid;
+			u8     fc_oui[3];
+			u8     r_id[3];
+		} fcoe_rdev;
+		struct iscsi_rdev_entry {
+			__be32 flowid;
+			u8     protocol;
+			u8     event_cause;
+			u8     flags;
+			u8     r3;
+			__be16 iscsi_opts;
+			__be16 tcp_opts;
+			__be16 ip_opts;
+			__be16 max_rcv_len;
+			__be16 max_snd_len;
+			__be16 first_brst_len;
+			__be16 max_brst_len;
+			__be16 r4;
+			__be16 def_time2wait;
+			__be16 def_time2ret;
+			__be16 nop_out_intrvl;
+			__be16 non_scsi_to;
+			__be16 isid;
+			__be16 tsid;
+			__be16 port;
+			__be16 tpgt;
+			u8     r5[6];
+			__be16 iqid;
+		} iscsi_rdev;
+	} u;
+};
+
+#define FW_RDEV_WR_FLOWID_GET(x)	(((x) >> 8) & 0xfffff)
+#define FW_RDEV_WR_ASSOC_FLOWID_GET(x)	(((x) >> 0) & 0xfffff)
+#define FW_RDEV_WR_RPORT_TYPE_GET(x)	(((x) >> 0) & 0x1f)
+#define FW_RDEV_WR_NPIV_GET(x)		(((x) >> 6) & 0x1)
+#define FW_RDEV_WR_CLASS_GET(x)		(((x) >> 4) & 0x3)
+#define FW_RDEV_WR_TASK_RETRY_ID_GET(x)	(((x) >> 5) & 0x1)
+#define FW_RDEV_WR_RETRY_GET(x)		(((x) >> 4) & 0x1)
+#define FW_RDEV_WR_CONF_CMPL_GET(x)	(((x) >> 3) & 0x1)
+#define FW_RDEV_WR_INI_GET(x)		(((x) >> 1) & 0x1)
+#define FW_RDEV_WR_TGT_GET(x)		(((x) >> 0) & 0x1)
+
+struct fw_fcoe_els_ct_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	u8     tmo_val;
+	u8     els_ct_type;
+	u8     ctl_pri;
+	u8     cp_en_class;
+	__be16 xfer_cnt;
+	u8     fl_to_sp;
+	u8     l_id[3];
+	u8     r5;
+	u8     r_id[3];
+	__be64 rsp_dmaaddr;
+	__be32 rsp_dmalen;
+	__be32 r6;
+};
+
+#define FW_FCOE_ELS_CT_WR_OPCODE(x)		((x) << 24)
+#define FW_FCOE_ELS_CT_WR_OPCODE_GET(x)		(((x) >> 24) & 0xff)
+#define FW_FCOE_ELS_CT_WR_IMMDLEN(x)		((x) << 0)
+#define FW_FCOE_ELS_CT_WR_IMMDLEN_GET(x)	(((x) >> 0) & 0xff)
+#define FW_FCOE_ELS_CT_WR_SP(x)			((x) << 0)
+
+struct fw_scsi_write_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	u8     tmo_val;
+	u8     use_xfer_cnt;
+	union fw_scsi_write_priv {
+		struct fcoe_write_priv {
+			u8   ctl_pri;
+			u8   cp_en_class;
+			u8   r3_lo[2];
+		} fcoe;
+		struct iscsi_write_priv {
+			u8   r3[4];
+		} iscsi;
+	} u;
+	__be32 xfer_cnt;
+	__be32 ini_xfer_cnt;
+	__be64 rsp_dmaaddr;
+	__be32 rsp_dmalen;
+	__be32 r4;
+};
+
+#define FW_SCSI_WRITE_WR_IMMDLEN(x)	((x) << 0)
+
+struct fw_scsi_read_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	u8     tmo_val;
+	u8     use_xfer_cnt;
+	union fw_scsi_read_priv {
+		struct fcoe_read_priv {
+			u8   ctl_pri;
+			u8   cp_en_class;
+			u8   r3_lo[2];
+		} fcoe;
+		struct iscsi_read_priv {
+			u8   r3[4];
+		} iscsi;
+	} u;
+	__be32 xfer_cnt;
+	__be32 ini_xfer_cnt;
+	__be64 rsp_dmaaddr;
+	__be32 rsp_dmalen;
+	__be32 r4;
+};
+
+#define FW_SCSI_READ_WR_IMMDLEN(x)	((x) << 0)
+
+struct fw_scsi_cmd_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	u8     tmo_val;
+	u8     r3;
+	union fw_scsi_cmd_priv {
+		struct fcoe_cmd_priv {
+			u8   ctl_pri;
+			u8   cp_en_class;
+			u8   r4_lo[2];
+		} fcoe;
+		struct iscsi_cmd_priv {
+			u8   r4[4];
+		} iscsi;
+	} u;
+	u8     r5[8];
+	__be64 rsp_dmaaddr;
+	__be32 rsp_dmalen;
+	__be32 r6;
+};
+
+#define FW_SCSI_CMD_WR_IMMDLEN(x)	((x) << 0)
+
+#define SCSI_ABORT 0
+#define SCSI_CLOSE 1
+
+struct fw_scsi_abrt_cls_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	u8     tmo_val;
+	u8     sub_opcode_to_chk_all_io;
+	u8     r3[4];
+	__be64 t_cookie;
+};
+
+#define FW_SCSI_ABRT_CLS_WR_SUB_OPCODE(x)	((x) << 2)
+#define FW_SCSI_ABRT_CLS_WR_SUB_OPCODE_GET(x)	(((x) >> 2) & 0x3f)
+#define FW_SCSI_ABRT_CLS_WR_CHK_ALL_IO(x)	((x) << 0)
+
+enum fw_cmd_stor_opcodes {
+	FW_FCOE_RES_INFO_CMD           = 0x31,
+	FW_FCOE_LINK_CMD               = 0x32,
+	FW_FCOE_VNP_CMD                = 0x33,
+	FW_FCOE_SPARAMS_CMD            = 0x35,
+	FW_FCOE_STATS_CMD              = 0x37,
+	FW_FCOE_FCF_CMD                = 0x38,
+};
+
+struct fw_fcoe_res_info_cmd {
+	__be32 op_to_read;
+	__be32 retval_len16;
+	__be16 e_d_tov;
+	__be16 r_a_tov_seq;
+	__be16 r_a_tov_els;
+	__be16 r_r_tov;
+	__be32 max_xchgs;
+	__be32 max_ssns;
+	__be32 used_xchgs;
+	__be32 used_ssns;
+	__be32 max_fcfs;
+	__be32 max_vnps;
+	__be32 used_fcfs;
+	__be32 used_vnps;
+};
+
+struct fw_fcoe_link_cmd {
+	__be32 op_to_portid;
+	__be32 retval_len16;
+	__be32 sub_opcode_fcfi;
+	u8     r3;
+	u8     lstatus;
+	__be16 flags;
+	u8     r4;
+	u8     set_vlan;
+	__be16 vlan_id;
+	__be32 vnpi_pkd;
+	__be16 r6;
+	u8     phy_mac[6];
+	u8     vnport_wwnn[8];
+	u8     vnport_wwpn[8];
+};
+
+#define FW_FCOE_LINK_CMD_PORTID(x)	((x) << 0)
+#define FW_FCOE_LINK_CMD_PORTID_GET(x)	(((x) >> 0) & 0xf)
+#define FW_FCOE_LINK_CMD_SUB_OPCODE(x)  ((x) << 24U)
+#define FW_FCOE_LINK_CMD_FCFI(x)	((x) << 0)
+#define FW_FCOE_LINK_CMD_FCFI_GET(x)	(((x) >> 0) & 0xffffff)
+#define FW_FCOE_LINK_CMD_VNPI_GET(x)	(((x) >> 0) & 0xfffff)
+
+struct fw_fcoe_vnp_cmd {
+	__be32 op_to_fcfi;
+	__be32 alloc_to_len16;
+	__be32 gen_wwn_to_vnpi;
+	__be32 vf_id;
+	__be16 iqid;
+	u8   vnport_mac[6];
+	u8   vnport_wwnn[8];
+	u8   vnport_wwpn[8];
+	u8   cmn_srv_parms[16];
+	u8   clsp_word_0_1[8];
+};
+
+#define FW_FCOE_VNP_CMD_FCFI(x)		((x) << 0)
+#define FW_FCOE_VNP_CMD_ALLOC		(1U << 31)
+#define FW_FCOE_VNP_CMD_FREE		(1U << 30)
+#define FW_FCOE_VNP_CMD_MODIFY		(1U << 29)
+#define FW_FCOE_VNP_CMD_GEN_WWN		(1U << 22)
+#define FW_FCOE_VNP_CMD_VFID_EN		(1U << 20)
+#define FW_FCOE_VNP_CMD_VNPI(x)		((x) << 0)
+#define FW_FCOE_VNP_CMD_VNPI_GET(x)	(((x) >> 0) & 0xfffff)
+
+struct fw_fcoe_sparams_cmd {
+	__be32 op_to_portid;
+	__be32 retval_len16;
+	u8     r3[7];
+	u8     cos;
+	u8     lport_wwnn[8];
+	u8     lport_wwpn[8];
+	u8     cmn_srv_parms[16];
+	u8     cls_srv_parms[16];
+};
+
+#define FW_FCOE_SPARAMS_CMD_PORTID(x)	((x) << 0)
+
+struct fw_fcoe_stats_cmd {
+	__be32 op_to_flowid;
+	__be32 free_to_len16;
+	union fw_fcoe_stats {
+		struct fw_fcoe_stats_ctl {
+			u8   nstats_port;
+			u8   port_valid_ix;
+			__be16 r6;
+			__be32 r7;
+			__be64 stat0;
+			__be64 stat1;
+			__be64 stat2;
+			__be64 stat3;
+			__be64 stat4;
+			__be64 stat5;
+		} ctl;
+		struct fw_fcoe_port_stats {
+			__be64 tx_bcast_bytes;
+			__be64 tx_bcast_frames;
+			__be64 tx_mcast_bytes;
+			__be64 tx_mcast_frames;
+			__be64 tx_ucast_bytes;
+			__be64 tx_ucast_frames;
+			__be64 tx_drop_frames;
+			__be64 tx_offload_bytes;
+			__be64 tx_offload_frames;
+			__be64 rx_bcast_bytes;
+			__be64 rx_bcast_frames;
+			__be64 rx_mcast_bytes;
+			__be64 rx_mcast_frames;
+			__be64 rx_ucast_bytes;
+			__be64 rx_ucast_frames;
+			__be64 rx_err_frames;
+		} port_stats;
+		struct fw_fcoe_fcf_stats {
+			__be32 fip_tx_bytes;
+			__be32 fip_tx_fr;
+			__be64 fcf_ka;
+			__be64 mcast_adv_rcvd;
+			__be16 ucast_adv_rcvd;
+			__be16 sol_sent;
+			__be16 vlan_req;
+			__be16 vlan_rpl;
+			__be16 clr_vlink;
+			__be16 link_down;
+			__be16 link_up;
+			__be16 logo;
+			__be16 flogi_req;
+			__be16 flogi_rpl;
+			__be16 fdisc_req;
+			__be16 fdisc_rpl;
+			__be16 fka_prd_chg;
+			__be16 fc_map_chg;
+			__be16 vfid_chg;
+			u8   no_fka_req;
+			u8   no_vnp;
+		} fcf_stats;
+		struct fw_fcoe_pcb_stats {
+			__be64 tx_bytes;
+			__be64 tx_frames;
+			__be64 rx_bytes;
+			__be64 rx_frames;
+			__be32 vnp_ka;
+			__be32 unsol_els_rcvd;
+			__be64 unsol_cmd_rcvd;
+			__be16 implicit_logo;
+			__be16 flogi_inv_sparm;
+			__be16 fdisc_inv_sparm;
+			__be16 flogi_rjt;
+			__be16 fdisc_rjt;
+			__be16 no_ssn;
+			__be16 mac_flt_fail;
+			__be16 inv_fr_rcvd;
+		} pcb_stats;
+		struct fw_fcoe_scb_stats {
+			__be64 tx_bytes;
+			__be64 tx_frames;
+			__be64 rx_bytes;
+			__be64 rx_frames;
+			__be32 host_abrt_req;
+			__be32 adap_auto_abrt;
+			__be32 adap_abrt_rsp;
+			__be32 host_ios_req;
+			__be16 ssn_offl_ios;
+			__be16 ssn_not_rdy_ios;
+			u8   rx_data_ddp_err;
+			u8   ddp_flt_set_err;
+			__be16 rx_data_fr_err;
+			u8   bad_st_abrt_req;
+			u8   no_io_abrt_req;
+			u8   abort_tmo;
+			u8   abort_tmo_2;
+			__be32 abort_req;
+			u8   no_ppod_res_tmo;
+			u8   bp_tmo;
+			u8   adap_auto_cls;
+			u8   no_io_cls_req;
+			__be32 host_cls_req;
+			__be64 unsol_cmd_rcvd;
+			__be32 plogi_req_rcvd;
+			__be32 prli_req_rcvd;
+			__be16 logo_req_rcvd;
+			__be16 prlo_req_rcvd;
+			__be16 plogi_rjt_rcvd;
+			__be16 prli_rjt_rcvd;
+			__be32 adisc_req_rcvd;
+			__be32 rscn_rcvd;
+			__be32 rrq_req_rcvd;
+			__be32 unsol_els_rcvd;
+			u8   adisc_rjt_rcvd;
+			u8   scr_rjt;
+			u8   ct_rjt;
+			u8   inval_bls_rcvd;
+			__be32 ba_rjt_rcvd;
+		} scb_stats;
+	} u;
+};
+
+#define FW_FCOE_STATS_CMD_FLOWID(x)	((x) << 0)
+#define FW_FCOE_STATS_CMD_FREE		(1U << 30)
+#define FW_FCOE_STATS_CMD_NSTATS(x)	((x) << 4)
+#define FW_FCOE_STATS_CMD_PORT(x)	((x) << 0)
+#define FW_FCOE_STATS_CMD_PORT_VALID	(1U << 7)
+#define FW_FCOE_STATS_CMD_IX(x)		((x) << 0)
+
+struct fw_fcoe_fcf_cmd {
+	__be32 op_to_fcfi;
+	__be32 retval_len16;
+	__be16 priority_pkd;
+	u8     mac[6];
+	u8     name_id[8];
+	u8     fabric[8];
+	__be16 vf_id;
+	__be16 max_fcoe_size;
+	u8     vlan_id;
+	u8     fc_map[3];
+	__be32 fka_adv;
+	__be32 r6;
+	u8     r7_hi;
+	u8     fpma_to_portid;
+	u8     spma_mac[6];
+	__be64 r8;
+};
+
+#define FW_FCOE_FCF_CMD_FCFI(x)		((x) << 0)
+#define FW_FCOE_FCF_CMD_FCFI_GET(x)	(((x) >> 0) & 0xfffff)
+#define FW_FCOE_FCF_CMD_PRIORITY_GET(x)	(((x) >> 0) & 0xff)
+#define FW_FCOE_FCF_CMD_FPMA_GET(x)	(((x) >> 6) & 0x1)
+#define FW_FCOE_FCF_CMD_SPMA_GET(x)	(((x) >> 5) & 0x1)
+#define FW_FCOE_FCF_CMD_LOGIN_GET(x)	(((x) >> 4) & 0x1)
+#define FW_FCOE_FCF_CMD_PORTID_GET(x)	(((x) >> 0) & 0xf)
+
+#endif /* _T4FW_API_STOR_H_ */
-- 
1.7.1

^ permalink raw reply related

* [V2 PATCH 5/9] csiostor: Chelsio FCoE offload driver submission (sources part 5).
From: Naresh Kumar Inna @ 2012-09-05 12:33 UTC (permalink / raw)
  To: JBottomley, linux-scsi, dm, leedom; +Cc: netdev, naresh, chethan
In-Reply-To: <1346848442-4573-1-git-send-email-naresh@chelsio.com>

This patch contains code to implement the interrupt handling and the fast
path I/O functionality. The interrupt handling includes allocation of
MSIX vectors, registering and implemeting the interrupt service routines.
The fast path I/O functionality includes posting the I/O request to firmware
via Work Requests, tracking/completing them, and handling task management
requests. SCSI midlayer host template implementation is also covered by
this patch.

Signed-off-by: Naresh Kumar Inna <naresh@chelsio.com>
---
V2:
- Inlined code instead of macro in csio_isr.c
- Use true/false instead if CSIO_TRUE/CSIO_FALSE.
- Use DMA_ defines instead of CSIO_IOREQF_DMA_ defines.
- Replaced large local stack buffer with pre-allocated memory.
- Replaced fast path macros with static functions.
- Removed extra empty lines, needless comment.
	    
 drivers/scsi/csiostor/csio_isr.c  |  624 +++++++++
 drivers/scsi/csiostor/csio_scsi.c | 2561 +++++++++++++++++++++++++++++++++++++
 2 files changed, 3185 insertions(+), 0 deletions(-)
 create mode 100644 drivers/scsi/csiostor/csio_isr.c
 create mode 100644 drivers/scsi/csiostor/csio_scsi.c

diff --git a/drivers/scsi/csiostor/csio_isr.c b/drivers/scsi/csiostor/csio_isr.c
new file mode 100644
index 0000000..7ee9777
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_isr.c
@@ -0,0 +1,624 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/cpumask.h>
+#include <linux/string.h>
+
+#include "csio_init.h"
+#include "csio_hw.h"
+
+static irqreturn_t
+csio_nondata_isr(int irq, void *dev_id)
+{
+	struct csio_hw *hw = (struct csio_hw *) dev_id;
+	int rv;
+	unsigned long flags;
+
+	if (unlikely(!hw))
+		return IRQ_NONE;
+
+	if (unlikely(pci_channel_offline(hw->pdev))) {
+		CSIO_INC_STATS(hw, n_pcich_offline);
+		return IRQ_NONE;
+	}
+
+	spin_lock_irqsave(&hw->lock, flags);
+	csio_hw_slow_intr_handler(hw);
+	rv = csio_mb_isr_handler(hw);
+
+	if (rv == 0 && !(hw->flags & CSIO_HWF_FWEVT_PENDING)) {
+		hw->flags |= CSIO_HWF_FWEVT_PENDING;
+		spin_unlock_irqrestore(&hw->lock, flags);
+		schedule_work(&hw->evtq_work);
+		return IRQ_HANDLED;
+	}
+	spin_unlock_irqrestore(&hw->lock, flags);
+	return IRQ_HANDLED;
+}
+
+/*
+ * csio_fwevt_handler - Common FW event handler routine.
+ * @hw: HW module.
+ *
+ * This is the ISR for FW events. It is shared b/w MSIX
+ * and INTx handlers.
+ */
+static void
+csio_fwevt_handler(struct csio_hw *hw)
+{
+	int rv;
+	unsigned long flags;
+
+	rv = csio_fwevtq_handler(hw);
+
+	spin_lock_irqsave(&hw->lock, flags);
+	if (rv == 0 && !(hw->flags & CSIO_HWF_FWEVT_PENDING)) {
+		hw->flags |= CSIO_HWF_FWEVT_PENDING;
+		spin_unlock_irqrestore(&hw->lock, flags);
+		schedule_work(&hw->evtq_work);
+		return;
+	}
+	spin_unlock_irqrestore(&hw->lock, flags);
+
+} /* csio_fwevt_handler */
+
+/*
+ * csio_fwevt_isr() - FW events MSIX ISR
+ * @irq:
+ * @dev_id:
+ *
+ * Process WRs on the FW event queue.
+ *
+ */
+static irqreturn_t
+csio_fwevt_isr(int irq, void *dev_id)
+{
+	struct csio_hw *hw = (struct csio_hw *) dev_id;
+
+	if (unlikely(!hw))
+		return IRQ_NONE;
+
+	if (unlikely(pci_channel_offline(hw->pdev))) {
+		CSIO_INC_STATS(hw, n_pcich_offline);
+		return IRQ_NONE;
+	}
+
+	csio_fwevt_handler(hw);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * csio_fwevt_isr() - INTx wrapper for handling FW events.
+ * @irq:
+ * @dev_id:
+ */
+void
+csio_fwevt_intx_handler(struct csio_hw *hw, void *wr, uint32_t len,
+			   struct csio_fl_dma_buf *flb, void *priv)
+{
+	csio_fwevt_handler(hw);
+} /* csio_fwevt_intx_handler */
+
+/*
+ * csio_process_scsi_cmpl - Process a SCSI WR completion.
+ * @hw: HW module.
+ * @wr: The completed WR from the ingress queue.
+ * @len: Length of the WR.
+ * @flb: Freelist buffer array.
+ *
+ */
+static void
+csio_process_scsi_cmpl(struct csio_hw *hw, void *wr, uint32_t len,
+			struct csio_fl_dma_buf *flb, void *cbfn_q)
+{
+	struct csio_ioreq *ioreq;
+	uint8_t *scsiwr;
+	uint8_t subop;
+	void *cmnd;
+	unsigned long flags;
+
+	ioreq = csio_scsi_cmpl_handler(hw, wr, len, flb, NULL, &scsiwr);
+	if (likely(ioreq)) {
+		if (unlikely(*scsiwr == FW_SCSI_ABRT_CLS_WR)) {
+			subop = FW_SCSI_ABRT_CLS_WR_SUB_OPCODE_GET(
+					((struct fw_scsi_abrt_cls_wr *)
+					    scsiwr)->sub_opcode_to_chk_all_io);
+
+			csio_dbg(hw, "%s cmpl recvd ioreq:%p status:%d\n",
+				    subop ? "Close" : "Abort",
+				    ioreq, ioreq->wr_status);
+
+			spin_lock_irqsave(&hw->lock, flags);
+			if (subop)
+				csio_scsi_closed(ioreq,
+						 (struct list_head *)cbfn_q);
+			else
+				csio_scsi_aborted(ioreq,
+						  (struct list_head *)cbfn_q);
+			/*
+			 * We call scsi_done for I/Os that driver thinks aborts
+			 * have timed out. If there is a race caused by FW
+			 * completing abort at the exact same time that the
+			 * driver has deteced the abort timeout, the following
+			 * check prevents calling of scsi_done twice for the
+			 * same command: once from the eh_abort_handler, another
+			 * from csio_scsi_isr_handler(). This also avoids the
+			 * need to check if csio_scsi_cmnd(req) is NULL in the
+			 * fast path.
+			 */
+			cmnd = csio_scsi_cmnd(ioreq);
+			if (unlikely(cmnd == NULL))
+				list_del_init(&ioreq->sm.sm_list);
+
+			spin_unlock_irqrestore(&hw->lock, flags);
+
+			if (unlikely(cmnd == NULL))
+				csio_put_scsi_ioreq_lock(hw,
+						csio_hw_to_scsim(hw), ioreq);
+		} else {
+			spin_lock_irqsave(&hw->lock, flags);
+			csio_scsi_completed(ioreq, (struct list_head *)cbfn_q);
+			spin_unlock_irqrestore(&hw->lock, flags);
+		}
+	}
+}
+
+/*
+ * csio_scsi_isr_handler() - Common SCSI ISR handler.
+ * @iq: Ingress queue pointer.
+ *
+ * Processes SCSI completions on the SCSI IQ indicated by scm->iq_idx
+ * by calling csio_wr_process_iq_idx. If there are completions on the
+ * isr_cbfn_q, yank them out into a local queue and call their io_cbfns.
+ * Once done, add these completions onto the freelist.
+ * This routine is shared b/w MSIX and INTx.
+ */
+static inline irqreturn_t
+csio_scsi_isr_handler(struct csio_q *iq)
+{
+	struct csio_hw *hw = (struct csio_hw *)iq->owner;
+	LIST_HEAD(cbfn_q);
+	struct list_head *tmp;
+	struct csio_scsim *scm;
+	struct csio_ioreq *ioreq;
+	int isr_completions = 0;
+
+	scm = csio_hw_to_scsim(hw);
+
+	if (unlikely(csio_wr_process_iq(hw, iq, csio_process_scsi_cmpl,
+					&cbfn_q) != 0))
+		return IRQ_NONE;
+
+	/* Call back the completion routines */
+	list_for_each(tmp, &cbfn_q) {
+		ioreq = (struct csio_ioreq *)tmp;
+		isr_completions++;
+		ioreq->io_cbfn(hw, ioreq);
+		/* Release ddp buffer if used for this req */
+		if (unlikely(ioreq->dcopy))
+			csio_put_scsi_ddp_list_lock(hw, scm, &ioreq->gen_list,
+						    ioreq->nsge);
+	}
+
+	if (isr_completions) {
+		/* Return the ioreqs back to ioreq->freelist */
+		csio_put_scsi_ioreq_list_lock(hw, scm, &cbfn_q,
+					      isr_completions);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * csio_scsi_isr() - SCSI MSIX handler
+ * @irq:
+ * @dev_id:
+ *
+ * This is the top level SCSI MSIX handler. Calls csio_scsi_isr_handler()
+ * for handling SCSI completions.
+ */
+static irqreturn_t
+csio_scsi_isr(int irq, void *dev_id)
+{
+	struct csio_q *iq = (struct csio_q *) dev_id;
+	struct csio_hw *hw;
+
+	if (unlikely(!iq))
+		return IRQ_NONE;
+
+	hw = (struct csio_hw *)iq->owner;
+
+	if (unlikely(pci_channel_offline(hw->pdev))) {
+		CSIO_INC_STATS(hw, n_pcich_offline);
+		return IRQ_NONE;
+	}
+
+	csio_scsi_isr_handler(iq);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * csio_scsi_intx_handler() - SCSI INTx handler
+ * @irq:
+ * @dev_id:
+ *
+ * This is the top level SCSI INTx handler. Calls csio_scsi_isr_handler()
+ * for handling SCSI completions.
+ */
+void
+csio_scsi_intx_handler(struct csio_hw *hw, void *wr, uint32_t len,
+			struct csio_fl_dma_buf *flb, void *priv)
+{
+	struct csio_q *iq = priv;
+
+	csio_scsi_isr_handler(iq);
+
+} /* csio_scsi_intx_handler */
+
+/*
+ * csio_fcoe_isr() - INTx/MSI interrupt service routine for FCoE.
+ * @irq:
+ * @dev_id:
+ *
+ *
+ */
+static irqreturn_t
+csio_fcoe_isr(int irq, void *dev_id)
+{
+	struct csio_hw *hw = (struct csio_hw *) dev_id;
+	struct csio_q *intx_q = NULL;
+	int rv;
+	irqreturn_t ret = IRQ_NONE;
+	unsigned long flags;
+
+	if (unlikely(!hw))
+		return IRQ_NONE;
+
+	if (unlikely(pci_channel_offline(hw->pdev))) {
+		CSIO_INC_STATS(hw, n_pcich_offline);
+		return IRQ_NONE;
+	}
+
+	/* Disable the interrupt for this PCI function. */
+	if (hw->intr_mode == CSIO_IM_INTX)
+		csio_wr_reg32(hw, 0, MYPF_REG(PCIE_PF_CLI));
+
+	/*
+	 * The read in the following function will flush the
+	 * above write.
+	 */
+	if (csio_hw_slow_intr_handler(hw))
+		ret = IRQ_HANDLED;
+
+	/* Get the INTx Forward interrupt IQ. */
+	intx_q = csio_get_q(hw, hw->intr_iq_idx);
+
+	CSIO_DB_ASSERT(intx_q);
+
+	/* IQ handler is not possible for intx_q, hence pass in NULL */
+	if (likely(csio_wr_process_iq(hw, intx_q, NULL, NULL) == 0))
+		ret = IRQ_HANDLED;
+
+	spin_lock_irqsave(&hw->lock, flags);
+	rv = csio_mb_isr_handler(hw);
+	if (rv == 0 && !(hw->flags & CSIO_HWF_FWEVT_PENDING)) {
+		hw->flags |= CSIO_HWF_FWEVT_PENDING;
+		spin_unlock_irqrestore(&hw->lock, flags);
+		schedule_work(&hw->evtq_work);
+		return IRQ_HANDLED;
+	}
+	spin_unlock_irqrestore(&hw->lock, flags);
+
+	return ret;
+}
+
+static void
+csio_add_msix_desc(struct csio_hw *hw)
+{
+	int i;
+	struct csio_msix_entries *entryp = &hw->msix_entries[0];
+	int k = CSIO_EXTRA_VECS;
+	int len = sizeof(entryp->desc) - 1;
+	int cnt = hw->num_sqsets + k;
+
+	/* Non-data vector */
+	memset(entryp->desc, 0, len + 1);
+	snprintf(entryp->desc, len, "csio-%02x:%02x:%x-nondata",
+		 CSIO_PCI_BUS(hw), CSIO_PCI_DEV(hw), CSIO_PCI_FUNC(hw));
+
+	entryp++;
+	memset(entryp->desc, 0, len + 1);
+	snprintf(entryp->desc, len, "csio-%02x:%02x:%x-fwevt",
+		 CSIO_PCI_BUS(hw), CSIO_PCI_DEV(hw), CSIO_PCI_FUNC(hw));
+	entryp++;
+
+	/* Name SCSI vecs */
+	for (i = k; i < cnt; i++, entryp++) {
+		memset(entryp->desc, 0, len + 1);
+		snprintf(entryp->desc, len, "csio-%02x:%02x:%x-scsi%d",
+			 CSIO_PCI_BUS(hw), CSIO_PCI_DEV(hw),
+			 CSIO_PCI_FUNC(hw), i - CSIO_EXTRA_VECS);
+	}
+}
+
+int
+csio_request_irqs(struct csio_hw *hw)
+{
+	int rv, i, j, k = 0;
+	struct csio_msix_entries *entryp = &hw->msix_entries[0];
+	struct csio_scsi_cpu_info *info;
+
+	if (hw->intr_mode != CSIO_IM_MSIX) {
+		rv = request_irq(hw->pdev->irq, csio_fcoe_isr,
+					(hw->intr_mode == CSIO_IM_MSI) ?
+							0 : IRQF_SHARED,
+					KBUILD_MODNAME, hw);
+		if (rv) {
+			if (hw->intr_mode == CSIO_IM_MSI)
+				pci_disable_msi(hw->pdev);
+			csio_err(hw, "Failed to allocate interrupt line.\n");
+			return -EINVAL;
+		}
+
+		goto out;
+	}
+
+	/* Add the MSIX vector descriptions */
+	csio_add_msix_desc(hw);
+
+	rv = request_irq(entryp[k].vector, csio_nondata_isr, 0,
+			 entryp[k].desc, hw);
+	if (rv) {
+		csio_err(hw, "IRQ request failed for vec %d err:%d\n",
+			 entryp[k].vector, rv);
+		goto err;
+	}
+
+	entryp[k++].dev_id = (void *)hw;
+
+	rv = request_irq(entryp[k].vector, csio_fwevt_isr, 0,
+			 entryp[k].desc, hw);
+	if (rv) {
+		csio_err(hw, "IRQ request failed for vec %d err:%d\n",
+			 entryp[k].vector, rv);
+		goto err;
+	}
+
+	entryp[k++].dev_id = (void *)hw;
+
+	/* Allocate IRQs for SCSI */
+	for (i = 0; i < hw->num_pports; i++) {
+		info = &hw->scsi_cpu_info[i];
+		for (j = 0; j < info->max_cpus; j++, k++) {
+			struct csio_scsi_qset *sqset = &hw->sqset[i][j];
+			struct csio_q *q = hw->wrm.q_arr[sqset->iq_idx];
+
+			rv = request_irq(entryp[k].vector, csio_scsi_isr, 0,
+					 entryp[k].desc, q);
+			if (rv) {
+				csio_err(hw,
+				       "IRQ request failed for vec %d err:%d\n",
+				       entryp[k].vector, rv);
+				goto err;
+			}
+
+			entryp[k].dev_id = (void *)q;
+
+		} /* for all scsi cpus */
+	} /* for all ports */
+
+out:
+	hw->flags |= CSIO_HWF_HOST_INTR_ENABLED;
+
+	return 0;
+
+err:
+	for (i = 0; i < k; i++) {
+		entryp = &hw->msix_entries[i];
+		free_irq(entryp->vector, entryp->dev_id);
+	}
+	pci_disable_msix(hw->pdev);
+
+	return -EINVAL;
+}
+
+static void
+csio_disable_msix(struct csio_hw *hw, bool free)
+{
+	int i;
+	struct csio_msix_entries *entryp;
+	int cnt = hw->num_sqsets + CSIO_EXTRA_VECS;
+
+	if (free) {
+		for (i = 0; i < cnt; i++) {
+			entryp = &hw->msix_entries[i];
+			free_irq(entryp->vector, entryp->dev_id);
+		}
+	}
+	pci_disable_msix(hw->pdev);
+}
+
+/* Reduce per-port max possible CPUs */
+static void
+csio_reduce_sqsets(struct csio_hw *hw, int cnt)
+{
+	int i;
+	struct csio_scsi_cpu_info *info;
+
+	while (cnt < hw->num_sqsets) {
+		for (i = 0; i < hw->num_pports; i++) {
+			info = &hw->scsi_cpu_info[i];
+			if (info->max_cpus > 1) {
+				info->max_cpus--;
+				hw->num_sqsets--;
+				if (hw->num_sqsets <= cnt)
+					break;
+			}
+		}
+	}
+
+	csio_dbg(hw, "Reduced sqsets to %d\n", hw->num_sqsets);
+}
+
+static int
+csio_enable_msix(struct csio_hw *hw)
+{
+	int rv, i, j, k, n, min, cnt;
+	struct csio_msix_entries *entryp;
+	struct msix_entry *entries;
+	int extra = CSIO_EXTRA_VECS;
+	struct csio_scsi_cpu_info *info;
+
+	min = hw->num_pports + extra;
+	cnt = hw->num_sqsets + extra;
+
+	/* Max vectors required based on #niqs configured in fw */
+	if (hw->flags & CSIO_HWF_USING_SOFT_PARAMS || !csio_is_hw_master(hw))
+		cnt = min_t(uint8_t, hw->cfg_niq, cnt);
+
+	entries = kzalloc(sizeof(struct msix_entry) * cnt, GFP_KERNEL);
+	if (!entries)
+		return -ENOMEM;
+
+	for (i = 0; i < cnt; i++)
+		entries[i].entry = (uint16_t)i;
+
+	csio_dbg(hw, "FW supp #niq:%d, trying %d msix's\n", hw->cfg_niq, cnt);
+
+	while ((rv = pci_enable_msix(hw->pdev, entries, cnt)) >= min)
+		cnt = rv;
+	if (!rv) {
+		if (cnt < (hw->num_sqsets + extra)) {
+			csio_dbg(hw, "Reducing sqsets to %d\n", cnt - extra);
+			csio_reduce_sqsets(hw, cnt - extra);
+		}
+	} else {
+		if (rv > 0) {
+			pci_disable_msix(hw->pdev);
+			csio_info(hw, "Not using MSI-X, remainder:%d\n", rv);
+		}
+
+		kfree(entries);
+		return -ENOMEM;
+	}
+
+	/* Save off vectors */
+	for (i = 0; i < cnt; i++) {
+		entryp = &hw->msix_entries[i];
+		entryp->vector = entries[i].vector;
+	}
+
+	/* Distribute vectors */
+	k = 0;
+	csio_set_nondata_intr_idx(hw, entries[k].entry);
+	csio_set_mb_intr_idx(csio_hw_to_mbm(hw), entries[k++].entry);
+	csio_set_fwevt_intr_idx(hw, entries[k++].entry);
+
+	for (i = 0; i < hw->num_pports; i++) {
+		info = &hw->scsi_cpu_info[i];
+
+		for (j = 0; j < hw->num_scsi_msix_cpus; j++) {
+			n = (j % info->max_cpus) +  k;
+			hw->sqset[i][j].intr_idx = entries[n].entry;
+		}
+
+		k += info->max_cpus;
+	}
+
+	kfree(entries);
+	return 0;
+}
+
+void
+csio_intr_enable(struct csio_hw *hw)
+{
+	hw->intr_mode = CSIO_IM_NONE;
+	hw->flags &= ~CSIO_HWF_HOST_INTR_ENABLED;
+
+	/* Try MSIX, then MSI or fall back to INTx */
+	if ((csio_msi == 2) && !csio_enable_msix(hw))
+		hw->intr_mode = CSIO_IM_MSIX;
+	else {
+		/* Max iqs required based on #niqs configured in fw */
+		if (hw->flags & CSIO_HWF_USING_SOFT_PARAMS ||
+			!csio_is_hw_master(hw)) {
+			int extra = CSIO_EXTRA_MSI_IQS;
+
+			if (hw->cfg_niq < (hw->num_sqsets + extra)) {
+				csio_dbg(hw, "Reducing sqsets to %d\n",
+					 hw->cfg_niq - extra);
+				csio_reduce_sqsets(hw, hw->cfg_niq - extra);
+			}
+		}
+
+		if ((csio_msi == 1) && !pci_enable_msi(hw->pdev))
+			hw->intr_mode = CSIO_IM_MSI;
+		else
+			hw->intr_mode = CSIO_IM_INTX;
+	}
+
+	csio_dbg(hw, "Using %s interrupt mode.\n",
+		(hw->intr_mode == CSIO_IM_MSIX) ? "MSIX" :
+		((hw->intr_mode == CSIO_IM_MSI) ? "MSI" : "INTx"));
+}
+
+void
+csio_intr_disable(struct csio_hw *hw, bool free)
+{
+	csio_hw_intr_disable(hw);
+
+	switch (hw->intr_mode) {
+	case CSIO_IM_MSIX:
+		csio_disable_msix(hw, free);
+		break;
+	case CSIO_IM_MSI:
+		if (free)
+			free_irq(hw->pdev->irq, hw);
+		pci_disable_msi(hw->pdev);
+		break;
+	case CSIO_IM_INTX:
+		if (free)
+			free_irq(hw->pdev->irq, hw);
+		break;
+	default:
+		break;
+	}
+	hw->intr_mode = CSIO_IM_NONE;
+	hw->flags &= ~CSIO_HWF_HOST_INTR_ENABLED;
+}
diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c
new file mode 100644
index 0000000..fe598bf
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_scsi.c
@@ -0,0 +1,2561 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/moduleparam.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/ctype.h>
+#include <linux/version.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/compiler.h>
+#include <linux/export.h>
+#include <linux/module.h>
+#include <asm/unaligned.h>
+#include <asm/page.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_transport_fc.h>
+
+#include "csio_hw.h"
+#include "csio_lnode.h"
+#include "csio_rnode.h"
+#include "csio_scsi.h"
+#include "csio_init.h"
+
+int csio_scsi_eqsize = 65536;
+int csio_scsi_iqlen = 128;
+int csio_scsi_ioreqs = 2048;
+uint32_t csio_max_scan_tmo;
+uint32_t csio_delta_scan_tmo = 5;
+int csio_lun_qdepth = 32;
+
+static int csio_ddp_descs = 128;
+
+static int csio_do_abrt_cls(struct csio_hw *,
+				      struct csio_ioreq *, bool);
+
+static void csio_scsis_uninit(struct csio_ioreq *, enum csio_scsi_ev);
+static void csio_scsis_io_active(struct csio_ioreq *, enum csio_scsi_ev);
+static void csio_scsis_tm_active(struct csio_ioreq *, enum csio_scsi_ev);
+static void csio_scsis_aborting(struct csio_ioreq *, enum csio_scsi_ev);
+static void csio_scsis_closing(struct csio_ioreq *, enum csio_scsi_ev);
+static void csio_scsis_shost_cmpl_await(struct csio_ioreq *, enum csio_scsi_ev);
+
+/*
+ * csio_scsi_match_io - Match an ioreq with the given SCSI level data.
+ * @ioreq: The I/O request
+ * @sld: Level information
+ *
+ * Should be called with lock held.
+ *
+ */
+static bool
+csio_scsi_match_io(struct csio_ioreq *ioreq, struct csio_scsi_level_data *sld)
+{
+	struct scsi_cmnd *scmnd = csio_scsi_cmnd(ioreq);
+
+	switch (sld->level) {
+	case CSIO_LEV_LUN:
+		if (scmnd == NULL)
+			return false;
+
+		return ((ioreq->lnode == sld->lnode) &&
+			(ioreq->rnode == sld->rnode) &&
+			((uint64_t)scmnd->device->lun == sld->oslun));
+
+	case CSIO_LEV_RNODE:
+		return ((ioreq->lnode == sld->lnode) &&
+				(ioreq->rnode == sld->rnode));
+	case CSIO_LEV_LNODE:
+		return (ioreq->lnode == sld->lnode);
+	case CSIO_LEV_ALL:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * csio_scsi_gather_active_ios - Gather active I/Os based on level
+ * @scm: SCSI module
+ * @sld: Level information
+ * @dest: The queue where these I/Os have to be gathered.
+ *
+ * Should be called with lock held.
+ */
+static void
+csio_scsi_gather_active_ios(struct csio_scsim *scm,
+			    struct csio_scsi_level_data *sld,
+			    struct list_head *dest)
+{
+	struct list_head *tmp, *next;
+
+	if (list_empty(&scm->active_q))
+		return;
+
+	/* Just splice the entire active_q into dest */
+	if (sld->level == CSIO_LEV_ALL) {
+		list_splice_tail_init(&scm->active_q, dest);
+		return;
+	}
+
+	list_for_each_safe(tmp, next, &scm->active_q) {
+		if (csio_scsi_match_io((struct csio_ioreq *)tmp, sld)) {
+			list_del_init(tmp);
+			list_add_tail(tmp, dest);
+		}
+	}
+}
+
+static inline bool
+csio_scsi_itnexus_loss_error(uint16_t error)
+{
+	switch (error) {
+	case FW_ERR_LINK_DOWN:
+	case FW_RDEV_NOT_READY:
+	case FW_ERR_RDEV_LOST:
+	case FW_ERR_RDEV_LOGO:
+	case FW_ERR_RDEV_IMPL_LOGO:
+		return 1;
+	}
+	return 0;
+}
+
+static inline void
+csio_scsi_tag(struct scsi_cmnd *scmnd, uint8_t *tag, uint8_t hq,
+	      uint8_t oq, uint8_t sq)
+{
+	char stag[2];
+
+	if (scsi_populate_tag_msg(scmnd, stag)) {
+		switch (stag[0]) {
+		case HEAD_OF_QUEUE_TAG:
+			*tag = hq;
+			break;
+		case ORDERED_QUEUE_TAG:
+			*tag = oq;
+			break;
+		default:
+			*tag = sq;
+			break;
+		}
+	} else
+		*tag = 0;
+}
+
+/*
+ * csio_scsi_fcp_cmnd - Frame the SCSI FCP command paylod.
+ * @req: IO req structure.
+ * @addr: DMA location to place the payload.
+ *
+ * This routine is shared between FCP_WRITE, FCP_READ and FCP_CMD requests.
+ */
+static inline void
+csio_scsi_fcp_cmnd(struct csio_ioreq *req, void *addr)
+{
+	struct fcp_cmnd *fcp_cmnd = (struct fcp_cmnd *)addr;
+	struct scsi_cmnd *scmnd = csio_scsi_cmnd(req);
+
+	/* Check for Task Management */
+	if (likely(scmnd->SCp.Message == 0)) {
+		int_to_scsilun(scmnd->device->lun, &fcp_cmnd->fc_lun);
+		fcp_cmnd->fc_tm_flags = 0;
+		fcp_cmnd->fc_cmdref = 0;
+		fcp_cmnd->fc_pri_ta = 0;
+
+		memcpy(fcp_cmnd->fc_cdb, scmnd->cmnd, 16);
+		csio_scsi_tag(scmnd, &fcp_cmnd->fc_pri_ta,
+			      FCP_PTA_HEADQ, FCP_PTA_ORDERED, FCP_PTA_SIMPLE);
+		fcp_cmnd->fc_dl = cpu_to_be32(scsi_bufflen(scmnd));
+
+		if (req->nsge)
+			if (req->datadir == DMA_TO_DEVICE)
+				fcp_cmnd->fc_flags = FCP_CFL_WRDATA;
+			else
+				fcp_cmnd->fc_flags = FCP_CFL_RDDATA;
+		else
+			fcp_cmnd->fc_flags = 0;
+	} else {
+		memset(fcp_cmnd, 0, sizeof(*fcp_cmnd));
+		int_to_scsilun(scmnd->device->lun, &fcp_cmnd->fc_lun);
+		fcp_cmnd->fc_tm_flags = (uint8_t)scmnd->SCp.Message;
+	}
+}
+
+/*
+ * csio_scsi_init_cmd_wr - Initialize the SCSI CMD WR.
+ * @req: IO req structure.
+ * @addr: DMA location to place the payload.
+ * @size: Size of WR (including FW WR + immed data + rsp SG entry
+ *
+ * Wrapper for populating fw_scsi_cmd_wr.
+ */
+static inline void
+csio_scsi_init_cmd_wr(struct csio_ioreq *req, void *addr, uint32_t size)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_rnode *rn = req->rnode;
+	struct fw_scsi_cmd_wr *wr = (struct fw_scsi_cmd_wr *)addr;
+	struct csio_dma_buf *dma_buf;
+	uint8_t imm = csio_hw_to_scsim(hw)->proto_cmd_len;
+
+	wr->op_immdlen = cpu_to_be32(FW_WR_OP(FW_SCSI_CMD_WR) |
+					  FW_SCSI_CMD_WR_IMMDLEN(imm));
+	wr->flowid_len16 = cpu_to_be32(FW_WR_FLOWID(rn->flowid) |
+					    FW_WR_LEN16(
+						CSIO_ROUNDUP(size, 16)));
+
+	wr->cookie = (uintptr_t) req;
+	wr->iqid = (uint16_t)cpu_to_be16(csio_q_physiqid(hw, req->iq_idx));
+	wr->tmo_val = (uint8_t) req->tmo;
+	wr->r3 = 0;
+	memset(&wr->r5, 0, 8);
+
+	/* Get RSP DMA buffer */
+	dma_buf = &req->dma_buf;
+
+	/* Prepare RSP SGL */
+	wr->rsp_dmalen = cpu_to_be32(dma_buf->len);
+	wr->rsp_dmaaddr = cpu_to_be64(dma_buf->paddr);
+
+	wr->r6 = 0;
+
+	wr->u.fcoe.ctl_pri = 0;
+	wr->u.fcoe.cp_en_class = 0;
+	wr->u.fcoe.r4_lo[0] = 0;
+	wr->u.fcoe.r4_lo[1] = 0;
+
+	/* Frame a FCP command */
+	csio_scsi_fcp_cmnd(req, (void *)((uintptr_t)addr +
+				    sizeof(struct fw_scsi_cmd_wr)));
+}
+
+#define CSIO_SCSI_CMD_WR_SZ(_imm)					\
+	(sizeof(struct fw_scsi_cmd_wr) +		/* WR size */	\
+	 ALIGN((_imm), 16))				/* Immed data */
+
+#define CSIO_SCSI_CMD_WR_SZ_16(_imm)					\
+			(ALIGN(CSIO_SCSI_CMD_WR_SZ((_imm)), 16))
+
+/*
+ * csio_scsi_cmd - Create a SCSI CMD WR.
+ * @req: IO req structure.
+ *
+ * Gets a WR slot in the ingress queue and initializes it with SCSI CMD WR.
+ *
+ */
+static inline void
+csio_scsi_cmd(struct csio_ioreq *req)
+{
+	struct csio_wr_pair wrp;
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_scsim *scsim = csio_hw_to_scsim(hw);
+	uint32_t size = CSIO_SCSI_CMD_WR_SZ_16(scsim->proto_cmd_len);
+
+	req->drv_status = csio_wr_get(hw, req->eq_idx, size, &wrp);
+	if (unlikely(req->drv_status != 0))
+		return;
+
+	if (wrp.size1 >= size) {
+		/* Initialize WR in one shot */
+		csio_scsi_init_cmd_wr(req, wrp.addr1, size);
+	} else {
+		uint8_t *tmpwr = csio_q_eq_wrap(hw, req->eq_idx);
+
+		/*
+		 * Make a temporary copy of the WR and write back
+		 * the copy into the WR pair.
+		 */
+		csio_scsi_init_cmd_wr(req, (void *)tmpwr, size);
+		memcpy(wrp.addr1, tmpwr, wrp.size1);
+		memcpy(wrp.addr2, tmpwr + wrp.size1, size - wrp.size1);
+	}
+}
+
+/*
+ * csio_scsi_init_ulptx_dsgl - Fill in a ULP_TX_SC_DSGL
+ * @hw: HW module
+ * @req: IO request
+ * @sgl: ULP TX SGL pointer.
+ *
+ */
+static inline void
+csio_scsi_init_ultptx_dsgl(struct csio_hw *hw, struct csio_ioreq *req,
+			   struct ulptx_sgl *sgl)
+{
+	struct ulptx_sge_pair *sge_pair = NULL;
+	struct scatterlist *sgel;
+	uint32_t i = 0;
+	uint32_t xfer_len;
+	struct list_head *tmp;
+	struct csio_dma_buf *dma_buf;
+	struct scsi_cmnd *scmnd = csio_scsi_cmnd(req);
+
+	sgl->cmd_nsge = htonl(ULPTX_CMD(ULP_TX_SC_DSGL) | ULPTX_MORE |
+				     ULPTX_NSGE(req->nsge));
+	/* Now add the data SGLs */
+	if (likely(!req->dcopy)) {
+		scsi_for_each_sg(scmnd, sgel, req->nsge, i) {
+			if (i == 0) {
+				sgl->addr0 = cpu_to_be64(sg_dma_address(sgel));
+				sgl->len0 = cpu_to_be32(sg_dma_len(sgel));
+				sge_pair = (struct ulptx_sge_pair *)(sgl + 1);
+				continue;
+			}
+			if ((i - 1) & 0x1) {
+				sge_pair->addr[1] = cpu_to_be64(
+							sg_dma_address(sgel));
+				sge_pair->len[1] = cpu_to_be32(
+							sg_dma_len(sgel));
+				sge_pair++;
+			} else {
+				sge_pair->addr[0] = cpu_to_be64(
+							sg_dma_address(sgel));
+				sge_pair->len[0] = cpu_to_be32(
+							sg_dma_len(sgel));
+			}
+		}
+	} else {
+		/* Program sg elements with driver's DDP buffer */
+		xfer_len = scsi_bufflen(scmnd);
+		list_for_each(tmp, &req->gen_list) {
+			dma_buf = (struct csio_dma_buf *)tmp;
+			if (i == 0) {
+				sgl->addr0 = cpu_to_be64(dma_buf->paddr);
+				sgl->len0 = cpu_to_be32(
+						min(xfer_len, dma_buf->len));
+				sge_pair = (struct ulptx_sge_pair *)(sgl + 1);
+			} else if ((i - 1) & 0x1) {
+				sge_pair->addr[1] = cpu_to_be64(dma_buf->paddr);
+				sge_pair->len[1] = cpu_to_be32(
+						min(xfer_len, dma_buf->len));
+				sge_pair++;
+			} else {
+				sge_pair->addr[0] = cpu_to_be64(dma_buf->paddr);
+				sge_pair->len[0] = cpu_to_be32(
+						min(xfer_len, dma_buf->len));
+			}
+			xfer_len -= min(xfer_len, dma_buf->len);
+			i++;
+		}
+	}
+}
+
+/*
+ * csio_scsi_init_read_wr - Initialize the READ SCSI WR.
+ * @req: IO req structure.
+ * @wrp: DMA location to place the payload.
+ * @size: Size of WR (including FW WR + immed data + rsp SG entry + data SGL
+ *
+ * Wrapper for populating fw_scsi_read_wr.
+ */
+static inline void
+csio_scsi_init_read_wr(struct csio_ioreq *req, void *wrp, uint32_t size)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_rnode *rn = req->rnode;
+	struct fw_scsi_read_wr *wr = (struct fw_scsi_read_wr *)wrp;
+	struct ulptx_sgl *sgl;
+	struct csio_dma_buf *dma_buf;
+	uint8_t imm = csio_hw_to_scsim(hw)->proto_cmd_len;
+	struct scsi_cmnd *scmnd = csio_scsi_cmnd(req);
+
+	wr->op_immdlen = cpu_to_be32(FW_WR_OP(FW_SCSI_READ_WR) |
+				     FW_SCSI_READ_WR_IMMDLEN(imm));
+	wr->flowid_len16 = cpu_to_be32(FW_WR_FLOWID(rn->flowid) |
+				       FW_WR_LEN16(CSIO_ROUNDUP(size, 16)));
+	wr->cookie = (uintptr_t)req;
+	wr->iqid = (uint16_t)cpu_to_be16(csio_q_physiqid(hw, req->iq_idx));
+	wr->tmo_val = (uint8_t)(req->tmo);
+	wr->use_xfer_cnt = 1;
+	wr->xfer_cnt = cpu_to_be32(scsi_bufflen(scmnd));
+	wr->ini_xfer_cnt = cpu_to_be32(scsi_bufflen(scmnd));
+	/* Get RSP DMA buffer */
+	dma_buf = &req->dma_buf;
+
+	/* Prepare RSP SGL */
+	wr->rsp_dmalen = cpu_to_be32(dma_buf->len);
+	wr->rsp_dmaaddr = cpu_to_be64(dma_buf->paddr);
+
+	wr->r4 = 0;
+
+	wr->u.fcoe.ctl_pri = 0;
+	wr->u.fcoe.cp_en_class = 0;
+	wr->u.fcoe.r3_lo[0] = 0;
+	wr->u.fcoe.r3_lo[1] = 0;
+	csio_scsi_fcp_cmnd(req, (void *)((uintptr_t)wrp +
+					sizeof(struct fw_scsi_read_wr)));
+
+	/* Move WR pointer past command and immediate data */
+	sgl = (struct ulptx_sgl *)((uintptr_t)wrp +
+			      sizeof(struct fw_scsi_read_wr) + ALIGN(imm, 16));
+
+	/* Fill in the DSGL */
+	csio_scsi_init_ultptx_dsgl(hw, req, sgl);
+}
+
+/*
+ * csio_scsi_init_write_wr - Initialize the WRITE SCSI WR.
+ * @req: IO req structure.
+ * @wrp: DMA location to place the payload.
+ * @size: Size of WR (including FW WR + immed data + rsp SG entry + data SGL
+ *
+ * Wrapper for populating fw_scsi_write_wr.
+ */
+static inline void
+csio_scsi_init_write_wr(struct csio_ioreq *req, void *wrp, uint32_t size)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_rnode *rn = req->rnode;
+	struct fw_scsi_write_wr *wr = (struct fw_scsi_write_wr *)wrp;
+	struct ulptx_sgl *sgl;
+	struct csio_dma_buf *dma_buf;
+	uint8_t imm = csio_hw_to_scsim(hw)->proto_cmd_len;
+	struct scsi_cmnd *scmnd = csio_scsi_cmnd(req);
+
+	wr->op_immdlen = cpu_to_be32(FW_WR_OP(FW_SCSI_WRITE_WR) |
+				     FW_SCSI_WRITE_WR_IMMDLEN(imm));
+	wr->flowid_len16 = cpu_to_be32(FW_WR_FLOWID(rn->flowid) |
+				       FW_WR_LEN16(CSIO_ROUNDUP(size, 16)));
+	wr->cookie = (uintptr_t)req;
+	wr->iqid = (uint16_t)cpu_to_be16(csio_q_physiqid(hw, req->iq_idx));
+	wr->tmo_val = (uint8_t)(req->tmo);
+	wr->use_xfer_cnt = 1;
+	wr->xfer_cnt = cpu_to_be32(scsi_bufflen(scmnd));
+	wr->ini_xfer_cnt = cpu_to_be32(scsi_bufflen(scmnd));
+	/* Get RSP DMA buffer */
+	dma_buf = &req->dma_buf;
+
+	/* Prepare RSP SGL */
+	wr->rsp_dmalen = cpu_to_be32(dma_buf->len);
+	wr->rsp_dmaaddr = cpu_to_be64(dma_buf->paddr);
+
+	wr->r4 = 0;
+
+	wr->u.fcoe.ctl_pri = 0;
+	wr->u.fcoe.cp_en_class = 0;
+	wr->u.fcoe.r3_lo[0] = 0;
+	wr->u.fcoe.r3_lo[1] = 0;
+	csio_scsi_fcp_cmnd(req, (void *)((uintptr_t)wrp +
+					sizeof(struct fw_scsi_write_wr)));
+
+	/* Move WR pointer past command and immediate data */
+	sgl = (struct ulptx_sgl *)((uintptr_t)wrp +
+			      sizeof(struct fw_scsi_write_wr) + ALIGN(imm, 16));
+
+	/* Fill in the DSGL */
+	csio_scsi_init_ultptx_dsgl(hw, req, sgl);
+}
+
+/* Calculate WR size needed for fw_scsi_read_wr/fw_scsi_write_wr */
+#define CSIO_SCSI_DATA_WRSZ(req, oper, sz, imm)				       \
+do {									       \
+	(sz) = sizeof(struct fw_scsi_##oper##_wr) +	/* WR size */          \
+	       ALIGN((imm), 16) +			/* Immed data */       \
+	       sizeof(struct ulptx_sgl);		/* ulptx_sgl */	       \
+									       \
+	if (unlikely((req)->nsge > 1))				               \
+		(sz) += (sizeof(struct ulptx_sge_pair) *		       \
+				(ALIGN(((req)->nsge - 1), 2) / 2));            \
+							/* Data SGE */	       \
+} while (0)
+
+/*
+ * csio_scsi_read - Create a SCSI READ WR.
+ * @req: IO req structure.
+ *
+ * Gets a WR slot in the ingress queue and initializes it with
+ * SCSI READ WR.
+ *
+ */
+static inline void
+csio_scsi_read(struct csio_ioreq *req)
+{
+	struct csio_wr_pair wrp;
+	uint32_t size;
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_scsim *scsim = csio_hw_to_scsim(hw);
+
+	CSIO_SCSI_DATA_WRSZ(req, read, size, scsim->proto_cmd_len);
+	size = ALIGN(size, 16);
+
+	req->drv_status = csio_wr_get(hw, req->eq_idx, size, &wrp);
+	if (likely(req->drv_status == 0)) {
+		if (likely(wrp.size1 >= size)) {
+			/* Initialize WR in one shot */
+			csio_scsi_init_read_wr(req, wrp.addr1, size);
+		} else {
+			uint8_t *tmpwr = csio_q_eq_wrap(hw, req->eq_idx);
+			/*
+			 * Make a temporary copy of the WR and write back
+			 * the copy into the WR pair.
+			 */
+			csio_scsi_init_read_wr(req, (void *)tmpwr, size);
+			memcpy(wrp.addr1, tmpwr, wrp.size1);
+			memcpy(wrp.addr2, tmpwr + wrp.size1, size - wrp.size1);
+		}
+	}
+}
+
+/*
+ * csio_scsi_write - Create a SCSI WRITE WR.
+ * @req: IO req structure.
+ *
+ * Gets a WR slot in the ingress queue and initializes it with
+ * SCSI WRITE WR.
+ *
+ */
+static inline void
+csio_scsi_write(struct csio_ioreq *req)
+{
+	struct csio_wr_pair wrp;
+	uint32_t size;
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_scsim *scsim = csio_hw_to_scsim(hw);
+
+	CSIO_SCSI_DATA_WRSZ(req, write, size, scsim->proto_cmd_len);
+	size = ALIGN(size, 16);
+
+	req->drv_status = csio_wr_get(hw, req->eq_idx, size, &wrp);
+	if (likely(req->drv_status == 0)) {
+		if (likely(wrp.size1 >= size)) {
+			/* Initialize WR in one shot */
+			csio_scsi_init_write_wr(req, wrp.addr1, size);
+		} else {
+			uint8_t *tmpwr = csio_q_eq_wrap(hw, req->eq_idx);
+			/*
+			 * Make a temporary copy of the WR and write back
+			 * the copy into the WR pair.
+			 */
+			csio_scsi_init_write_wr(req, (void *)tmpwr, size);
+			memcpy(wrp.addr1, tmpwr, wrp.size1);
+			memcpy(wrp.addr2, tmpwr + wrp.size1, size - wrp.size1);
+		}
+	}
+}
+
+/*
+ * csio_setup_ddp - Setup DDP buffers for Read request.
+ * @req: IO req structure.
+ *
+ * Checks SGLs/Data buffers are virtually contiguous required for DDP.
+ * If contiguous,driver posts SGLs in the WR otherwise post internal
+ * buffers for such request for DDP.
+ */
+static inline void
+csio_setup_ddp(struct csio_scsim *scsim, struct csio_ioreq *req)
+{
+#ifdef __CSIO_DEBUG__
+	struct csio_hw *hw = req->lnode->hwp;
+#endif
+	struct scatterlist *sgel = NULL;
+	struct scsi_cmnd *scmnd = csio_scsi_cmnd(req);
+	uint64_t sg_addr = 0;
+	uint32_t ddp_pagesz = 4096;
+	uint32_t buf_off;
+	struct csio_dma_buf *dma_buf = NULL;
+	uint32_t alloc_len = 0;
+	uint32_t xfer_len = 0;
+	uint32_t sg_len = 0;
+	uint32_t i;
+
+	scsi_for_each_sg(scmnd, sgel, req->nsge, i) {
+		sg_addr = sg_dma_address(sgel);
+		sg_len	= sg_dma_len(sgel);
+
+		buf_off = sg_addr & (ddp_pagesz - 1);
+
+		/* Except 1st buffer,all buffer addr have to be Page aligned */
+		if (i != 0 && buf_off) {
+			csio_dbg(hw, "SGL addr not DDP aligned (%llx:%d)\n",
+				 sg_addr, sg_len);
+			goto unaligned;
+		}
+
+		/* Except last buffer,all buffer must end on page boundary */
+		if ((i != (req->nsge - 1)) &&
+			((buf_off + sg_len) & (ddp_pagesz - 1))) {
+			csio_dbg(hw,
+				 "SGL addr not ending on page boundary"
+				 "(%llx:%d)\n", sg_addr, sg_len);
+			goto unaligned;
+		}
+	}
+
+	/* SGL's are virtually contiguous. HW will DDP to SGLs */
+	req->dcopy = 0;
+	csio_scsi_read(req);
+
+	return;
+
+unaligned:
+	CSIO_INC_STATS(scsim, n_unaligned);
+	/*
+	 * For unaligned SGLs, driver will allocate internal DDP buffer.
+	 * Once command is completed data from DDP buffer copied to SGLs
+	 */
+	req->dcopy = 1;
+
+	/* Use gen_list to store the DDP buffers */
+	INIT_LIST_HEAD(&req->gen_list);
+	xfer_len = scsi_bufflen(scmnd);
+
+	i = 0;
+	/* Allocate ddp buffers for this request */
+	while (alloc_len < xfer_len) {
+		dma_buf = csio_get_scsi_ddp(scsim);
+		if (dma_buf == NULL || i > scsim->max_sge) {
+			req->drv_status = -EBUSY;
+			break;
+		}
+		alloc_len += dma_buf->len;
+		/* Added to IO req */
+		list_add_tail(&dma_buf->list, &req->gen_list);
+		i++;
+	}
+
+	if (!req->drv_status) {
+		/* set number of ddp bufs used */
+		req->nsge = i;
+		csio_scsi_read(req);
+		return;
+	}
+
+	 /* release dma descs */
+	if (i > 0)
+		csio_put_scsi_ddp_list(scsim, &req->gen_list, i);
+}
+
+/*
+ * csio_scsi_init_abrt_cls_wr - Initialize an ABORT/CLOSE WR.
+ * @req: IO req structure.
+ * @addr: DMA location to place the payload.
+ * @size: Size of WR
+ * @abort: abort OR close
+ *
+ * Wrapper for populating fw_scsi_cmd_wr.
+ */
+static inline void
+csio_scsi_init_abrt_cls_wr(struct csio_ioreq *req, void *addr, uint32_t size,
+			   bool abort)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_rnode *rn = req->rnode;
+	struct fw_scsi_abrt_cls_wr *wr = (struct fw_scsi_abrt_cls_wr *)addr;
+
+	wr->op_immdlen = cpu_to_be32(FW_WR_OP(FW_SCSI_ABRT_CLS_WR));
+	wr->flowid_len16 = cpu_to_be32(FW_WR_FLOWID(rn->flowid) |
+					    FW_WR_LEN16(
+						CSIO_ROUNDUP(size, 16)));
+
+	wr->cookie = (uintptr_t) req;
+	wr->iqid = (uint16_t)cpu_to_be16(csio_q_physiqid(hw, req->iq_idx));
+	wr->tmo_val = (uint8_t) req->tmo;
+	/* 0 for CHK_ALL_IO tells FW to look up t_cookie */
+	wr->sub_opcode_to_chk_all_io =
+				(FW_SCSI_ABRT_CLS_WR_SUB_OPCODE(abort) |
+				 FW_SCSI_ABRT_CLS_WR_CHK_ALL_IO(0));
+	wr->r3[0] = 0;
+	wr->r3[1] = 0;
+	wr->r3[2] = 0;
+	wr->r3[3] = 0;
+	/* Since we re-use the same ioreq for abort as well */
+	wr->t_cookie = (uintptr_t) req;
+}
+
+static inline void
+csio_scsi_abrt_cls(struct csio_ioreq *req, bool abort)
+{
+	struct csio_wr_pair wrp;
+	struct csio_hw *hw = req->lnode->hwp;
+	uint32_t size = ALIGN(sizeof(struct fw_scsi_abrt_cls_wr), 16);
+
+	req->drv_status = csio_wr_get(hw, req->eq_idx, size, &wrp);
+	if (req->drv_status != 0)
+		return;
+
+	if (wrp.size1 >= size) {
+		/* Initialize WR in one shot */
+		csio_scsi_init_abrt_cls_wr(req, wrp.addr1, size, abort);
+	} else {
+		uint8_t *tmpwr = csio_q_eq_wrap(hw, req->eq_idx);
+		/*
+		 * Make a temporary copy of the WR and write back
+		 * the copy into the WR pair.
+		 */
+		csio_scsi_init_abrt_cls_wr(req, (void *)tmpwr, size, abort);
+		memcpy(wrp.addr1, tmpwr, wrp.size1);
+		memcpy(wrp.addr2, tmpwr + wrp.size1, size - wrp.size1);
+	}
+}
+
+/*****************************************************************************/
+/* START: SCSI SM                                                            */
+/*****************************************************************************/
+static void
+csio_scsis_uninit(struct csio_ioreq *req, enum csio_scsi_ev evt)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_scsim *scsim = csio_hw_to_scsim(hw);
+
+	switch (evt) {
+	case CSIO_SCSIE_START_IO:
+
+		if (req->nsge) {
+			if (req->datadir == DMA_TO_DEVICE) {
+				req->dcopy = 0;
+				csio_scsi_write(req);
+			} else
+				csio_setup_ddp(scsim, req);
+		} else {
+			csio_scsi_cmd(req);
+		}
+
+		if (likely(req->drv_status == 0)) {
+			/* change state and enqueue on active_q */
+			csio_set_state(&req->sm, csio_scsis_io_active);
+			list_add_tail(&req->sm.sm_list, &scsim->active_q);
+			csio_wr_issue(hw, req->eq_idx, false);
+			CSIO_INC_STATS(scsim, n_active);
+
+			return;
+		}
+		break;
+
+	case CSIO_SCSIE_START_TM:
+		csio_scsi_cmd(req);
+		if (req->drv_status == 0) {
+			/*
+			 * NOTE: We collect the affected I/Os prior to issuing
+			 * LUN reset, and not after it. This is to prevent
+			 * aborting I/Os that get issued after the LUN reset,
+			 * but prior to LUN reset completion (in the event that
+			 * the host stack has not blocked I/Os to a LUN that is
+			 * being reset.
+			 */
+			csio_set_state(&req->sm, csio_scsis_tm_active);
+			list_add_tail(&req->sm.sm_list, &scsim->active_q);
+			csio_wr_issue(hw, req->eq_idx, false);
+			CSIO_INC_STATS(scsim, n_tm_active);
+		}
+		return;
+
+	case CSIO_SCSIE_ABORT:
+	case CSIO_SCSIE_CLOSE:
+		/*
+		 * NOTE:
+		 * We could get here due to  :
+		 * - a window in the cleanup path of the SCSI module
+		 *   (csio_scsi_abort_io()). Please see NOTE in this function.
+		 * - a window in the time we tried to issue an abort/close
+		 *   of a request to FW, and the FW completed the request
+		 *   itself.
+		 *   Print a message for now, and return INVAL either way.
+		 */
+		req->drv_status = -EINVAL;
+		csio_warn(hw, "Trying to abort/close completed IO:%p!\n", req);
+		break;
+
+	default:
+		csio_dbg(hw, "Unhandled event:%d sent to req:%p\n", evt, req);
+		CSIO_DB_ASSERT(0);
+	}
+}
+
+static void
+csio_scsis_io_active(struct csio_ioreq *req, enum csio_scsi_ev evt)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_scsim *scm = csio_hw_to_scsim(hw);
+	struct csio_rnode *rn;
+
+	switch (evt) {
+	case CSIO_SCSIE_COMPLETED:
+		CSIO_DEC_STATS(scm, n_active);
+		list_del_init(&req->sm.sm_list);
+		csio_set_state(&req->sm, csio_scsis_uninit);
+		/*
+		 * In MSIX mode, with multiple queues, the SCSI compeltions
+		 * could reach us sooner than the FW events sent to indicate
+		 * I-T nexus loss (link down, remote device logo etc). We
+		 * dont want to be returning such I/Os to the upper layer
+		 * immediately, since we wouldnt have reported the I-T nexus
+		 * loss itself. This forces us to serialize such completions
+		 * with the reporting of the I-T nexus loss. Therefore, we
+		 * internally queue up such up such completions in the rnode.
+		 * The reporting of I-T nexus loss to the upper layer is then
+		 * followed by the returning of I/Os in this internal queue.
+		 * Having another state alongwith another queue helps us take
+		 * actions for events such as ABORT received while we are
+		 * in this rnode queue.
+		 */
+		if (unlikely(req->wr_status != FW_SUCCESS)) {
+			rn = req->rnode;
+			/*
+			 * FW says remote device is lost, but rnode
+			 * doesnt reflect it.
+			 */
+			if (csio_scsi_itnexus_loss_error(req->wr_status) &&
+						csio_is_rnode_ready(rn)) {
+				csio_set_state(&req->sm,
+						csio_scsis_shost_cmpl_await);
+				list_add_tail(&req->sm.sm_list,
+					      &rn->host_cmpl_q);
+			}
+		}
+
+		break;
+
+	case CSIO_SCSIE_ABORT:
+		csio_scsi_abrt_cls(req, SCSI_ABORT);
+		if (req->drv_status == 0) {
+			csio_wr_issue(hw, req->eq_idx, false);
+			csio_set_state(&req->sm, csio_scsis_aborting);
+		}
+		break;
+
+	case CSIO_SCSIE_CLOSE:
+		csio_scsi_abrt_cls(req, SCSI_CLOSE);
+		if (req->drv_status == 0) {
+			csio_wr_issue(hw, req->eq_idx, false);
+			csio_set_state(&req->sm, csio_scsis_closing);
+		}
+		break;
+
+	case CSIO_SCSIE_DRVCLEANUP:
+		req->wr_status = FW_HOSTERROR;
+		CSIO_DEC_STATS(scm, n_active);
+		csio_set_state(&req->sm, csio_scsis_uninit);
+		break;
+
+	default:
+		csio_dbg(hw, "Unhandled event:%d sent to req:%p\n", evt, req);
+		CSIO_DB_ASSERT(0);
+	}
+}
+
+static void
+csio_scsis_tm_active(struct csio_ioreq *req, enum csio_scsi_ev evt)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_scsim *scm = csio_hw_to_scsim(hw);
+
+	switch (evt) {
+	case CSIO_SCSIE_COMPLETED:
+		CSIO_DEC_STATS(scm, n_tm_active);
+		list_del_init(&req->sm.sm_list);
+		csio_set_state(&req->sm, csio_scsis_uninit);
+
+		break;
+
+	case CSIO_SCSIE_ABORT:
+		csio_scsi_abrt_cls(req, SCSI_ABORT);
+		if (req->drv_status == 0) {
+			csio_wr_issue(hw, req->eq_idx, false);
+			csio_set_state(&req->sm, csio_scsis_aborting);
+		}
+		break;
+
+
+	case CSIO_SCSIE_CLOSE:
+		csio_scsi_abrt_cls(req, SCSI_CLOSE);
+		if (req->drv_status == 0) {
+			csio_wr_issue(hw, req->eq_idx, false);
+			csio_set_state(&req->sm, csio_scsis_closing);
+		}
+		break;
+
+	case CSIO_SCSIE_DRVCLEANUP:
+		req->wr_status = FW_HOSTERROR;
+		CSIO_DEC_STATS(scm, n_tm_active);
+		csio_set_state(&req->sm, csio_scsis_uninit);
+		break;
+
+	default:
+		csio_dbg(hw, "Unhandled event:%d sent to req:%p\n", evt, req);
+		CSIO_DB_ASSERT(0);
+	}
+}
+
+static void
+csio_scsis_aborting(struct csio_ioreq *req, enum csio_scsi_ev evt)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_scsim *scm = csio_hw_to_scsim(hw);
+
+	switch (evt) {
+	case CSIO_SCSIE_COMPLETED:
+		csio_dbg(hw,
+			 "ioreq %p recvd cmpltd (wr_status:%d) "
+			 "in aborting st\n", req, req->wr_status);
+		/*
+		 * Use -ECANCELED to explicitly tell the ABORTED event that
+		 * the original I/O was returned to driver by FW.
+		 * We dont really care if the I/O was returned with success by
+		 * FW (because the ABORT and completion of the I/O crossed each
+		 * other), or any other return value. Once we are in aborting
+		 * state, the success or failure of the I/O is unimportant to
+		 * us.
+		 */
+		req->drv_status = -ECANCELED;
+		break;
+
+	case CSIO_SCSIE_ABORT:
+		CSIO_INC_STATS(scm, n_abrt_dups);
+		break;
+
+	case CSIO_SCSIE_ABORTED:
+
+		csio_dbg(hw, "abort of %p return status:0x%x drv_status:%x\n",
+			 req, req->wr_status, req->drv_status);
+		/*
+		 * Check if original I/O WR completed before the Abort
+		 * completion.
+		 */
+		if (req->drv_status != -ECANCELED) {
+			csio_warn(hw,
+				  "Abort completed before original I/O,"
+				   " req:%p\n", req);
+			CSIO_DB_ASSERT(0);
+		}
+
+		/*
+		 * There are the following possible scenarios:
+		 * 1. The abort completed successfully, FW returned FW_SUCCESS.
+		 * 2. The completion of an I/O and the receipt of
+		 *    abort for that I/O by the FW crossed each other.
+		 *    The FW returned FW_EINVAL. The original I/O would have
+		 *    returned with FW_SUCCESS or any other SCSI error.
+		 * 3. The FW couldnt sent the abort out on the wire, as there
+		 *    was an I-T nexus loss (link down, remote device logged
+		 *    out etc). FW sent back an appropriate IT nexus loss status
+		 *    for the abort.
+		 * 4. FW sent an abort, but abort timed out (remote device
+		 *    didnt respond). FW replied back with
+		 *    FW_SCSI_ABORT_TIMEDOUT.
+		 * 5. FW couldnt genuinely abort the request for some reason,
+		 *    and sent us an error.
+		 *
+		 * The first 3 scenarios are treated as  succesful abort
+		 * operations by the host, while the last 2 are failed attempts
+		 * to abort. Manipulate the return value of the request
+		 * appropriately, so that host can convey these results
+		 * back to the upper layer.
+		 */
+		if ((req->wr_status == FW_SUCCESS) ||
+		    (req->wr_status == FW_EINVAL) ||
+		    csio_scsi_itnexus_loss_error(req->wr_status))
+			req->wr_status = FW_SCSI_ABORT_REQUESTED;
+
+		CSIO_DEC_STATS(scm, n_active);
+		list_del_init(&req->sm.sm_list);
+		csio_set_state(&req->sm, csio_scsis_uninit);
+		break;
+
+	case CSIO_SCSIE_DRVCLEANUP:
+		req->wr_status = FW_HOSTERROR;
+		CSIO_DEC_STATS(scm, n_active);
+		csio_set_state(&req->sm, csio_scsis_uninit);
+		break;
+
+	case CSIO_SCSIE_CLOSE:
+		/*
+		 * We can receive this event from the module
+		 * cleanup paths, if the FW forgot to reply to the ABORT WR
+		 * and left this ioreq in this state. For now, just ignore
+		 * the event. The CLOSE event is sent to this state, as
+		 * the LINK may have already gone down.
+		 */
+		break;
+
+	default:
+		csio_dbg(hw, "Unhandled event:%d sent to req:%p\n", evt, req);
+		CSIO_DB_ASSERT(0);
+	}
+}
+
+static void
+csio_scsis_closing(struct csio_ioreq *req, enum csio_scsi_ev evt)
+{
+	struct csio_hw *hw = req->lnode->hwp;
+	struct csio_scsim *scm = csio_hw_to_scsim(hw);
+
+	switch (evt) {
+	case CSIO_SCSIE_COMPLETED:
+		csio_dbg(hw,
+			 "ioreq %p recvd cmpltd (wr_status:%d) "
+			 "in closing st\n", req, req->wr_status);
+		/*
+		 * Use -ECANCELED to explicitly tell the CLOSED event that
+		 * the original I/O was returned to driver by FW.
+		 * We dont really care if the I/O was returned with success by
+		 * FW (because the CLOSE and completion of the I/O crossed each
+		 * other), or any other return value. Once we are in aborting
+		 * state, the success or failure of the I/O is unimportant to
+		 * us.
+		 */
+		req->drv_status = -ECANCELED;
+		break;
+
+	case CSIO_SCSIE_CLOSED:
+		/*
+		 * Check if original I/O WR completed before the Close
+		 * completion.
+		 */
+		if (req->drv_status != -ECANCELED) {
+			csio_fatal(hw,
+				   "Close completed before original I/O,"
+				   " req:%p\n", req);
+			CSIO_DB_ASSERT(0);
+		}
+
+		/*
+		 * Either close succeeded, or we issued close to FW at the
+		 * same time FW compelted it to us. Either way, the I/O
+		 * is closed.
+		 */
+		CSIO_DB_ASSERT((req->wr_status == FW_SUCCESS) ||
+					(req->wr_status == FW_EINVAL));
+		req->wr_status = FW_SCSI_CLOSE_REQUESTED;
+
+		CSIO_DEC_STATS(scm, n_active);
+		list_del_init(&req->sm.sm_list);
+		csio_set_state(&req->sm, csio_scsis_uninit);
+		break;
+
+	case CSIO_SCSIE_CLOSE:
+		break;
+
+	case CSIO_SCSIE_DRVCLEANUP:
+		req->wr_status = FW_HOSTERROR;
+		CSIO_DEC_STATS(scm, n_active);
+		csio_set_state(&req->sm, csio_scsis_uninit);
+		break;
+
+	default:
+		csio_dbg(hw, "Unhandled event:%d sent to req:%p\n", evt, req);
+		CSIO_DB_ASSERT(0);
+	}
+}
+
+static void
+csio_scsis_shost_cmpl_await(struct csio_ioreq *req, enum csio_scsi_ev evt)
+{
+	switch (evt) {
+	case CSIO_SCSIE_ABORT:
+	case CSIO_SCSIE_CLOSE:
+		/*
+		 * Just succeed the abort request, and hope that
+		 * the remote device unregister path will cleanup
+		 * this I/O to the upper layer within a sane
+		 * amount of time.
+		 */
+		/*
+		 * A close can come in during a LINK DOWN. The FW would have
+		 * returned us the I/O back, but not the remote device lost
+		 * FW event. In this interval, if the I/O times out at the upper
+		 * layer, a close can come in. Take the same action as abort:
+		 * return success, and hope that the remote device unregister
+		 * path will cleanup this I/O. If the FW still doesnt send
+		 * the msg, the close times out, and the upper layer resorts
+		 * to the next level of error recovery.
+		 */
+		req->drv_status = 0;
+		break;
+	case CSIO_SCSIE_DRVCLEANUP:
+		csio_set_state(&req->sm, csio_scsis_uninit);
+		break;
+	default:
+		csio_dbg(req->lnode->hwp, "Unhandled event:%d sent to req:%p\n",
+			 evt, req);
+		CSIO_DB_ASSERT(0);
+	}
+}
+
+/*
+ * csio_scsi_cmpl_handler - WR completion handler for SCSI.
+ * @hw: HW module.
+ * @wr: The completed WR from the ingress queue.
+ * @len: Length of the WR.
+ * @flb: Freelist buffer array.
+ * @priv: Private object
+ * @scsiwr: Pointer to SCSI WR.
+ *
+ * This is the WR completion handler called per completion from the
+ * ISR. It is called with lock held. It walks past the RSS and CPL message
+ * header where the actual WR is present.
+ * It then gets the status, WR handle (ioreq pointer) and the len of
+ * the WR, based on WR opcode. Only on a non-good status is the entire
+ * WR copied into the WR cache (ioreq->fw_wr).
+ * The ioreq corresponding to the WR is returned to the caller.
+ * NOTE: The SCSI queue doesnt allocate a freelist today, hence
+ * no freelist buffer is expected.
+ */
+struct csio_ioreq *
+csio_scsi_cmpl_handler(struct csio_hw *hw, void *wr, uint32_t len,
+		     struct csio_fl_dma_buf *flb, void *priv, uint8_t **scsiwr)
+{
+	struct csio_ioreq *ioreq = NULL;
+	struct cpl_fw6_msg *cpl;
+	uint8_t *tempwr;
+	uint8_t	status;
+	struct csio_scsim *scm = csio_hw_to_scsim(hw);
+
+	/* skip RSS header */
+	cpl = (struct cpl_fw6_msg *)((uintptr_t)wr + sizeof(__be64));
+
+	if (unlikely(cpl->opcode != CPL_FW6_MSG)) {
+		csio_warn(hw, "Error: Invalid CPL msg %x recvd on SCSI q\n",
+			  cpl->opcode);
+		CSIO_INC_STATS(scm, n_inval_cplop);
+		return NULL;
+	}
+
+	tempwr = (uint8_t *)(cpl->data);
+	status = csio_wr_status(tempwr);
+	*scsiwr = tempwr;
+
+	if (likely((*tempwr == FW_SCSI_READ_WR) ||
+			(*tempwr == FW_SCSI_WRITE_WR) ||
+			(*tempwr == FW_SCSI_CMD_WR))) {
+		ioreq = (struct csio_ioreq *)((uintptr_t)
+				 (((struct fw_scsi_read_wr *)tempwr)->cookie));
+		CSIO_DB_ASSERT(virt_addr_valid(ioreq));
+
+		ioreq->wr_status = status;
+
+		return ioreq;
+	}
+
+	if (*tempwr == FW_SCSI_ABRT_CLS_WR) {
+		ioreq = (struct csio_ioreq *)((uintptr_t)
+			 (((struct fw_scsi_abrt_cls_wr *)tempwr)->cookie));
+		CSIO_DB_ASSERT(virt_addr_valid(ioreq));
+
+		ioreq->wr_status = status;
+		return ioreq;
+	}
+
+	csio_warn(hw, "WR with invalid opcode in SCSI IQ: %x\n", *tempwr);
+	CSIO_INC_STATS(scm, n_inval_scsiop);
+	return NULL;
+}
+
+/*
+ * csio_scsi_cleanup_io_q - Cleanup the given queue.
+ * @scm: SCSI module.
+ * @q: Queue to be cleaned up.
+ *
+ * Called with lock held. Has to exit with lock held.
+ */
+void
+csio_scsi_cleanup_io_q(struct csio_scsim *scm, struct list_head *q)
+{
+	struct csio_hw *hw = scm->hw;
+	struct csio_ioreq *ioreq;
+	struct list_head *tmp, *next;
+	struct scsi_cmnd *scmnd;
+
+	/* Call back the completion routines of the active_q */
+	list_for_each_safe(tmp, next, q) {
+		ioreq = (struct csio_ioreq *)tmp;
+		csio_scsi_drvcleanup(ioreq);
+		list_del_init(&ioreq->sm.sm_list);
+		scmnd = csio_scsi_cmnd(ioreq);
+		spin_unlock_irq(&hw->lock);
+
+		/*
+		 * Upper layers may have cleared this command, hence this
+		 * check to avoid accessing stale references.
+		 */
+		if (scmnd != NULL)
+			ioreq->io_cbfn(hw, ioreq);
+
+		spin_lock_irq(&scm->freelist_lock);
+		csio_put_scsi_ioreq(scm, ioreq);
+		spin_unlock_irq(&scm->freelist_lock);
+
+		spin_lock_irq(&hw->lock);
+	}
+}
+
+#define CSIO_SCSI_ABORT_Q_POLL_MS		2000
+
+static void
+csio_abrt_cls(struct csio_ioreq *ioreq, struct scsi_cmnd *scmnd)
+{
+	struct csio_lnode *ln = ioreq->lnode;
+	struct csio_hw *hw = ln->hwp;
+	int ready = 0;
+	struct csio_scsim *scsim = csio_hw_to_scsim(hw);
+	int rv;
+
+	if (csio_scsi_cmnd(ioreq) != scmnd) {
+		CSIO_INC_STATS(scsim, n_abrt_race_comp);
+		return;
+	}
+
+	ready = csio_is_lnode_ready(ln);
+
+	rv = csio_do_abrt_cls(hw, ioreq, (ready ? SCSI_ABORT : SCSI_CLOSE));
+	if (rv != 0) {
+		if (ready)
+			CSIO_INC_STATS(scsim, n_abrt_busy_error);
+		else
+			CSIO_INC_STATS(scsim, n_cls_busy_error);
+	}
+}
+
+/*
+ * csio_scsi_abort_io_q - Abort all I/Os on given queue
+ * @scm: SCSI module.
+ * @q: Queue to abort.
+ * @tmo: Timeout in ms
+ *
+ * Attempt to abort all I/Os on given queue, and wait for a max
+ * of tmo milliseconds for them to complete. Returns success
+ * if all I/Os are aborted. Else returns -ETIMEDOUT.
+ * Should be entered with lock held. Exits with lock held.
+ * NOTE:
+ * Lock has to be held across the loop that aborts I/Os, since dropping the lock
+ * in between can cause the list to be corrupted. As a result, the caller
+ * of this function has to ensure that the number of I/os to be aborted
+ * is finite enough to not cause lock-held-for-too-long issues.
+ */
+static int
+csio_scsi_abort_io_q(struct csio_scsim *scm, struct list_head *q, uint32_t tmo)
+{
+	struct csio_hw *hw = scm->hw;
+	struct list_head *tmp, *next;
+	int count = CSIO_ROUNDUP(tmo, CSIO_SCSI_ABORT_Q_POLL_MS);
+	struct scsi_cmnd *scmnd;
+
+	if (list_empty(q))
+		return 0;
+
+	csio_dbg(hw, "Aborting SCSI I/Os\n");
+
+	/* Now abort/close I/Os in the queue passed */
+	list_for_each_safe(tmp, next, q) {
+		scmnd = csio_scsi_cmnd((struct csio_ioreq *)tmp);
+		csio_abrt_cls((struct csio_ioreq *)tmp, scmnd);
+	}
+
+	/* Wait till all active I/Os are completed/aborted/closed */
+	while (!list_empty(q) && count--) {
+		spin_unlock_irq(&hw->lock);
+		msleep(CSIO_SCSI_ABORT_Q_POLL_MS);
+		spin_lock_irq(&hw->lock);
+	}
+
+	/* all aborts completed */
+	if (list_empty(q))
+		return 0;
+
+	return -ETIMEDOUT;
+}
+
+/*
+ * csio_scsim_cleanup_io - Cleanup all I/Os in SCSI module.
+ * @scm: SCSI module.
+ * @abort: abort required.
+ * Called with lock held, should exit with lock held.
+ * Can sleep when waiting for I/Os to complete.
+ */
+int
+csio_scsim_cleanup_io(struct csio_scsim *scm, bool abort)
+{
+	struct csio_hw *hw = scm->hw;
+	int rv = 0;
+	int count = CSIO_ROUNDUP(60 * 1000, CSIO_SCSI_ABORT_Q_POLL_MS);
+
+	/* No I/Os pending */
+	if (list_empty(&scm->active_q))
+		return 0;
+
+	/* Wait until all active I/Os are completed */
+	while (!list_empty(&scm->active_q) && count--) {
+		spin_unlock_irq(&hw->lock);
+		msleep(CSIO_SCSI_ABORT_Q_POLL_MS);
+		spin_lock_irq(&hw->lock);
+	}
+
+	/* all I/Os completed */
+	if (list_empty(&scm->active_q))
+		return 0;
+
+	/* Else abort */
+	if (abort) {
+		rv = csio_scsi_abort_io_q(scm, &scm->active_q, 30000);
+		if (rv == 0)
+			return rv;
+		csio_dbg(hw, "Some I/O aborts timed out, cleaning up..\n");
+	}
+
+	csio_scsi_cleanup_io_q(scm, &scm->active_q);
+
+	CSIO_DB_ASSERT(list_empty(&scm->active_q));
+
+	return rv;
+}
+
+/*
+ * csio_scsim_cleanup_io_lnode - Cleanup all I/Os of given lnode.
+ * @scm: SCSI module.
+ * @lnode: lnode
+ *
+ * Called with lock held, should exit with lock held.
+ * Can sleep (with dropped lock) when waiting for I/Os to complete.
+ */
+int
+csio_scsim_cleanup_io_lnode(struct csio_scsim *scm, struct csio_lnode *ln)
+{
+	struct csio_hw *hw = scm->hw;
+	struct csio_scsi_level_data sld;
+	int rv;
+	int count = CSIO_ROUNDUP(60 * 1000, CSIO_SCSI_ABORT_Q_POLL_MS);
+
+	csio_dbg(hw, "Gathering all SCSI I/Os on lnode %p\n", ln);
+
+	sld.level = CSIO_LEV_LNODE;
+	sld.lnode = ln;
+	INIT_LIST_HEAD(&ln->cmpl_q);
+	csio_scsi_gather_active_ios(scm, &sld, &ln->cmpl_q);
+
+	/* No I/Os pending on this lnode  */
+	if (list_empty(&ln->cmpl_q))
+		return 0;
+
+	/* Wait until all active I/Os on this lnode are completed */
+	while (!list_empty(&ln->cmpl_q) && count--) {
+		spin_unlock_irq(&hw->lock);
+		msleep(CSIO_SCSI_ABORT_Q_POLL_MS);
+		spin_lock_irq(&hw->lock);
+	}
+
+	/* all I/Os completed */
+	if (list_empty(&ln->cmpl_q))
+		return 0;
+
+	csio_dbg(hw, "Some I/Os pending on ln:%p, aborting them..\n", ln);
+
+	/* I/Os are pending, abort them */
+	rv = csio_scsi_abort_io_q(scm, &ln->cmpl_q, 30000);
+	if (rv != 0) {
+		csio_dbg(hw, "Some I/O aborts timed out, cleaning up..\n");
+		csio_scsi_cleanup_io_q(scm, &ln->cmpl_q);
+	}
+
+	CSIO_DB_ASSERT(list_empty(&ln->cmpl_q));
+
+	return rv;
+}
+
+static ssize_t
+csio_show_hw_state(struct device *dev,
+		   struct device_attribute *attr, char *buf)
+{
+	struct csio_lnode *ln = shost_priv(class_to_shost(dev));
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	if (csio_is_hw_ready(hw))
+		return snprintf(buf, PAGE_SIZE, "ready\n");
+	else
+		return snprintf(buf, PAGE_SIZE, "not ready\n");
+}
+
+/* Device reset */
+static ssize_t
+csio_device_reset(struct device *dev,
+		   struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct csio_lnode *ln = shost_priv(class_to_shost(dev));
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	if (*buf != '1')
+		return -EINVAL;
+
+	/* Delete NPIV lnodes */
+	 csio_lnodes_exit(hw, 1);
+
+	/* Block upper IOs */
+	csio_lnodes_block_request(hw);
+
+	spin_lock_irq(&hw->lock);
+	csio_hw_reset(hw);
+	spin_unlock_irq(&hw->lock);
+
+	/* Unblock upper IOs */
+	csio_lnodes_unblock_request(hw);
+	return count;
+}
+
+/* disable port */
+static ssize_t
+csio_disable_port(struct device *dev,
+		   struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct csio_lnode *ln = shost_priv(class_to_shost(dev));
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	bool disable;
+
+	if (*buf == '1' || *buf == '0')
+		disable = (*buf == '1') ? true : false;
+	else
+		return -EINVAL;
+
+	/* Block upper IOs */
+	csio_lnodes_block_by_port(hw, ln->portid);
+
+	spin_lock_irq(&hw->lock);
+	csio_disable_lnodes(hw, ln->portid, disable);
+	spin_unlock_irq(&hw->lock);
+
+	/* Unblock upper IOs */
+	csio_lnodes_unblock_by_port(hw, ln->portid);
+	return count;
+}
+
+/* Show debug level */
+static ssize_t
+csio_show_dbg_level(struct device *dev,
+		   struct device_attribute *attr, char *buf)
+{
+	struct csio_lnode *ln = shost_priv(class_to_shost(dev));
+
+	return snprintf(buf, PAGE_SIZE, "%x\n", ln->params.log_level);
+}
+
+/* Store debug level */
+static ssize_t
+csio_store_dbg_level(struct device *dev,
+		   struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct csio_lnode *ln = shost_priv(class_to_shost(dev));
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	uint32_t dbg_level = 0;
+
+	if (!isdigit(buf[0]))
+		return -EINVAL;
+
+	if (sscanf(buf, "%i", &dbg_level))
+		return -EINVAL;
+
+	ln->params.log_level = dbg_level;
+	hw->params.log_level = dbg_level;
+
+	return 0;
+}
+
+static DEVICE_ATTR(hw_state, S_IRUGO, csio_show_hw_state, NULL);
+static DEVICE_ATTR(device_reset, S_IRUGO | S_IWUSR, NULL, csio_device_reset);
+static DEVICE_ATTR(disable_port, S_IRUGO | S_IWUSR, NULL, csio_disable_port);
+static DEVICE_ATTR(dbg_level, S_IRUGO | S_IWUSR, csio_show_dbg_level,
+		  csio_store_dbg_level);
+
+static struct device_attribute *csio_fcoe_lport_attrs[] = {
+	&dev_attr_hw_state,
+	&dev_attr_device_reset,
+	&dev_attr_disable_port,
+	&dev_attr_dbg_level,
+	NULL,
+};
+
+static ssize_t
+csio_show_num_reg_rnodes(struct device *dev,
+		     struct device_attribute *attr, char *buf)
+{
+	struct csio_lnode *ln = shost_priv(class_to_shost(dev));
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", ln->num_reg_rnodes);
+}
+
+static DEVICE_ATTR(num_reg_rnodes, S_IRUGO, csio_show_num_reg_rnodes, NULL);
+
+static struct device_attribute *csio_fcoe_vport_attrs[] = {
+	&dev_attr_num_reg_rnodes,
+	&dev_attr_dbg_level,
+	NULL,
+};
+
+static inline uint32_t
+csio_scsi_copy_to_sgl(struct csio_hw *hw, struct csio_ioreq *req)
+{
+	struct scsi_cmnd *scmnd  = (struct scsi_cmnd *)csio_scsi_cmnd(req);
+	struct scatterlist *sg;
+	uint32_t bytes_left;
+	uint32_t bytes_copy;
+	uint32_t buf_off = 0;
+	uint32_t start_off = 0;
+	uint32_t sg_off = 0;
+	void *sg_addr;
+	void *buf_addr;
+	struct csio_dma_buf *dma_buf;
+
+	bytes_left = scsi_bufflen(scmnd);
+	sg = scsi_sglist(scmnd);
+	dma_buf = (struct csio_dma_buf *)csio_list_next(&req->gen_list);
+
+	/* Copy data from driver buffer to SGs of SCSI CMD */
+	while (bytes_left > 0 && sg && dma_buf) {
+		if (buf_off >= dma_buf->len) {
+			buf_off = 0;
+			dma_buf = (struct csio_dma_buf *)
+					csio_list_next(dma_buf);
+			continue;
+		}
+
+		if (start_off >= sg->length) {
+			start_off -= sg->length;
+			sg = sg_next(sg);
+			continue;
+		}
+
+		buf_addr = dma_buf->vaddr + buf_off;
+		sg_off = sg->offset + start_off;
+		bytes_copy = min((dma_buf->len - buf_off),
+				sg->length - start_off);
+		bytes_copy = min((uint32_t)(PAGE_SIZE - (sg_off & ~PAGE_MASK)),
+				 bytes_copy);
+
+		sg_addr = kmap_atomic(sg_page(sg) + (sg_off >> PAGE_SHIFT));
+		if (!sg_addr) {
+			csio_err(hw, "failed to kmap sg:%p of ioreq:%p\n",
+				sg, req);
+			break;
+		}
+
+		csio_dbg(hw, "copy_to_sgl:sg_addr %p sg_off %d buf %p len %d\n",
+				sg_addr, sg_off, buf_addr, bytes_copy);
+		memcpy(sg_addr + (sg_off & ~PAGE_MASK), buf_addr, bytes_copy);
+		kunmap_atomic(sg_addr);
+
+		start_off +=  bytes_copy;
+		buf_off += bytes_copy;
+		bytes_left -= bytes_copy;
+	}
+
+	if (bytes_left > 0)
+		return DID_ERROR;
+	else
+		return DID_OK;
+}
+
+/*
+ * csio_scsi_err_handler - SCSI error handler.
+ * @hw: HW module.
+ * @req: IO request.
+ *
+ */
+static inline void
+csio_scsi_err_handler(struct csio_hw *hw, struct csio_ioreq *req)
+{
+	struct scsi_cmnd *cmnd  = (struct scsi_cmnd *)csio_scsi_cmnd(req);
+	struct csio_scsim *scm = csio_hw_to_scsim(hw);
+	struct fcp_resp_with_ext *fcp_resp;
+	struct fcp_resp_rsp_info *rsp_info;
+	struct csio_dma_buf *dma_buf;
+	uint8_t flags, scsi_status = 0;
+	uint32_t host_status = DID_OK;
+	uint32_t rsp_len = 0, sns_len = 0;
+	struct csio_rnode *rn = (struct csio_rnode *)(cmnd->device->hostdata);
+
+
+	switch (req->wr_status) {
+	case FW_HOSTERROR:
+		if (unlikely(!csio_is_hw_ready(hw)))
+			return;
+
+		host_status = DID_ERROR;
+		CSIO_INC_STATS(scm, n_hosterror);
+
+		break;
+	case FW_SCSI_RSP_ERR:
+		dma_buf = &req->dma_buf;
+		fcp_resp = (struct fcp_resp_with_ext *)dma_buf->vaddr;
+		rsp_info = (struct fcp_resp_rsp_info *)(fcp_resp + 1);
+		flags = fcp_resp->resp.fr_flags;
+		scsi_status = fcp_resp->resp.fr_status;
+
+		if (flags & FCP_RSP_LEN_VAL) {
+			rsp_len = be32_to_cpu(fcp_resp->ext.fr_rsp_len);
+			if ((rsp_len != 0 && rsp_len != 4 && rsp_len != 8) ||
+				(rsp_info->rsp_code != FCP_TMF_CMPL)) {
+				host_status = DID_ERROR;
+				goto out;
+			}
+		}
+
+		if ((flags & FCP_SNS_LEN_VAL) && fcp_resp->ext.fr_sns_len) {
+			sns_len = be32_to_cpu(fcp_resp->ext.fr_sns_len);
+			if (sns_len > SCSI_SENSE_BUFFERSIZE)
+				sns_len = SCSI_SENSE_BUFFERSIZE;
+
+			memcpy(cmnd->sense_buffer,
+			       &rsp_info->_fr_resvd[0] + rsp_len, sns_len);
+			CSIO_INC_STATS(scm, n_autosense);
+		}
+
+		scsi_set_resid(cmnd, 0);
+
+		/* Under run */
+		if (flags & FCP_RESID_UNDER) {
+			scsi_set_resid(cmnd,
+				       be32_to_cpu(fcp_resp->ext.fr_resid));
+
+			if (!(flags & FCP_SNS_LEN_VAL) &&
+			    (scsi_status == SAM_STAT_GOOD) &&
+			    ((scsi_bufflen(cmnd) - scsi_get_resid(cmnd))
+							< cmnd->underflow))
+				host_status = DID_ERROR;
+		} else if (flags & FCP_RESID_OVER)
+			host_status = DID_ERROR;
+
+		CSIO_INC_STATS(scm, n_rsperror);
+		break;
+
+	case FW_SCSI_OVER_FLOW_ERR:
+		csio_warn(hw,
+			  "Over-flow error,cmnd:0x%x expected len:0x%x"
+			  " resid:0x%x\n", cmnd->cmnd[0],
+			  scsi_bufflen(cmnd), scsi_get_resid(cmnd));
+		host_status = DID_ERROR;
+		CSIO_INC_STATS(scm, n_ovflerror);
+		break;
+
+	case FW_SCSI_UNDER_FLOW_ERR:
+		csio_warn(hw,
+			  "Under-flow error,cmnd:0x%x expected"
+			  " len:0x%x resid:0x%x lun:0x%x ssn:0x%x\n",
+			  cmnd->cmnd[0], scsi_bufflen(cmnd),
+			  scsi_get_resid(cmnd), cmnd->device->lun,
+			  rn->flowid);
+		host_status = DID_ERROR;
+		CSIO_INC_STATS(scm, n_unflerror);
+		break;
+
+	case FW_SCSI_ABORT_REQUESTED:
+	case FW_SCSI_ABORTED:
+	case FW_SCSI_CLOSE_REQUESTED:
+		csio_dbg(hw, "Req %p cmd:%p op:%x %s\n", req, cmnd,
+			     cmnd->cmnd[0],
+			    (req->wr_status == FW_SCSI_CLOSE_REQUESTED) ?
+			    "closed" : "aborted");
+		/*
+		 * csio_eh_abort_handler checks this value to
+		 * succeed or fail the abort request.
+		 */
+		host_status = DID_REQUEUE;
+		if (req->wr_status == FW_SCSI_CLOSE_REQUESTED)
+			CSIO_INC_STATS(scm, n_closed);
+		else
+			CSIO_INC_STATS(scm, n_aborted);
+		break;
+
+	case FW_SCSI_ABORT_TIMEDOUT:
+		/* FW timed out the abort itself */
+		csio_dbg(hw, "FW timed out abort req:%p cmnd:%p status:%x\n",
+			 req, cmnd, req->wr_status);
+		host_status = DID_ERROR;
+		CSIO_INC_STATS(scm, n_abrt_timedout);
+		break;
+
+	case FW_RDEV_NOT_READY:
+		/*
+		 * In firmware, a RDEV can get into this state
+		 * temporarily, before moving into dissapeared/lost
+		 * state. So, the driver should complete the request equivalent
+		 * to device-disappeared!
+		 */
+		CSIO_INC_STATS(scm, n_rdev_nr_error);
+		host_status = DID_ERROR;
+		break;
+
+	case FW_ERR_RDEV_LOST:
+		CSIO_INC_STATS(scm, n_rdev_lost_error);
+		host_status = DID_ERROR;
+		break;
+
+	case FW_ERR_RDEV_LOGO:
+		CSIO_INC_STATS(scm, n_rdev_logo_error);
+		host_status = DID_ERROR;
+		break;
+
+	case FW_ERR_RDEV_IMPL_LOGO:
+		host_status = DID_ERROR;
+		break;
+
+	case FW_ERR_LINK_DOWN:
+		CSIO_INC_STATS(scm, n_link_down_error);
+		host_status = DID_ERROR;
+		break;
+
+	case FW_FCOE_NO_XCHG:
+		CSIO_INC_STATS(scm, n_no_xchg_error);
+		host_status = DID_ERROR;
+		break;
+
+	default:
+		csio_err(hw, "Unknown SCSI FW WR status:%d req:%p cmnd:%p\n",
+			    req->wr_status, req, cmnd);
+		CSIO_DB_ASSERT(0);
+
+		CSIO_INC_STATS(scm, n_unknown_error);
+		host_status = DID_ERROR;
+		break;
+	}
+
+out:
+	if (req->nsge > 0)
+		scsi_dma_unmap(cmnd);
+
+	cmnd->result = (((host_status) << 16) | scsi_status);
+	cmnd->scsi_done(cmnd);
+
+	/* Wake up waiting threads */
+	csio_scsi_cmnd(req) = NULL;
+	complete_all(&req->cmplobj);
+}
+
+/*
+ * csio_scsi_cbfn - SCSI callback function.
+ * @hw: HW module.
+ * @req: IO request.
+ *
+ */
+static void
+csio_scsi_cbfn(struct csio_hw *hw, struct csio_ioreq *req)
+{
+	struct scsi_cmnd *cmnd  = (struct scsi_cmnd *)csio_scsi_cmnd(req);
+	uint8_t scsi_status = SAM_STAT_GOOD;
+	uint32_t host_status = DID_OK;
+
+	if (likely(req->wr_status == FW_SUCCESS)) {
+		if (req->nsge > 0) {
+			scsi_dma_unmap(cmnd);
+			if (req->dcopy)
+				host_status = csio_scsi_copy_to_sgl(hw, req);
+		}
+
+		cmnd->result = (((host_status) << 16) | scsi_status);
+		cmnd->scsi_done(cmnd);
+		csio_scsi_cmnd(req) = NULL;
+		CSIO_INC_STATS(csio_hw_to_scsim(hw), n_tot_success);
+	} else {
+		/* Error handling */
+		csio_scsi_err_handler(hw, req);
+	}
+}
+
+/**
+ * csio_queuecommand_lck - Entry point to kickstart an I/O request.
+ * @cmnd:	The I/O request from ML.
+ * @done:	The ML callback routine.
+ *
+ * This routine does the following:
+ *	- Checks for HW and Rnode module readiness.
+ *	- Gets a free ioreq structure (which is already initialized
+ *	  to uninit during its allocation).
+ *	- Maps SG elements.
+ *	- Initializes ioreq members.
+ *	- Kicks off the SCSI state machine for this IO.
+ *	- Returns busy status on error.
+ */
+static int
+csio_queuecommand_lck(struct scsi_cmnd *cmnd, void (*done)(struct scsi_cmnd *))
+{
+	struct csio_lnode *ln = shost_priv(cmnd->device->host);
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	struct csio_scsim *scsim = csio_hw_to_scsim(hw);
+	struct csio_rnode *rn = (struct csio_rnode *)(cmnd->device->hostdata);
+	struct csio_ioreq *ioreq = NULL;
+	unsigned long flags;
+	int nsge = 0;
+	int rv = SCSI_MLQUEUE_HOST_BUSY, nr;
+	int retval;
+	int cpu;
+	struct csio_scsi_qset *sqset;
+	struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
+
+	if (!blk_rq_cpu_valid(cmnd->request))
+		cpu = smp_processor_id();
+	else
+		cpu = cmnd->request->cpu;
+
+	sqset = &hw->sqset[ln->portid][cpu];
+
+	nr = fc_remote_port_chkready(rport);
+	if (nr) {
+		cmnd->result = nr;
+		CSIO_INC_STATS(scsim, n_rn_nr_error);
+		goto err_done;
+	}
+
+	if (unlikely(!csio_is_hw_ready(hw))) {
+		cmnd->result = (DID_REQUEUE << 16);
+		CSIO_INC_STATS(scsim, n_hw_nr_error);
+		goto err_done;
+	}
+
+	/* Get req->nsge, if there are SG elements to be mapped  */
+	nsge = scsi_dma_map(cmnd);
+	if (unlikely(nsge < 0)) {
+		CSIO_INC_STATS(scsim, n_dmamap_error);
+		goto err;
+	}
+
+	/* Do we support so many mappings? */
+	if (unlikely(nsge > scsim->max_sge)) {
+		csio_warn(hw,
+			  "More SGEs than can be supported."
+			  " SGEs: %d, Max SGEs: %d\n", nsge, scsim->max_sge);
+		CSIO_INC_STATS(scsim, n_unsupp_sge_error);
+		goto err_dma_unmap;
+	}
+
+	/* Get a free ioreq structure - SM is already set to uninit */
+	ioreq = csio_get_scsi_ioreq_lock(hw, scsim);
+	if (!ioreq) {
+		csio_err(hw, "Out of I/O request elements. Active #:%d\n",
+			 scsim->stats.n_active);
+		CSIO_INC_STATS(scsim, n_no_req_error);
+		goto err_dma_unmap;
+	}
+
+	ioreq->nsge		= nsge;
+	ioreq->lnode		= ln;
+	ioreq->rnode		= rn;
+	ioreq->iq_idx		= sqset->iq_idx;
+	ioreq->eq_idx		= sqset->eq_idx;
+	ioreq->wr_status	= 0;
+	ioreq->drv_status	= 0;
+	csio_scsi_cmnd(ioreq)	= (void *)cmnd;
+	ioreq->tmo		= 0;
+	ioreq->datadir		= cmnd->sc_data_direction;
+
+	if (cmnd->sc_data_direction == DMA_TO_DEVICE) {
+		CSIO_INC_STATS(ln, n_output_requests);
+		ln->stats.n_output_bytes += scsi_bufflen(cmnd);
+	} else if (cmnd->sc_data_direction == DMA_FROM_DEVICE) {
+		CSIO_INC_STATS(ln, n_input_requests);
+		ln->stats.n_input_bytes += scsi_bufflen(cmnd);
+	} else
+		CSIO_INC_STATS(ln, n_control_requests);
+
+	/* Set cbfn */
+	ioreq->io_cbfn = csio_scsi_cbfn;
+
+	/* Needed during abort */
+	cmnd->host_scribble = (unsigned char *)ioreq;
+	cmnd->scsi_done = done;
+	cmnd->SCp.Message = 0;
+
+	/* Kick off SCSI IO SM on the ioreq */
+	spin_lock_irqsave(&hw->lock, flags);
+	retval = csio_scsi_start_io(ioreq);
+	spin_unlock_irqrestore(&hw->lock, flags);
+
+	if (retval != 0) {
+		csio_err(hw, "ioreq: %p couldnt be started, status:%d\n",
+			 ioreq, retval);
+		CSIO_INC_STATS(scsim, n_busy_error);
+		goto err_put_req;
+	}
+
+	return 0;
+
+err_put_req:
+	csio_put_scsi_ioreq_lock(hw, scsim, ioreq);
+err_dma_unmap:
+	if (nsge > 0)
+		scsi_dma_unmap(cmnd);
+err:
+	return rv;
+
+err_done:
+	done(cmnd);
+	return 0;
+}
+
+static DEF_SCSI_QCMD(csio_queuecommand);
+
+static int
+csio_do_abrt_cls(struct csio_hw *hw, struct csio_ioreq *ioreq, bool abort)
+{
+	int rv;
+	int cpu = smp_processor_id();
+	struct csio_lnode *ln = ioreq->lnode;
+	struct csio_scsi_qset *sqset = &hw->sqset[ln->portid][cpu];
+
+	ioreq->tmo = CSIO_SCSI_ABRT_TMO_MS;
+	/*
+	 * Use current processor queue for posting the abort/close, but retain
+	 * the ingress queue ID of the original I/O being aborted/closed - we
+	 * need the abort/close completion to be received on the same queue
+	 * as the original I/O.
+	 */
+	ioreq->eq_idx = sqset->eq_idx;
+
+	if (abort == SCSI_ABORT)
+		rv = csio_scsi_abort(ioreq);
+	else
+		rv = csio_scsi_close(ioreq);
+
+	return rv;
+}
+
+static int
+csio_eh_abort_handler(struct scsi_cmnd *cmnd)
+{
+	struct csio_ioreq *ioreq;
+	struct csio_lnode *ln = shost_priv(cmnd->device->host);
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	struct csio_scsim *scsim = csio_hw_to_scsim(hw);
+	int ready = 0, ret;
+	unsigned long tmo = 0;
+	int rv;
+	struct csio_rnode *rn = (struct csio_rnode *)(cmnd->device->hostdata);
+
+	ret = fc_block_scsi_eh(cmnd);
+	if (ret)
+		return ret;
+
+	ioreq = (struct csio_ioreq *)cmnd->host_scribble;
+	if (!ioreq)
+		return SUCCESS;
+
+	if (!rn)
+		return FAILED;
+
+	csio_dbg(hw,
+		 "Request to abort ioreq:%p cmd:%p cdb:%08llx"
+		 " ssni:0x%x lun:%d iq:0x%x\n",
+		ioreq, cmnd, *((uint64_t *)cmnd->cmnd), rn->flowid,
+		cmnd->device->lun, csio_q_physiqid(hw, ioreq->iq_idx));
+
+	if (((struct scsi_cmnd *)csio_scsi_cmnd(ioreq)) != cmnd) {
+		CSIO_INC_STATS(scsim, n_abrt_race_comp);
+		return SUCCESS;
+	}
+
+	ready = csio_is_lnode_ready(ln);
+	tmo = CSIO_SCSI_ABRT_TMO_MS;
+
+	spin_lock_irq(&hw->lock);
+	rv = csio_do_abrt_cls(hw, ioreq, (ready ? SCSI_ABORT : SCSI_CLOSE));
+	spin_unlock_irq(&hw->lock);
+
+	if (rv != 0) {
+		if (rv == -EINVAL) {
+			/* Return success, if abort/close request issued on
+			 * already completed IO
+			 */
+			return SUCCESS;
+		}
+		if (ready)
+			CSIO_INC_STATS(scsim, n_abrt_busy_error);
+		else
+			CSIO_INC_STATS(scsim, n_cls_busy_error);
+
+		goto inval_scmnd;
+	}
+
+	/* Wait for completion */
+	init_completion(&ioreq->cmplobj);
+	wait_for_completion_timeout(&ioreq->cmplobj, msecs_to_jiffies(tmo));
+
+	/* FW didnt respond to abort within our timeout */
+	if (((struct scsi_cmnd *)csio_scsi_cmnd(ioreq)) == cmnd) {
+
+		csio_err(hw, "Abort timed out -- req: %p\n", ioreq);
+		CSIO_INC_STATS(scsim, n_abrt_timedout);
+
+inval_scmnd:
+		if (ioreq->nsge > 0)
+			scsi_dma_unmap(cmnd);
+
+		spin_lock_irq(&hw->lock);
+		csio_scsi_cmnd(ioreq) = NULL;
+		spin_unlock_irq(&hw->lock);
+
+		cmnd->result = (DID_ERROR << 16);
+		cmnd->scsi_done(cmnd);
+
+		return FAILED;
+	}
+
+	/* FW successfully aborted the request */
+	if (host_byte(cmnd->result) == DID_REQUEUE) {
+		csio_info(hw,
+			"Aborted SCSI command to (%d:%d) serial#:0x%lx\n",
+			cmnd->device->id, cmnd->device->lun,
+			cmnd->serial_number);
+		return SUCCESS;
+	} else {
+		csio_info(hw,
+			"Failed to abort SCSI command, (%d:%d) serial#:0x%lx\n",
+			cmnd->device->id, cmnd->device->lun,
+			cmnd->serial_number);
+		return FAILED;
+	}
+}
+
+/*
+ * csio_tm_cbfn - TM callback function.
+ * @hw: HW module.
+ * @req: IO request.
+ *
+ * Cache the result in 'cmnd', since ioreq will be freed soon
+ * after we return from here, and the waiting thread shouldnt trust
+ * the ioreq contents.
+ */
+static void
+csio_tm_cbfn(struct csio_hw *hw, struct csio_ioreq *req)
+{
+	struct scsi_cmnd *cmnd  = (struct scsi_cmnd *)csio_scsi_cmnd(req);
+	struct csio_dma_buf *dma_buf;
+	uint8_t flags = 0;
+	struct fcp_resp_with_ext *fcp_resp;
+	struct fcp_resp_rsp_info *rsp_info;
+
+	csio_dbg(hw, "req: %p in csio_tm_cbfn status: %d\n",
+		      req, req->wr_status);
+
+	/* Cache FW return status */
+	cmnd->SCp.Status = req->wr_status;
+
+	/* Special handling based on FCP response */
+
+	/*
+	 * FW returns us this error, if flags were set. FCP4 says
+	 * FCP_RSP_LEN_VAL in flags shall be set for TM completions.
+	 * So if a target were to set this bit, we expect that the
+	 * rsp_code is set to FCP_TMF_CMPL for a successful TM
+	 * completion. Any other rsp_code means TM operation failed.
+	 * If a target were to just ignore setting flags, we treat
+	 * the TM operation as success, and FW returns FW_SUCCESS.
+	 */
+	if (req->wr_status == FW_SCSI_RSP_ERR) {
+		dma_buf = &req->dma_buf;
+		fcp_resp = (struct fcp_resp_with_ext *)dma_buf->vaddr;
+		rsp_info = (struct fcp_resp_rsp_info *)(fcp_resp + 1);
+
+		flags = fcp_resp->resp.fr_flags;
+
+		/* Modify return status if flags indicate success */
+		if (flags & FCP_RSP_LEN_VAL)
+			if (rsp_info->rsp_code == FCP_TMF_CMPL)
+				cmnd->SCp.Status = FW_SUCCESS;
+
+		csio_dbg(hw, "TM FCP rsp code: %d\n", rsp_info->rsp_code);
+	}
+
+	/* Wake up the TM handler thread */
+	csio_scsi_cmnd(req) = NULL;
+}
+
+static int
+csio_eh_lun_reset_handler(struct scsi_cmnd *cmnd)
+{
+	struct csio_lnode *ln = shost_priv(cmnd->device->host);
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	struct csio_scsim *scsim = csio_hw_to_scsim(hw);
+	struct csio_rnode *rn = (struct csio_rnode *)(cmnd->device->hostdata);
+	struct csio_ioreq *ioreq = NULL;
+	struct csio_scsi_qset *sqset;
+	unsigned long flags;
+	int retval;
+	int count, ret;
+	LIST_HEAD(local_q);
+	struct csio_scsi_level_data sld;
+
+	if (!rn)
+		goto fail;
+
+	csio_dbg(hw, "Request to reset LUN:%d (ssni:0x%x tgtid:%d)\n",
+		      cmnd->device->lun, rn->flowid, rn->scsi_id);
+
+	if (!csio_is_lnode_ready(ln)) {
+		csio_err(hw,
+			 "LUN reset cannot be issued on non-ready"
+			 " local node vnpi:0x%x (LUN:%d)\n",
+			 ln->vnp_flowid, cmnd->device->lun);
+		goto fail;
+	}
+
+	/* Lnode is ready, now wait on rport node readiness */
+	ret = fc_block_scsi_eh(cmnd);
+	if (ret)
+		return ret;
+
+	/*
+	 * If we have blocked in the previous call, at this point, either the
+	 * remote node has come back online, or device loss timer has fired
+	 * and the remote node is destroyed. Allow the LUN reset only for
+	 * the former case, since LUN reset is a TMF I/O on the wire, and we
+	 * need a valid session to issue it.
+	 */
+	if (fc_remote_port_chkready(rn->rport)) {
+		csio_err(hw,
+			 "LUN reset cannot be issued on non-ready"
+			 " remote node ssni:0x%x (LUN:%d)\n",
+			 rn->flowid, cmnd->device->lun);
+		goto fail;
+	}
+
+	/* Get a free ioreq structure - SM is already set to uninit */
+	ioreq = csio_get_scsi_ioreq_lock(hw, scsim);
+
+	if (!ioreq) {
+		csio_err(hw, "Out of IO request elements. Active # :%d\n",
+			 scsim->stats.n_active);
+		goto fail;
+	}
+
+	sqset			= &hw->sqset[ln->portid][smp_processor_id()];
+	ioreq->nsge		= 0;
+	ioreq->lnode		= ln;
+	ioreq->rnode		= rn;
+	ioreq->iq_idx		= sqset->iq_idx;
+	ioreq->eq_idx		= sqset->eq_idx;
+
+	csio_scsi_cmnd(ioreq)	= cmnd;
+	cmnd->host_scribble	= (unsigned char *)ioreq;
+	cmnd->SCp.Status	= 0;
+
+	cmnd->SCp.Message	= FCP_TMF_LUN_RESET;
+	ioreq->tmo		= CSIO_SCSI_LUNRST_TMO_MS / 1000;
+
+	/*
+	 * FW times the LUN reset for ioreq->tmo, so we got to wait a little
+	 * longer (10s for now) than that to allow FW to return the timed
+	 * out command.
+	 */
+	count = CSIO_ROUNDUP((ioreq->tmo + 10) * 1000, CSIO_SCSI_TM_POLL_MS);
+
+	/* Set cbfn */
+	ioreq->io_cbfn = csio_tm_cbfn;
+
+	/* Save of the ioreq info for later use */
+	sld.level = CSIO_LEV_LUN;
+	sld.lnode = ioreq->lnode;
+	sld.rnode = ioreq->rnode;
+	sld.oslun = (uint64_t)cmnd->device->lun;
+
+	spin_lock_irqsave(&hw->lock, flags);
+	/* Kick off TM SM on the ioreq */
+	retval = csio_scsi_start_tm(ioreq);
+	spin_unlock_irqrestore(&hw->lock, flags);
+
+	if (retval != 0) {
+		csio_err(hw, "Failed to issue LUN reset, req:%p, status:%d\n",
+			    ioreq, retval);
+		goto fail_ret_ioreq;
+	}
+
+	csio_dbg(hw, "Waiting max %d secs for LUN reset completion\n",
+		    count * (CSIO_SCSI_TM_POLL_MS / 1000));
+	/* Wait for completion */
+	while ((((struct scsi_cmnd *)csio_scsi_cmnd(ioreq)) == cmnd)
+								&& count--)
+		msleep(CSIO_SCSI_TM_POLL_MS);
+
+	/* LUN reset timed-out */
+	if (((struct scsi_cmnd *)csio_scsi_cmnd(ioreq)) == cmnd) {
+		csio_err(hw, "LUN reset (%d:%d) timed out\n",
+			 cmnd->device->id, cmnd->device->lun);
+
+		spin_lock_irq(&hw->lock);
+		csio_scsi_drvcleanup(ioreq);
+		list_del_init(&ioreq->sm.sm_list);
+		spin_unlock_irq(&hw->lock);
+
+		goto fail_ret_ioreq;
+	}
+
+	/* LUN reset returned, check cached status */
+	if (cmnd->SCp.Status != FW_SUCCESS) {
+		csio_err(hw, "LUN reset failed (%d:%d), status: %d\n",
+			 cmnd->device->id, cmnd->device->lun, cmnd->SCp.Status);
+		goto fail;
+	}
+
+	/* LUN reset succeeded, Start aborting affected I/Os */
+	/*
+	 * Since the host guarantees during LUN reset that there
+	 * will not be any more I/Os to that LUN, until the LUN reset
+	 * completes, we gather pending I/Os after the LUN reset.
+	 */
+	spin_lock_irq(&hw->lock);
+	csio_scsi_gather_active_ios(scsim, &sld, &local_q);
+
+	retval = csio_scsi_abort_io_q(scsim, &local_q, 30000);
+	spin_unlock_irq(&hw->lock);
+
+	/* Aborts may have timed out */
+	if (retval != 0) {
+		csio_err(hw,
+			 "Attempt to abort I/Os during LUN reset of %d"
+			 " returned %d\n", cmnd->device->lun, retval);
+		/* Return I/Os back to active_q */
+		spin_lock_irq(&hw->lock);
+		list_splice_tail_init(&local_q, &scsim->active_q);
+		spin_unlock_irq(&hw->lock);
+		goto fail;
+	}
+
+	CSIO_INC_STATS(rn, n_lun_rst);
+
+	csio_info(hw, "LUN reset occurred (%d:%d)\n",
+		  cmnd->device->id, cmnd->device->lun);
+
+	return SUCCESS;
+
+fail_ret_ioreq:
+	csio_put_scsi_ioreq_lock(hw, scsim, ioreq);
+fail:
+	CSIO_INC_STATS(rn, n_lun_rst_fail);
+	return FAILED;
+}
+
+static int
+csio_slave_alloc(struct scsi_device *sdev)
+{
+	struct fc_rport *rport = starget_to_rport(scsi_target(sdev));
+
+	if (!rport || fc_remote_port_chkready(rport))
+		return -ENXIO;
+
+	sdev->hostdata = *((struct csio_lnode **)(rport->dd_data));
+
+	return 0;
+}
+
+static int
+csio_slave_configure(struct scsi_device *sdev)
+{
+	if (sdev->tagged_supported)
+		scsi_activate_tcq(sdev, csio_lun_qdepth);
+	else
+		scsi_deactivate_tcq(sdev, csio_lun_qdepth);
+
+	return 0;
+}
+
+static void
+csio_slave_destroy(struct scsi_device *sdev)
+{
+	sdev->hostdata = NULL;
+}
+
+static int
+csio_scan_finished(struct Scsi_Host *shost, unsigned long time)
+{
+	struct csio_lnode *ln = shost_priv(shost);
+	int rv = 0;
+
+	spin_lock_irq(shost->host_lock);
+	if (!ln->hwp || csio_list_deleted(&ln->sm.sm_list)) {
+		spin_unlock_irq(shost->host_lock);
+		return 1;
+	}
+
+	rv = csio_scan_done(ln, jiffies, time, csio_max_scan_tmo * HZ,
+			    csio_delta_scan_tmo * HZ);
+
+	spin_unlock_irq(shost->host_lock);
+
+	return rv;
+}
+
+struct scsi_host_template csio_fcoe_shost_template = {
+	.module			= THIS_MODULE,
+	.name			= CSIO_DRV_DESC,
+	.proc_name		= KBUILD_MODNAME,
+	.queuecommand		= csio_queuecommand,
+	.eh_abort_handler	= csio_eh_abort_handler,
+	.eh_device_reset_handler = csio_eh_lun_reset_handler,
+	.slave_alloc		= csio_slave_alloc,
+	.slave_configure	= csio_slave_configure,
+	.slave_destroy		= csio_slave_destroy,
+	.scan_finished		= csio_scan_finished,
+	.this_id		= -1,
+	.sg_tablesize		= CSIO_SCSI_MAX_SGE,
+	.cmd_per_lun		= CSIO_MAX_CMD_PER_LUN,
+	.use_clustering		= ENABLE_CLUSTERING,
+	.shost_attrs		= csio_fcoe_lport_attrs,
+	.max_sectors		= CSIO_MAX_SECTOR_SIZE,
+};
+
+struct scsi_host_template csio_fcoe_shost_vport_template = {
+	.module			= THIS_MODULE,
+	.name			= CSIO_DRV_DESC,
+	.proc_name		= KBUILD_MODNAME,
+	.queuecommand		= csio_queuecommand,
+	.eh_abort_handler	= csio_eh_abort_handler,
+	.eh_device_reset_handler = csio_eh_lun_reset_handler,
+	.slave_alloc		= csio_slave_alloc,
+	.slave_configure	= csio_slave_configure,
+	.slave_destroy		= csio_slave_destroy,
+	.scan_finished		= csio_scan_finished,
+	.this_id		= -1,
+	.sg_tablesize		= CSIO_SCSI_MAX_SGE,
+	.cmd_per_lun		= CSIO_MAX_CMD_PER_LUN,
+	.use_clustering		= ENABLE_CLUSTERING,
+	.shost_attrs		= csio_fcoe_vport_attrs,
+	.max_sectors		= CSIO_MAX_SECTOR_SIZE,
+};
+
+/*
+ * csio_scsi_alloc_ddp_bufs - Allocate buffers for DDP of unaligned SGLs.
+ * @scm: SCSI Module
+ * @hw: HW device.
+ * @buf_size: buffer size
+ * @num_buf : Number of buffers.
+ *
+ * This routine allocates DMA buffers required for SCSI Data xfer, if
+ * each SGL buffer for a SCSI Read request posted by SCSI midlayer are
+ * not virtually contiguous.
+ */
+static int
+csio_scsi_alloc_ddp_bufs(struct csio_scsim *scm, struct csio_hw *hw,
+			 int buf_size, int num_buf)
+{
+	int n = 0;
+	struct list_head *tmp;
+	struct csio_dma_buf *ddp_desc = NULL;
+	uint32_t unit_size = 0;
+
+	if (!num_buf)
+		return 0;
+
+	if (!buf_size)
+		return -EINVAL;
+
+	INIT_LIST_HEAD(&scm->ddp_freelist);
+
+	/* Align buf size to page size */
+	buf_size = (buf_size + PAGE_SIZE - 1) & PAGE_MASK;
+	/* Initialize dma descriptors */
+	for (n = 0; n < num_buf; n++) {
+		/* Set unit size to request size */
+		unit_size = buf_size;
+		ddp_desc = kzalloc(sizeof(struct csio_dma_buf), GFP_KERNEL);
+		if (!ddp_desc) {
+			csio_err(hw,
+				 "Failed to allocate ddp descriptors,"
+				 " Num allocated = %d.\n",
+				 scm->stats.n_free_ddp);
+			goto no_mem;
+		}
+
+		/* Allocate Dma buffers for DDP */
+		ddp_desc->vaddr = pci_alloc_consistent(hw->pdev, unit_size,
+							&ddp_desc->paddr);
+		if (!ddp_desc->vaddr) {
+			csio_err(hw,
+				 "SCSI response DMA buffer (ddp) allocation"
+				 " failed!\n");
+			kfree(ddp_desc);
+			goto no_mem;
+		}
+
+		ddp_desc->len = unit_size;
+
+		/* Added it to scsi ddp freelist */
+		list_add_tail(&ddp_desc->list, &scm->ddp_freelist);
+		CSIO_INC_STATS(scm, n_free_ddp);
+	}
+
+	return 0;
+no_mem:
+	/* release dma descs back to freelist and free dma memory */
+	list_for_each(tmp, &scm->ddp_freelist) {
+		ddp_desc = (struct csio_dma_buf *) tmp;
+		tmp = csio_list_prev(tmp);
+		pci_free_consistent(hw->pdev, ddp_desc->len, ddp_desc->vaddr,
+				    ddp_desc->paddr);
+		list_del_init(&ddp_desc->list);
+		kfree(ddp_desc);
+	}
+	scm->stats.n_free_ddp = 0;
+
+	return -ENOMEM;
+}
+
+/*
+ * csio_scsi_free_ddp_bufs - free DDP buffers of unaligned SGLs.
+ * @scm: SCSI Module
+ * @hw: HW device.
+ *
+ * This routine frees ddp buffers.
+ */
+static void
+csio_scsi_free_ddp_bufs(struct csio_scsim *scm, struct csio_hw *hw)
+{
+	struct list_head *tmp;
+	struct csio_dma_buf *ddp_desc;
+
+	/* release dma descs back to freelist and free dma memory */
+	list_for_each(tmp, &scm->ddp_freelist) {
+		ddp_desc = (struct csio_dma_buf *) tmp;
+		tmp = csio_list_prev(tmp);
+		pci_free_consistent(hw->pdev, ddp_desc->len, ddp_desc->vaddr,
+				    ddp_desc->paddr);
+		list_del_init(&ddp_desc->list);
+		kfree(ddp_desc);
+	}
+	scm->stats.n_free_ddp = 0;
+}
+
+/**
+ * csio_scsim_init - Initialize SCSI Module
+ * @scm:	SCSI Module
+ * @hw:		HW module
+ *
+ */
+int
+csio_scsim_init(struct csio_scsim *scm, struct csio_hw *hw)
+{
+	int i;
+	struct csio_ioreq *ioreq;
+	struct csio_dma_buf *dma_buf;
+
+	INIT_LIST_HEAD(&scm->active_q);
+	scm->hw = hw;
+
+	scm->proto_cmd_len = sizeof(struct fcp_cmnd);
+	scm->proto_rsp_len = CSIO_SCSI_RSP_LEN;
+	scm->max_sge = CSIO_SCSI_MAX_SGE;
+
+	spin_lock_init(&scm->freelist_lock);
+
+	/* Pre-allocate ioreqs and initialize them */
+	INIT_LIST_HEAD(&scm->ioreq_freelist);
+	for (i = 0; i < csio_scsi_ioreqs; i++) {
+
+		ioreq = kzalloc(sizeof(struct csio_ioreq), GFP_KERNEL);
+		if (!ioreq) {
+			csio_err(hw,
+				 "I/O request element allocation failed, "
+				 " Num allocated = %d.\n",
+				 scm->stats.n_free_ioreq);
+
+			goto free_ioreq;
+		}
+
+		/* Allocate Dma buffers for Response Payload */
+		dma_buf = &ioreq->dma_buf;
+		dma_buf->vaddr = pci_pool_alloc(hw->scsi_pci_pool, GFP_KERNEL,
+						&dma_buf->paddr);
+		if (!dma_buf->vaddr) {
+			csio_err(hw,
+				 "SCSI response DMA buffer allocation"
+				 " failed!\n");
+			kfree(ioreq);
+			goto free_ioreq;
+		}
+
+		dma_buf->len = scm->proto_rsp_len;
+
+		/* Set state to uninit */
+		csio_init_state(&ioreq->sm, csio_scsis_uninit);
+		INIT_LIST_HEAD(&ioreq->gen_list);
+		init_completion(&ioreq->cmplobj);
+
+		list_add_tail(&ioreq->sm.sm_list, &scm->ioreq_freelist);
+		CSIO_INC_STATS(scm, n_free_ioreq);
+	}
+
+	if (csio_scsi_alloc_ddp_bufs(scm, hw, PAGE_SIZE, csio_ddp_descs))
+		goto free_ioreq;
+
+	return 0;
+
+free_ioreq:
+	/*
+	 * Free up existing allocations, since an error
+	 * from here means we are returning for good
+	 */
+	while (!list_empty(&scm->ioreq_freelist)) {
+		struct csio_sm *tmp;
+
+		tmp = list_first_entry(&scm->ioreq_freelist,
+				       struct csio_sm, sm_list);
+		list_del_init(&tmp->sm_list);
+		ioreq = (struct csio_ioreq *)tmp;
+
+		dma_buf = &ioreq->dma_buf;
+		pci_pool_free(hw->scsi_pci_pool, dma_buf->vaddr,
+			      dma_buf->paddr);
+
+		kfree(ioreq);
+	}
+
+	scm->stats.n_free_ioreq = 0;
+
+	return -ENOMEM;
+}
+
+/**
+ * csio_scsim_exit: Uninitialize SCSI Module
+ * @scm: SCSI Module
+ *
+ */
+void
+csio_scsim_exit(struct csio_scsim *scm)
+{
+	struct csio_ioreq *ioreq;
+	struct csio_dma_buf *dma_buf;
+
+	while (!list_empty(&scm->ioreq_freelist)) {
+		struct csio_sm *tmp;
+
+		tmp = list_first_entry(&scm->ioreq_freelist,
+				       struct csio_sm, sm_list);
+		list_del_init(&tmp->sm_list);
+		ioreq = (struct csio_ioreq *)tmp;
+
+		dma_buf = &ioreq->dma_buf;
+		pci_pool_free(scm->hw->scsi_pci_pool, dma_buf->vaddr,
+			      dma_buf->paddr);
+
+		kfree(ioreq);
+	}
+
+	scm->stats.n_free_ioreq = 0;
+
+	csio_scsi_free_ddp_bufs(scm, scm->hw);
+}
-- 
1.7.1

^ permalink raw reply related

* [PATCH] ceph: [PATCH] ceph: use list_move_tail instead of list_del/list_add_tail
From: Wei Yongjun @ 2012-09-05  6:34 UTC (permalink / raw)
  To: sage, davem; +Cc: yongjun_wei, ceph-devel, netdev

From: Wei Yongjun <yongjun_wei@trendmicro.com.cn>

Using list_move_tail() instead of list_del() + list_add_tail().

Signed-off-by: Wei Yongjun <yongjun_wei@trendmicro.com.cn>
---
 net/ceph/pagelist.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c
index 665cd23..92866be 100644
--- a/net/ceph/pagelist.c
+++ b/net/ceph/pagelist.c
@@ -1,4 +1,3 @@
-
 #include <linux/module.h>
 #include <linux/gfp.h>
 #include <linux/pagemap.h>
@@ -134,8 +133,8 @@ int ceph_pagelist_truncate(struct ceph_pagelist *pl,
 	ceph_pagelist_unmap_tail(pl);
 	while (pl->head.prev != c->page_lru) {
 		page = list_entry(pl->head.prev, struct page, lru);
-		list_del(&page->lru);                /* remove from pagelist */
-		list_add_tail(&page->lru, &pl->free_list); /* add to reserve */
+		/* move from pagelist to reserve */
+		list_move_tail(&page->lru, &pl->free_list);
 		++pl->num_pages_free;
 	}
 	pl->room = c->room;


^ permalink raw reply related

* [PATCH] cfg80211: use list_move_tail instead of list_del/list_add_tail
From: Wei Yongjun @ 2012-09-05  6:34 UTC (permalink / raw)
  To: johannes-cdvu00un1VgdHxzADdlk8Q, linville-2XuSBdqkA4R54TAoqtyWWQ,
	davem-fT/PcQaiUtIeIZ0/mPfg9Q
  Cc: yongjun_wei-zrsr2BFq86L20UzCJQGyNP8+0UxHXcjY,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	netdev-u79uwXL29TY76Z2rM5mHXA

From: Wei Yongjun <yongjun_wei-zrsr2BFq86L20UzCJQGyNP8+0UxHXcjY@public.gmane.org>

Using list_move_tail() instead of list_del() + list_add_tail().

Signed-off-by: Wei Yongjun <yongjun_wei-zrsr2BFq86L20UzCJQGyNP8+0UxHXcjY@public.gmane.org>
---
 net/wireless/reg.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 2ded3c7..1ad04e5 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1949,8 +1949,7 @@ static void restore_regulatory_settings(bool reset_user)
 			if (reg_request->initiator !=
 			    NL80211_REGDOM_SET_BY_USER)
 				continue;
-			list_del(&reg_request->list);
-			list_add_tail(&reg_request->list, &tmp_reg_req_list);
+			list_move_tail(&reg_request->list, &tmp_reg_req_list);
 		}
 	}
 	spin_unlock(&reg_requests_lock);
@@ -2009,8 +2008,7 @@ static void restore_regulatory_settings(bool reset_user)
 			      "into the queue\n",
 			      reg_request->alpha2[0],
 			      reg_request->alpha2[1]);
-		list_del(&reg_request->list);
-		list_add_tail(&reg_request->list, &reg_requests_list);
+		list_move_tail(&reg_request->list, &reg_requests_list);
 	}
 	spin_unlock(&reg_requests_lock);
 

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [net-next 4/4] ixgbe: remove old init remnant
From: Jeff Kirsher @ 2012-09-05  6:25 UTC (permalink / raw)
  To: davem; +Cc: Eliezer Tamir, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1346826350-27733-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Eliezer Tamir <eliezer.tamir@linux.intel.com>

Remove a for loop that does nothing in ixgbe_probe().
This is a remnant from when we had IO bars (compare to the ixgb code).

Signed-off-by: Eliezer Tamir <eliezer.tamir@linux.intel.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index fa0d6e1..1cbb34f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7169,11 +7169,6 @@ static int __devinit ixgbe_probe(struct pci_dev *pdev,
 		goto err_ioremap;
 	}
 
-	for (i = 1; i <= 5; i++) {
-		if (pci_resource_len(pdev, i) == 0)
-			continue;
-	}
-
 	netdev->netdev_ops = &ixgbe_netdev_ops;
 	ixgbe_set_ethtool_ops(netdev);
 	netdev->watchdog_timeo = 5 * HZ;
-- 
1.7.11.4

^ permalink raw reply related

* [net-next 3/4] igb: Supported and Advertised Pause Frame
From: Jeff Kirsher @ 2012-09-05  6:25 UTC (permalink / raw)
  To: davem; +Cc: Akeem G. Abodunrin, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1346826350-27733-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: "Akeem G. Abodunrin" <akeem.g.abodunrin@intel.com>

This patch add ethtool supports for Supported and Advertised Pause Frame,
based on Adapter Flow Control settings.

Signed-off-by: Akeem G. Abodunrin <akeem.g.abodunrin@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_ethtool.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index c4def55..a294441 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -148,9 +148,9 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
 				   SUPPORTED_100baseT_Full |
 				   SUPPORTED_1000baseT_Full|
 				   SUPPORTED_Autoneg |
-				   SUPPORTED_TP);
-		ecmd->advertising = (ADVERTISED_TP |
-				     ADVERTISED_Pause);
+				   SUPPORTED_TP |
+				   SUPPORTED_Pause);
+		ecmd->advertising = ADVERTISED_TP;
 
 		if (hw->mac.autoneg == 1) {
 			ecmd->advertising |= ADVERTISED_Autoneg;
@@ -158,6 +158,21 @@ static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
 			ecmd->advertising |= hw->phy.autoneg_advertised;
 		}
 
+		if (hw->mac.autoneg != 1)
+			ecmd->advertising &= ~(ADVERTISED_Pause |
+					       ADVERTISED_Asym_Pause);
+
+		if (hw->fc.requested_mode == e1000_fc_full)
+			ecmd->advertising |= ADVERTISED_Pause;
+		else if (hw->fc.requested_mode == e1000_fc_rx_pause)
+			ecmd->advertising |= (ADVERTISED_Pause |
+					      ADVERTISED_Asym_Pause);
+		else if (hw->fc.requested_mode == e1000_fc_tx_pause)
+			ecmd->advertising |=  ADVERTISED_Asym_Pause;
+		else
+			ecmd->advertising &= ~(ADVERTISED_Pause |
+					       ADVERTISED_Asym_Pause);
+
 		ecmd->port = PORT_TP;
 		ecmd->phy_address = hw->phy.addr;
 	} else {
-- 
1.7.11.4

^ permalink raw reply related

* [net-next 2/4] igb: reduce Rx header size
From: Jeff Kirsher @ 2012-09-05  6:25 UTC (permalink / raw)
  To: davem; +Cc: Eric Dumazet, netdev, gospo, sassmann, Alexander Duyck,
	Jeff Kirsher
In-Reply-To: <1346826350-27733-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Eric Dumazet <edumazet@google.com>

Reduce skb truesize by 256 bytes.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Alexander Duyck <alexander.h.duyck@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h
index 9e572dd..0c9f62c 100644
--- a/drivers/net/ethernet/intel/igb/igb.h
+++ b/drivers/net/ethernet/intel/igb/igb.h
@@ -131,9 +131,9 @@ struct vf_data_storage {
 #define MAXIMUM_ETHERNET_VLAN_SIZE 1522
 
 /* Supported Rx Buffer Sizes */
-#define IGB_RXBUFFER_512   512
+#define IGB_RXBUFFER_256   256
 #define IGB_RXBUFFER_16384 16384
-#define IGB_RX_HDR_LEN     IGB_RXBUFFER_512
+#define IGB_RX_HDR_LEN     IGB_RXBUFFER_256
 
 /* How many Tx Descriptors do we need to call netif_wake_queue ? */
 #define IGB_TX_QUEUE_WAKE	16
-- 
1.7.11.4

^ permalink raw reply related

* [net-next 1/4] igb: Add loopback test support for i210
From: Jeff Kirsher @ 2012-09-05  6:25 UTC (permalink / raw)
  To: davem; +Cc: Carolyn Wyborny, netdev, gospo, sassmann, Jeff Kirsher
In-Reply-To: <1346826350-27733-1-git-send-email-jeffrey.t.kirsher@intel.com>

From: Carolyn Wyborny <carolyn.wyborny@intel.com>

Early release of i210 devices had the loopback test of the ethtool
self-test disabled. This patch enables the loopback test for i210 devices.

Signed-off-by: Carolyn Wyborny <carolyn.wyborny@intel.com>
Tested-by: Jeff Pieper <jeffrey.e.pieper@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_ethtool.c | 51 +++++++++-------------------
 1 file changed, 16 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index be02168..c4def55 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -1511,33 +1511,22 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	u32 ctrl_reg = 0;
-	u16 phy_reg = 0;
 
 	hw->mac.autoneg = false;
 
-	switch (hw->phy.type) {
-	case e1000_phy_m88:
-		/* Auto-MDI/MDIX Off */
-		igb_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808);
-		/* reset to update Auto-MDI/MDIX */
-		igb_write_phy_reg(hw, PHY_CONTROL, 0x9140);
-		/* autoneg off */
-		igb_write_phy_reg(hw, PHY_CONTROL, 0x8140);
-		break;
-	case e1000_phy_82580:
-		/* enable MII loopback */
-		igb_write_phy_reg(hw, I82580_PHY_LBK_CTRL, 0x8041);
-		break;
-	case e1000_phy_i210:
-		/* set loopback speed in PHY */
-		igb_read_phy_reg(hw, (GS40G_PAGE_SELECT & GS40G_PAGE_2),
-					&phy_reg);
-		phy_reg |= GS40G_MAC_SPEED_1G;
-		igb_write_phy_reg(hw, (GS40G_PAGE_SELECT & GS40G_PAGE_2),
-					phy_reg);
-		ctrl_reg = rd32(E1000_CTRL_EXT);
-	default:
-		break;
+	if (hw->phy.type == e1000_phy_m88) {
+		if (hw->phy.id != I210_I_PHY_ID) {
+			/* Auto-MDI/MDIX Off */
+			igb_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808);
+			/* reset to update Auto-MDI/MDIX */
+			igb_write_phy_reg(hw, PHY_CONTROL, 0x9140);
+			/* autoneg off */
+			igb_write_phy_reg(hw, PHY_CONTROL, 0x8140);
+		} else {
+			/* force 1000, set loopback  */
+			igb_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0);
+			igb_write_phy_reg(hw, PHY_CONTROL, 0x4140);
+		}
 	}
 
 	/* add small delay to avoid loopback test failure */
@@ -1555,7 +1544,7 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter)
 		     E1000_CTRL_FD |	 /* Force Duplex to FULL */
 		     E1000_CTRL_SLU);	 /* Set link up enable bit */
 
-	if ((hw->phy.type == e1000_phy_m88) || (hw->phy.type == e1000_phy_i210))
+	if (hw->phy.type == e1000_phy_m88)
 		ctrl_reg |= E1000_CTRL_ILOS; /* Invert Loss of Signal */
 
 	wr32(E1000_CTRL, ctrl_reg);
@@ -1563,11 +1552,10 @@ static int igb_integrated_phy_loopback(struct igb_adapter *adapter)
 	/* Disable the receiver on the PHY so when a cable is plugged in, the
 	 * PHY does not begin to autoneg when a cable is reconnected to the NIC.
 	 */
-	if ((hw->phy.type == e1000_phy_m88) || (hw->phy.type == e1000_phy_i210))
+	if (hw->phy.type == e1000_phy_m88)
 		igb_phy_disable_receiver(adapter);
 
-	udelay(500);
-
+	mdelay(500);
 	return 0;
 }
 
@@ -1827,13 +1815,6 @@ static int igb_loopback_test(struct igb_adapter *adapter, u64 *data)
 		*data = 0;
 		goto out;
 	}
-	if ((adapter->hw.mac.type == e1000_i210)
-		|| (adapter->hw.mac.type == e1000_i211)) {
-		dev_err(&adapter->pdev->dev,
-			"Loopback test not supported on this part at this time.\n");
-		*data = 0;
-		goto out;
-	}
 	*data = igb_setup_desc_rings(adapter);
 	if (*data)
 		goto out;
-- 
1.7.11.4

^ permalink raw reply related

* [net-next 0/4][pull request] Intel Wired LAN Driver Updates
From: Jeff Kirsher @ 2012-09-05  6:25 UTC (permalink / raw)
  To: davem; +Cc: Jeff Kirsher, netdev, gospo, sassmann

This series contains updates to igb and ixgbe.

The following are changes since commit 600e177920df936d03b807780ca92c662af98990:
  net: Providing protocol type via system.sockprotoname xattr of /proc/PID/fd entries
and are available in the git repository at:
  git://git.kernel.org/pub/scm/linux/kernel/git/jkirsher/net-next master

Akeem G. Abodunrin (1):
  igb: Supported and Advertised Pause Frame

Carolyn Wyborny (1):
  igb: Add loopback test support for i210

Eliezer Tamir (1):
  ixgbe: remove old init remnant

Eric Dumazet (1):
  igb: reduce Rx header size

 drivers/net/ethernet/intel/igb/igb.h          |  4 +-
 drivers/net/ethernet/intel/igb/igb_ethtool.c  | 72 +++++++++++++--------------
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  5 --
 3 files changed, 36 insertions(+), 45 deletions(-)

-- 
1.7.11.4

^ permalink raw reply

* linux-next: build failure after merge of the final tree (net-next tree related)
From: Stephen Rothwell @ 2012-09-05  5:39 UTC (permalink / raw)
  To: David Miller, netdev; +Cc: linux-next, linux-kernel, Patrick McHardy

[-- Attachment #1: Type: text/plain, Size: 1620 bytes --]

Hi all,

After merging the final tree, today's linux-next build (powerpc
allyesconfig) failed like this:

net/ipv6/netfilter/nf_nat_l3proto_ipv6.c: In function 'nf_nat_ipv6_csum_recalc':
net/ipv6/netfilter/nf_nat_l3proto_ipv6.c:144:4: error: implicit declaration of function 'csum_ipv6_magic' [-Werror=implicit-function-declaration]

Caused by commit 58a317f1061c ("netfilter: ipv6: add IPv6 NAT support").

(I always thought IPv6 NAT was a bad idea ;-))

I have applied the following patch for today:

From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Wed, 5 Sep 2012 15:34:58 +1000
Subject: [PATCH] netfilter: ipv6: using csum_ipv6_magic requires
 net/ip6_checksum.h

Fixes this build error:

net/ipv6/netfilter/nf_nat_l3proto_ipv6.c: In function 'nf_nat_ipv6_csum_recalc':
net/ipv6/netfilter/nf_nat_l3proto_ipv6.c:144:4: error: implicit declaration of function 'csum_ipv6_magic' [-Werror=implicit-function-declaration]

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---
 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c |    1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index 81a2d1c..abfe75a 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -15,6 +15,7 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/secure_seq.h>
 #include <net/checksum.h>
+#include <net/ip6_checksum.h>
 #include <net/ip6_route.h>
 #include <net/ipv6.h>
 
-- 
1.7.10.280.gaa39

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply related

* Re: [PATCHv2] virtio-spec: virtio network device multiqueue support
From: Michael S. Tsirkin @ 2012-09-05  5:34 UTC (permalink / raw)
  To: Jason Wang; +Cc: netdev, kvm, virtualization
In-Reply-To: <5046C837.7040609@redhat.com>

On Wed, Sep 05, 2012 at 11:34:15AM +0800, Jason Wang wrote:
> On 09/03/2012 07:55 PM, Michael S. Tsirkin wrote:
> >At Jason's request, I am trying to help finalize the spec for
> >the new multiqueue feature.
> >
> >Changes from Jason's rfc:
> >- reserved vq 3: this makes all rx vqs even and tx vqs odd, which
> >   looks nicer to me.
> >- documented packet steering, added a generalized steering programming
> >   command. Current modes are single queue and host driven multiqueue,
> >   but I envision support for guest driven multiqueue in the future.
> 
> For host driven, more thought in the long term. Maybe we could add
> more policy to choose the rxq such as hashing, round-robin and
> cpuid.

As we discussed off-list, different guests may need wildly
different strategies. For example different queues for
different qos priorities might make a lot of sense.
So for now I'll remove the host-driven option and
add _GUEST (or maybe better name is _RX_FOLLOWS_TX)
rule which records the queue number on packet transmit and
uses that on receive.

> >- make default vqs unused when in mq mode - this wastes some memory
> >   but makes it more efficient to switch between modes as
> >   we can avoid this causing packet reordering.
> 
> Not sure whether or not this can really helps. Depending on the host
> scheduler, we may always see a disorder when we do the switching.

Since guest handles one queue at a time during switch,
won't this mean host reorders packets even with a single queue?

> >Rusty, could you please take a look and comment?
> >If this looks OK to everyone, we can proceed with finalizing the
> >implementation.  This patch is against
> >eb9fc84d0d3c46438aaab190e2401a9e5409a052 in virtio-spec git tree.
> >
> >-->
> >
> >virtio-spec: virtio network device multiqueue support
> >
> >Add multiqueue support to virtio network device.
> >Add a new feature flag VIRTIO_NET_F_MULTIQUEUE for this feature, a new
> >configuration field max_virtqueue_pairs to detect supported number of
> >virtqueues as well as a new command VIRTIO_NET_CTRL_STEERING to program
> >packet steering.
> >
> >Signed-off-by: Michael S. Tsirkin<mst@redhat.com>
> >
> >--
> >
> >diff --git a/virtio-spec.lyx b/virtio-spec.lyx
> >index 7a073f4..583debc 100644
> >--- a/virtio-spec.lyx
> >+++ b/virtio-spec.lyx
> >@@ -58,6 +58,7 @@
> >  \html_be_strict false
> >  \author -608949062 "Rusty Russell,,,"
> >  \author 1531152142 "Paolo Bonzini,,,"
> >+\author 1986246365 "Michael S. Tsirkin"
> >  \end_header
> >
> >  \begin_body
> >@@ -3896,6 +3897,37 @@ Only if VIRTIO_NET_F_CTRL_VQ set
> >  \end_inset
> >
> >
> >+\change_inserted 1986246365 1346663522
> >+ 3: reserved
> >+\end_layout
> >+
> >+\begin_layout Description
> >+
> >+\change_inserted 1986246365 1346663550
> >+4: receiveq1.
> >+ 5: transmitq1.
> >+ 6: receiveq2.
> >+ 7.
> >+ transmitq2.
> >+ ...
> >+ 2N+2:receivqN, 2N+3:transmitqN
> >+\begin_inset Foot
> >+status open
> >+
> >+\begin_layout Plain Layout
> >+
> >+\change_inserted 1986246365 1346663558
> >+Only if VIRTIO_NET_F_CTRL_VQ set.
> >+ N is indicated by max_virtqueue_pairs field.
> >+\change_unchanged
> >+
> >+\end_layout
> >+
> >+\end_inset
> >+
> >+
> >+\change_unchanged
> >+
> >  \end_layout
> >
> >  \begin_layout Description
> >@@ -4056,6 +4088,17 @@ VIRTIO_NET_F_CTRL_VLAN
> >
> >  \begin_layout Description
> >  VIRTIO_NET_F_GUEST_ANNOUNCE(21) Guest can send gratuitous packets.
> >+\change_inserted 1986246365 1346617842
> >+
> >+\end_layout
> >+
> >+\begin_layout Description
> >+
> >+\change_inserted 1986246365 1346618103
> >+VIRTIO_NET_F_MULTIQUEUE(22) Device has multiple receive and transmission
> >+ queues.
> >+\change_unchanged
> >+
> >  \end_layout
> >
> >  \end_deeper
> >@@ -4068,11 +4111,45 @@ configuration
> >  \begin_inset space ~
> >  \end_inset
> >
> >-layout Two configuration fields are currently defined.
> >+layout
> >+\change_deleted 1986246365 1346671560
> >+Two
> >+\change_inserted 1986246365 1346671647
> >+Six
> >+\change_unchanged
> >+ configuration fields are currently defined.
> >   The mac address field always exists (though is only valid if VIRTIO_NET_F_MAC
> >   is set), and the status field only exists if VIRTIO_NET_F_STATUS is set.
> >   Two read-only bits are currently defined for the status field: VIRTIO_NET_S_LIN
> >  K_UP and VIRTIO_NET_S_ANNOUNCE.
> >+
> >+\change_inserted 1986246365 1346672138
> >+ The following four read-only fields only exists if VIRTIO_NET_F_MULTIQUEUE
> >+ is set.
> >+ The max_virtqueue_pairs field specifies the maximum number of each of transmit
> >+ and receive virtqueues that can used for multiqueue operation.
> 
> s/can/can be/
> >+ The following read-only fields:
> >+\emph on
> >+current_steering_rule
> >+\emph default
> >+,
> >+\emph on
> >+reserved
> >+\emph default
> >+ and
> >+\emph on
> >+current_steering_param
> >+\emph default
> >+ store the last successful VIRTIO_NET_CTRL_STEERING
> >+\begin_inset CommandInset ref
> >+LatexCommand ref
> >+reference "sub:Transmit-Packet-Steering"
> >+
> >+\end_inset
> >+
> >+ command executed by driver, for debugging.
> >+
> >+\change_unchanged
> >
> >  \begin_inset listings
> >  inline false
> >@@ -4105,6 +4182,40 @@ struct virtio_net_config {
> >  \begin_layout Plain Layout
> >
> >      u16 status;
> >+\change_inserted 1986246365 1346671221
> >+
> >+\end_layout
> >+
> >+\begin_layout Plain Layout
> >+
> >+\change_inserted 1986246365 1346671532
> >+
> >+    u16 max_virtqueue_pairs;
> >+\end_layout
> >+
> >+\begin_layout Plain Layout
> >+
> >+\change_inserted 1986246365 1346671531
> >+
> >+    u8 current_steering_rule;
> >+\change_unchanged
> >+
> >+\end_layout
> >+
> >+\begin_layout Plain Layout
> >+
> >+\change_inserted 1986246365 1346671499
> >+
> >+    u8 reserved;
> >+\end_layout
> >+
> >+\begin_layout Plain Layout
> >+
> >+\change_inserted 1986246365 1346671530
> >+
> >+    u16 current_steering_param;
> >+\change_unchanged
> >+
> >  \end_layout
> >
> >  \begin_layout Plain Layout
> >@@ -4151,6 +4262,18 @@ physical
> >  \begin_layout Enumerate
> >  If the VIRTIO_NET_F_CTRL_VQ feature bit is negotiated, identify the control
> >   virtqueue.
> >+\change_inserted 1986246365 1346618052
> >+
> >+\end_layout
> >+
> >+\begin_layout Enumerate
> >+
> >+\change_inserted 1986246365 1346618175
> >+If VIRTIO_NET_F_MULTIQUEUE feature bit is negotiated, identify the receive
> >+ and transmission queues that are going to be used in multiqueue mode.
> >+ Only queues that are going to be used need to be initialized.
> >+\change_unchanged
> >+
> >  \end_layout
> >
> >  \begin_layout Enumerate
> >@@ -4168,7 +4291,11 @@ status
> >  \end_layout
> >
> >  \begin_layout Enumerate
> >-The receive virtqueue should be filled with receive buffers.
> >+The receive virtqueue
> >+\change_inserted 1986246365 1346618180
> >+(s)
> >+\change_unchanged
> >+ should be filled with receive buffers.
> >   This is described in detail below in
> >  \begin_inset Quotes eld
> >  \end_inset
> >@@ -4516,6 +4643,201 @@ Note that the header will be two bytes longer for the VIRTIO_NET_F_MRG_RXBUF
> >  \end_layout
> >
> >  \begin_layout Subsection*
> >+
> >+\change_inserted 1986246365 1346670975
> >+\begin_inset CommandInset label
> >+LatexCommand label
> >+name "sub:Transmit-Packet-Steering"
> >+
> >+\end_inset
> >+
> >+Transmit Packet Steering
> >+\end_layout
> >+
> 
> Since this needs control vq, move this after the section of control vq?

I don't mind but this is also transmit related.

> >+\begin_layout Standard
> >+
> >+\change_inserted 1986246365 1346670592
> >+When VIRTIO_NET_F_MULTIQUEUE feature bit is negotiated, guest can use any
> >+ of multiple configured transmit queues to transmit a given packet.
> >+ To avoid packet reordering by device (which gnerally leads to performance
> >+ degradation) driver should attempt to utilize the same transmit virtqueue
> >+ for all packets of a given transmit flow.
> >+ For bi-directional protocols (in practice, TCP), a given network connection
> >+ can utilize both transmit and receive queues.
> >+ For best performance, packets from a single connection should utilize the
> >+ paired transmit and receive queues from the same virtqueue pair; for example
> >+ both transmitqN and receiveqN.
> >+ This rule makes it possible to optimize processing on the device side,
> >+ but this is not a hard requirement: devices should function correctly even
> >+ when this rule is not followed.
> >+\end_layout
> >+
> >+\begin_layout Standard
> >+
> >+\change_inserted 1986246365 1346670727
> >+Driver selects an active steering rule using VIRTIO_NET_CTRL_STEERING command
> >+ (this controls both which virtqueue is selected for a given packet for
> >+ receive and notifies the device which virtqueues are about to be used for
> >+ transmit).
> >+\end_layout
> >+
> >+\begin_layout Standard
> >+
> 
> Not sure it's suitable to define the policy in the spec ( and even a
> not hard requirement ). An alternative is to just define some tools
> such as the queue number negotiation mechanism like modern card and
> just let the driver to choose the policy, this can give both
> flexibility and space for the future evolution of the device. E.g.
> we may have a guest controlled flow director in the future, we can
> have a seperate command to enable/disable it and a separate feature
> bits.

Problem with such approaches is, if we make them very generic they
will be slow (interpreter) or hard to implement (jit compiler).
I think rx follows tx above solves this neatly without
a lot of complexity.


> >+\change_inserted 1986246365 1346670594
> >+This command accepts a single out argument in the following format:
> >+\end_layout
> >+
> >+\begin_layout Standard
> >+
> >+\change_inserted 1986246365 1346670594
> >+\begin_inset listings
> >+inline false
> >+status open
> >+
> >+\begin_layout Plain Layout
> >+
> >+\change_inserted 1986246365 1346670594
> >+
> >+#define VIRTIO_NET_CTRL_STEERING       4
> >+\end_layout
> >+
> >+\begin_layout Plain Layout
> >+
> >+\change_inserted 1986246365 1346670594
> >+
> >+\end_layout
> >+
> >+\begin_layout Plain Layout
> >+
> >+\change_inserted 1986246365 1346670594
> >+
> >+struct virtio_net_ctrl_steering {
> >+\end_layout
> >+
> >+\begin_layout Plain Layout
> >+
> >+\change_inserted 1986246365 1346671425
> >+
> >+	u8 current_steering_rule;
> >+\end_layout
> >+
> >+\begin_layout Plain Layout
> >+
> >+\change_inserted 1986246365 1346670594
> >+
> >+    u8 reserved;
> >+\end_layout
> >+
> >+\begin_layout Plain Layout
> >+
> >+\change_inserted 1986246365 1346671485
> >+
> >+	u16 current_steering_param;
> >+\end_layout
> >+
> >+\begin_layout Plain Layout
> >+
> >+\change_inserted 1986246365 1346670594
> >+
> >+};
> >+\end_layout
> >+
> >+\begin_layout Plain Layout
> >+
> >+\change_inserted 1986246365 1346670594
> >+
> >+#define VIRTIO_NET_CTRL_STEERING_SINGLE       0
> >+\end_layout
> >+
> >+\begin_layout Plain Layout
> >+
> >+\change_inserted 1986246365 1346670594
> >+
> >+#define VIRTIO_NET_CTRL_STEERING_HOST  1
> >+\end_layout
> >+
> >+\end_inset
> >+
> >+
> >+\end_layout
> >+
> >+\begin_layout Standard
> >+
> >+\change_inserted 1986246365 1346671803
> >+The field
> >+\emph on
> >+rule
> >+\emph default
> >+ specifies the function used to select transmit virtqueue for a given packet;
> >+ the field
> >+\emph on
> >+param
> >+\emph default
> >+ makes it possible to pass an extra parameter if appropriate.
> >+ When
> >+\emph on
> >+rule
> >+\emph default
> >+ is set to VIRTIO_NET_CTRL_STEERING_SINGLE all packets are steered to the
> >+ default virtqueue transmitq (1); param is unused; this is the default.
> >+ When
> >+\emph on
> >+rule
> >+\emph default
> >+ is set to VIRTIO_NET_CTRL_STEERING_HOST packets are steered by driver to
> >+ the first (
> >+\emph on
> >+param
> >+\emph default
> >++1) multiqueue virtqueues transmitq1...transmitqN; the default transmitq is
> >+ unused.
> 
> It's driver that select the transmitq, does the device need to know
> about the transmit steering used in driver? defining the those
> transmission steering commands for the deivce looks useless. Maybe
> we'd better rename the subsection to "Packet Steering" and focus on
> the receive part?

Fow rx follows tx device needs to know how many tx queues
will be used.
Also I think it's good to be explicit and write up what
we expect driver to do rather than leave it to
drivers to interpret the spec any way they like.

> >+ Driver must have configured all these (
> >+\emph on
> >+param
> >+\emph default
> >++1) virtqueues beforehand.
> >+ For best performance, driver should detects flow to virtqueue pair mapping
> >+ on receive and selects the transmit virtqueue from the same virtqueue pair.
> >+\end_layout
> >+
> >+\begin_layout Standard
> >+
> >+\change_inserted 1986246365 1346670762
> >+Supported steering rules can be added and removed in the future.
> >+ Driver should probe for supported rules by checking ack values of the command.
> >+\end_layout
> >+
> 
> What's the advantages of using this over feature bits?

Feature bits are negotiated at startup. Experience shows
there might be no ideal steering rule for all workloads
so switching them at runtime is needed.


> >+\begin_layout Standard
> >+
> >+\change_inserted 1986246365 1346671315
> >+When steering rule is modified, some packets can still be outstanding in
> >+ one or more of the virtqueues.
> >+ For transmit, device is recommended to complete processing of the transmit
> >+ queue(s) utilized by the original steering before processing any packets
> >+ delivered by the modified steering rule.
> >+\end_layout
> >+
> >+\begin_layout Standard
> >+
> >+\change_inserted 1986246365 1346671412
> >+For debugging, current steering rule can also be read from the configuration
> >+ space.
> >+\end_layout
> >+
> >+\begin_layout Standard
> >+
> >+\change_inserted 1986246365 1346670762
> >+See also receive steering description
> >+\begin_inset CommandInset ref
> >+LatexCommand ref
> >+reference "sub:Receive-Packet-Steering"
> >+
> >+\end_inset
> >+
> >+.
> >+\end_layout
> >+
> >+\begin_layout Subsection*
> >  Packet Transmission Interrupt
> >  \end_layout
> >
> >@@ -4988,8 +5310,24 @@ status open
> >  The Guest needs to check VIRTIO_NET_S_ANNOUNCE bit in status field when
> >   it notices the changes of device configuration.
> >   The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that driver
> >- has recevied the notification and device would clear the VIRTIO_NET_S_ANNOUNCE
> >- bit in the status filed after it received this command.
> >+ has rece
> >+\change_inserted 1986246365 1346663932
> >+i
> >+\change_unchanged
> >+v
> >+\change_deleted 1986246365 1346663934
> >+i
> >+\change_unchanged
> >+ed the notification and device would clear the VIRTIO_NET_S_ANNOUNCE bit
> >+ in the status fi
> >+\change_inserted 1986246365 1346663942
> >+e
> >+\change_unchanged
> >+l
> >+\change_deleted 1986246365 1346663943
> >+e
> >+\change_unchanged
> >+d after it received this command.
> >  \end_layout
> >
> >  \begin_layout Standard
> >@@ -5004,10 +5342,101 @@ Sending the gratuitous packets or marking there are pending gratuitous packets
> >  \begin_layout Enumerate
> >  Sending VIRTIO_NET_CTRL_ANNOUNCE_ACK command through control vq.
> >
> >+\change_deleted 1986246365 1346662247
> >+
> >  \end_layout
> >
> >-\begin_layout Enumerate
> >+\begin_layout Subsection*
> >+
> >+\change_inserted 1986246365 1346670357
> >+\begin_inset CommandInset label
> >+LatexCommand label
> >+name "sub:Receive-Packet-Steering"
> >+
> >+\end_inset
> >+
> >+Receive Packet Steering
> >+\end_layout
> >+
> >+\begin_layout Standard
> >+
> >+\change_inserted 1986246365 1346671046
> >+When VIRTIO_NET_F_MULTIQUEUE feature bit is negotiated, device can use any
> >+ of multiple configured receive queues to pass a given packet to driver.
> >+ Driver controls which virtqueue is selected in practice by configuring
> >+ packet steering rule using VIRTIO_NET_CTRL_STEERING command, as described
> >+ above
> >+\begin_inset CommandInset ref
> >+LatexCommand ref
> >+reference "sub:Transmit-Packet-Steering"
> >+
> >+\end_inset
> >+
> >  .
> >+\end_layout
> >+
> >+\begin_layout Standard
> >+
> >+\change_inserted 1986246365 1346671818
> >+The field
> >+\emph on
> >+rule
> >+\emph default
> >+ specifies the function used to select receive virtqueue for a given packet;
> >+ the field
> >+\emph on
> >+param
> >+\emph default
> >+ makes it possible to pass an extra parameter if appropriate.
> >+ When
> >+\emph on
> >+rule
> >+\emph default
> >+ is set to VIRTIO_NET_CTRL_STEERING_SINGLE all packets are steered to the
> >+ default virtqueue receveq (0); param is unused; this is the default.
> 
> A question here is whether or not we need to do the seamless
> switching between mq modes and sq modes. If we expect it happens
> rare, we could afforad some disordering by dropping this and just do
> the switching by resetting the device and negotiating the MULTIQUEUE
> bit or not on demand.

So far your own performance results show there is no
ideal mode. And to me, this makes a lot of sense.
If you reset on switch this is very slow, so this basically means you
will forever rely on guest admin to know what the right thing to do is.
I agree it's a sensible thing to do in v1 but long term
an adaptive strategy requires that mode switch is lightweight.

> This can simplify the implemention.

It only seems like that. That's much more work for a driver to do, and
lots of races to avoid.  Just using vqs negotiated at startup is exactly
what all drivers currently do - it is much easier to do correctly.

> >+ When
> >+\emph on
> >+rule
> >+\emph default
> >+ is set to VIRTIO_NET_CTRL_STEERING_HOST packets are steered by host using
> >+ a device-specific steering function to the first (
> >+\emph on
> >+param
> >+\emph default
> >++1) multiqueue virtqueues receiveq1...receiveqN; the default receiveq is unused.
> >+ Driver must have configured all these (
> >+\emph on
> >+param
> >+\emph default
> >++1) virtqueues beforehand.
> >+ For best performance, driver is expected to detect flow to virtqueue pair
> >+ mapping on receive and select the transmit virtqueue from the same virtqueue
> >+ pair.
> >+\end_layout
> >+
> >+\begin_layout Standard
> >+
> >+\change_inserted 1986246365 1346669564
> >+Supported steering rules can be added and removed in the future.
> >+ Driver should probe for supported rules by checking ack values of the command.
> >+\end_layout
> >+
> >+\begin_layout Standard
> >+
> >+\change_inserted 1986246365 1346671151
> >+When steering rule is modified, some packets can still be outstanding in
> >+ one or more of the virtqueues.
> >+ For receive, driver is recommended to complete processing of the receive
> >+ queue(s) utilized by the original steering before processing any packets
> >+ delivered by the modified steering rule.
> >+\end_layout
> >+
> >+\begin_layout Standard
> >+
> >+\change_deleted 1986246365 1346664095
> >+.
> >+
> >+\change_unchanged
> >
> >  \end_layout
> >

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox