Netdev List
 help / color / mirror / Atom feed
* [V4 PATCH 5/8] csiostor: Chelsio FCoE offload driver submission (sources part 2).
From: Naresh Kumar Inna @ 2012-09-12 17:18 UTC (permalink / raw)
  To: JBottomley, linux-scsi, dm, leedom; +Cc: netdev, naresh, chethan
In-Reply-To: <1347470328-32490-1-git-send-email-naresh@chelsio.com>

This patch contains code for the FC transport template callbacks and the
Mailbox module functionality. The FC transport callbacks include Virtual
Node ports creation and deletion, FC session registration, unregistration and
teardown. The Mailbox module provides services to issue/track/cancel
mailbox commands and wrappers for them.

Signed-off-by: Naresh Kumar Inna <naresh@chelsio.com>
---
 drivers/scsi/csiostor/csio_attr.c |  809 +++++++++++++++++
 drivers/scsi/csiostor/csio_mb.c   | 1769 +++++++++++++++++++++++++++++++++++++
 2 files changed, 2578 insertions(+), 0 deletions(-)
 create mode 100644 drivers/scsi/csiostor/csio_attr.c
 create mode 100644 drivers/scsi/csiostor/csio_mb.c

diff --git a/drivers/scsi/csiostor/csio_attr.c b/drivers/scsi/csiostor/csio_attr.c
new file mode 100644
index 0000000..ad29fd9
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_attr.c
@@ -0,0 +1,809 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/mm.h>
+#include <linux/jiffies.h>
+#include <scsi/fc/fc_fs.h>
+
+#include "csio_init.h"
+
+static void
+csio_vport_set_state(struct csio_lnode *ln);
+
+/*
+ * csio_reg_rnode - Register a remote port with FC transport.
+ * @rn: Rnode representing remote port.
+ *
+ * A rnode that already carries an initiator/target role reuses rn->rport and
+ * only has its role updated; otherwise fc_remote_port_add() registers it.
+ * The service-parameter frame size is big-endian, so it goes through ntohs().
+ */
+void
+csio_reg_rnode(struct csio_rnode *rn)
+{
+	struct csio_lnode *ln		= csio_rnode_to_lnode(rn);
+	struct Scsi_Host *shost		= csio_ln_to_shost(ln);
+	struct fc_rport_identifiers ids;
+	struct fc_rport  *rport;
+	struct csio_service_parms *sp;
+
+	ids.node_name	= wwn_to_u64(csio_rn_wwnn(rn));
+	ids.port_name	= wwn_to_u64(csio_rn_wwpn(rn));
+	ids.port_id	= rn->nport_id;
+	ids.roles	= FC_RPORT_ROLE_UNKNOWN;
+
+	if (rn->role & CSIO_RNFR_INITIATOR || rn->role & CSIO_RNFR_TARGET) {
+		rport = rn->rport;
+		CSIO_ASSERT(rport != NULL);
+		goto update_role;
+	}
+
+	rn->rport = fc_remote_port_add(shost, 0, &ids);
+	if (!rn->rport) {
+		csio_ln_err(ln, "Failed to register rport = 0x%x.\n",
+					rn->nport_id);
+		return;
+	}
+
+	ln->num_reg_rnodes++;
+	rport = rn->rport;
+	spin_lock_irq(shost->host_lock);
+	*((struct csio_rnode **)rport->dd_data) = rn;
+	spin_unlock_irq(shost->host_lock);
+
+	sp = &rn->rn_sparm;
+	rport->maxframe_size		= ntohs(sp->csp.sp_bb_data);
+	if (ntohs(sp->clsp[2].cp_class) & FC_CPC_VALID)
+		rport->supported_classes = FC_COS_CLASS3;
+	else
+		rport->supported_classes = FC_COS_UNSPECIFIED;
+update_role:
+	if (rn->role & CSIO_RNFR_INITIATOR)
+		ids.roles |= FC_RPORT_ROLE_FCP_INITIATOR;
+	if (rn->role & CSIO_RNFR_TARGET) {
+		ids.roles |= FC_RPORT_ROLE_FCP_TARGET;
+		ln->n_scsi_tgts++;
+	}
+
+	if (ids.roles != FC_RPORT_ROLE_UNKNOWN)
+		fc_remote_port_rolechg(rport, ids.roles);
+
+	rn->scsi_id = rport->scsi_target_id;
+
+	csio_ln_dbg(ln, "Remote port x%x role 0x%x registered\n",
+		rn->nport_id, ids.roles);
+}
+
+/*
+ * csio_unreg_rnode - Unregister a remote port with FC transport.
+ * @rn: Rnode representing remote port.
+ *
+ * Clear the initiator/target role bits, call fc_remote_port_delete() and
+ * decrement the lnode's registered-rnode and (if non-zero) target counters.
+ *
+ */
+void
+csio_unreg_rnode(struct csio_rnode *rn)
+{
+	struct csio_lnode *ln = csio_rnode_to_lnode(rn);
+	struct fc_rport *rport = rn->rport;
+
+	rn->role &= ~(CSIO_RNFR_INITIATOR | CSIO_RNFR_TARGET);
+	fc_remote_port_delete(rport);
+	ln->num_reg_rnodes--;
+
+	if (ln->n_scsi_tgts)
+		ln->n_scsi_tgts--;
+
+	if (ln->last_scan_ntgts)
+		ln->last_scan_ntgts--;
+
+	csio_ln_dbg(ln, "Remote port x%x un-registered\n", rn->nport_id);
+}
+
+/*
+ * csio_lnode_async_event - Async events from local port.
+ * @ln: lnode representing local port.
+ * @fc_evt: event being reported (RSCN, link up/down, attribute update).
+ *
+ * Async events that FC transport/SCSI ML should be made aware of (Eg: RSCN).
+ */
+void
+csio_lnode_async_event(struct csio_lnode *ln, enum csio_ln_fc_evt fc_evt)
+{
+	switch (fc_evt) {
+	case CSIO_LN_FC_RSCN:
+		/* Placeholder: get payload of rscn from ln */
+		/* For each RSCN entry */
+			/*
+			 * fc_host_post_event(shost,
+			 *		      fc_get_event_number(),
+			 *		      FCH_EVT_RSCN,
+			 *		      rscn_entry);
+			 */
+		break;
+	case CSIO_LN_FC_LINKUP:
+		/* TODO: send fc_host_post_event */
+		/* set vport state (NPIV lnodes only) */
+		if (csio_is_npiv_ln(ln))
+			csio_vport_set_state(ln);
+
+		break;
+	case CSIO_LN_FC_LINKDOWN:
+		/* TODO: send fc_host_post_event */
+		/* set vport state (NPIV lnodes only) */
+		if (csio_is_npiv_ln(ln))
+			csio_vport_set_state(ln);
+
+		break;
+	case CSIO_LN_FC_ATTRIB_UPDATE:
+		csio_fchost_attr_init(ln);
+		break;
+	default:
+		break;
+	}
+}
+
+/*
+ * csio_fchost_attr_init - Initialize FC transport attributes
+ * @ln: Lnode.
+ * Max frame size comes from big-endian service params, hence ntohs().
+ */
+void
+csio_fchost_attr_init(struct csio_lnode *ln)
+{
+	struct Scsi_Host  *shost = csio_ln_to_shost(ln);
+
+	fc_host_node_name(shost) = wwn_to_u64(csio_ln_wwnn(ln));
+	fc_host_port_name(shost) = wwn_to_u64(csio_ln_wwpn(ln));
+
+	fc_host_supported_classes(shost) = FC_COS_CLASS3;
+	fc_host_max_npiv_vports(shost) =
+			(csio_lnode_to_hw(ln))->fres_info.max_vnps;
+	fc_host_supported_speeds(shost) = FC_PORTSPEED_10GBIT |
+		FC_PORTSPEED_1GBIT;
+
+	fc_host_maxframe_size(shost) = ntohs(ln->ln_sparm.csp.sp_bb_data);
+	memset(fc_host_supported_fc4s(shost), 0,
+		sizeof(fc_host_supported_fc4s(shost)));
+	fc_host_supported_fc4s(shost)[7] = 1;
+
+	memset(fc_host_active_fc4s(shost), 0,
+		sizeof(fc_host_active_fc4s(shost)));
+	fc_host_active_fc4s(shost)[7] = 1;
+}
+
+/*
+ * csio_get_host_port_id - Cache the lnode's nport_id in the FC host
+ * port_id attribute (read under hw->lock) for the sysfs entry.
+ */
+static void
+csio_get_host_port_id(struct Scsi_Host *shost)
+{
+	struct csio_lnode *ln	= shost_priv(shost);
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	spin_lock_irq(&hw->lock);
+	fc_host_port_id(shost) = ln->nport_id;
+	spin_unlock_irq(&hw->lock);
+}
+
+/*
+ * csio_get_host_port_type - Return FC local port type.
+ * @shost: scsi host.
+ * Reports FC_PORTTYPE_NPIV for NPIV lnodes, FC_PORTTYPE_NPORT otherwise.
+ */
+static void
+csio_get_host_port_type(struct Scsi_Host *shost)
+{
+	struct csio_lnode *ln = shost_priv(shost);
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	spin_lock_irq(&hw->lock);
+	if (csio_is_npiv_ln(ln))
+		fc_host_port_type(shost) = FC_PORTTYPE_NPIV;
+	else
+		fc_host_port_type(shost) = FC_PORTTYPE_NPORT;
+	spin_unlock_irq(&hw->lock);
+}
+
+/*
+ * csio_get_host_port_state - Return FC local port state.
+ * @shost: scsi host.
+ * "READY" maps to ONLINE, "OFFLINE" to LINKDOWN, anything else to UNKNOWN.
+ */
+static void
+csio_get_host_port_state(struct Scsi_Host *shost)
+{
+	struct csio_lnode *ln = shost_priv(shost);
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	char state[16];
+
+	spin_lock_irq(&hw->lock);
+
+	csio_lnode_state_to_str(ln, state);
+	if (!strcmp(state, "READY"))
+		fc_host_port_state(shost) = FC_PORTSTATE_ONLINE;
+	else if (!strcmp(state, "OFFLINE"))
+		fc_host_port_state(shost) = FC_PORTSTATE_LINKDOWN;
+	else
+		fc_host_port_state(shost) = FC_PORTSTATE_UNKNOWN;
+
+	spin_unlock_irq(&hw->lock);
+}
+
+/*
+ * csio_get_host_speed - Return link speed to FC transport.
+ * @shost: scsi host.
+ * Maps the port's 1G/10G link_speed value; anything else is UNKNOWN.
+ */
+static void
+csio_get_host_speed(struct Scsi_Host *shost)
+{
+	struct csio_lnode *ln = shost_priv(shost);
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	spin_lock_irq(&hw->lock);
+	switch (hw->pport[ln->portid].link_speed) {
+	case FW_PORT_CAP_SPEED_1G:
+		fc_host_speed(shost) = FC_PORTSPEED_1GBIT;
+		break;
+	case FW_PORT_CAP_SPEED_10G:
+		fc_host_speed(shost) = FC_PORTSPEED_10GBIT;
+		break;
+	default:
+		fc_host_speed(shost) = FC_PORTSPEED_UNKNOWN;
+		break;
+	}
+	spin_unlock_irq(&hw->lock);
+}
+
+/*
+ * csio_get_host_fabric_name - Return fabric name
+ * @shost: scsi host.
+ * Uses the WWNN of the rnode found at FC_FID_FLOGI, or 0 if none exists.
+ */
+static void
+csio_get_host_fabric_name(struct Scsi_Host *shost)
+{
+	struct csio_lnode *ln = shost_priv(shost);
+	struct csio_rnode *rn = NULL;
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	spin_lock_irq(&hw->lock);
+	rn = csio_rnode_lookup_portid(ln, FC_FID_FLOGI);
+	if (rn)
+		fc_host_fabric_name(shost) = wwn_to_u64(csio_rn_wwnn(rn));
+	else
+		fc_host_fabric_name(shost) = 0;
+	spin_unlock_irq(&hw->lock);
+}
+
+/*
+ * csio_get_stats - Return FC transport statistics.
+ * @shost: scsi host.
+ * Adds port and lnode counters into ln->fch_stats and returns that struct.
+ */
+static struct fc_host_statistics *
+csio_get_stats(struct Scsi_Host *shost)
+{
+	struct csio_lnode *ln = shost_priv(shost);
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+	struct fc_host_statistics *fhs = &ln->fch_stats;
+	struct fw_fcoe_port_stats fcoe_port_stats;
+	uint64_t seconds;
+
+	memset(&fcoe_port_stats, 0, sizeof(struct fw_fcoe_port_stats));
+	csio_get_phy_port_stats(hw, ln->portid, &fcoe_port_stats);
+
+	fhs->tx_frames  += (fcoe_port_stats.tx_bcast_frames +
+				fcoe_port_stats.tx_mcast_frames +
+				fcoe_port_stats.tx_ucast_frames +
+				fcoe_port_stats.tx_offload_frames);
+	fhs->tx_words  += (fcoe_port_stats.tx_bcast_bytes +
+			   fcoe_port_stats.tx_mcast_bytes +
+			   fcoe_port_stats.tx_ucast_bytes +
+			   fcoe_port_stats.tx_offload_bytes) /
+							CSIO_WORD_TO_BYTE;
+	fhs->rx_frames += (fcoe_port_stats.rx_bcast_frames +
+				fcoe_port_stats.rx_mcast_frames +
+				fcoe_port_stats.rx_ucast_frames);
+	fhs->rx_words += (fcoe_port_stats.rx_bcast_bytes +
+				fcoe_port_stats.rx_mcast_bytes +
+				fcoe_port_stats.rx_ucast_bytes) /
+							CSIO_WORD_TO_BYTE;
+	fhs->error_frames += fcoe_port_stats.rx_err_frames;
+	fhs->fcp_input_requests +=  ln->stats.n_input_requests;
+	fhs->fcp_output_requests +=  ln->stats.n_output_requests;
+	fhs->fcp_control_requests +=  ln->stats.n_control_requests;
+	fhs->fcp_input_megabytes +=  ln->stats.n_input_bytes >> 20;
+	fhs->fcp_output_megabytes +=  ln->stats.n_output_bytes >> 20;
+	fhs->link_failure_count = ln->stats.n_link_down;
+	/* 'seconds' holds milliseconds here; divided by 1000 just below */
+	seconds = jiffies_to_msecs(jiffies);
+	fhs->seconds_since_last_reset =
+				(seconds - hw->stats.n_reset_start) / 1000;
+	return fhs;
+}
+
+/*
+ * csio_set_rport_loss_tmo - Set the rport dev loss timeout
+ * @rport: fc rport.
+ * @timeout: new value for dev loss tmo.
+ *
+ * If timeout is non zero set the dev_loss_tmo to timeout, else set
+ * dev_loss_tmo to one, so a zero dev_loss_tmo is never stored.
+ */
+static void
+csio_set_rport_loss_tmo(struct fc_rport *rport, uint32_t timeout)
+{
+	if (timeout)
+		rport->dev_loss_tmo = timeout;
+	else
+		rport->dev_loss_tmo = 1;
+}
+
+static void
+csio_vport_set_state(struct csio_lnode *ln)
+{
+	struct fc_vport *fc_vport = ln->fc_vport;
+	struct csio_lnode  *pln = ln->pln;
+	char state[16];
+
+	/* Physical lnode must be READY, otherwise the vport is link-down */
+	csio_lnode_state_to_str(pln, state);
+	if (strcmp(state, "READY")) {
+		fc_vport_set_state(fc_vport, FC_VPORT_LINKDOWN);
+		return;
+	}
+
+	if (!(pln->flags & CSIO_LNF_NPIVSUPP)) {
+		fc_vport_set_state(fc_vport, FC_VPORT_NO_FABRIC_SUPP);
+		return;
+	}
+
+	/* Virtual lnode must also be READY for the vport to be active */
+	csio_lnode_state_to_str(ln, state);
+	if (strcmp(state, "READY")) {
+		fc_vport_set_state(fc_vport, FC_VPORT_LINKDOWN);
+		return;
+	}
+	fc_vport_set_state(fc_vport, FC_VPORT_ACTIVE);
+}
+
+/*
+ * csio_fcoe_alloc_vnp - Issue a FCoE VNP alloc mailbox for virtual lnode @ln.
+ * Returns 0 on success, the mailbox issue error, or -ENOMEM otherwise.
+ */
+static int
+csio_fcoe_alloc_vnp(struct csio_hw *hw, struct csio_lnode *ln)
+{
+	struct csio_lnode *pln;
+	struct csio_mb  *mbp;
+	struct fw_fcoe_vnp_cmd *rsp;
+	int ret;
+	int retry = 0;
+
+	/* Allocate Mbox request for the VNP cmd that allocs the vport */
+	spin_lock_irq(&hw->lock);
+	mbp = mempool_alloc(hw->mb_mempool, GFP_ATOMIC);
+	if (!mbp) {
+		CSIO_INC_STATS(hw, n_err_nomem);
+		spin_unlock_irq(&hw->lock);
+		return -ENOMEM;
+	}
+
+	pln = ln->pln;
+	ln->fcf_flowid = pln->fcf_flowid;
+	ln->portid = pln->portid;
+
+	csio_fcoe_vnp_alloc_init_mb(ln, mbp, CSIO_MB_DEFAULT_TMO,
+				    pln->fcf_flowid, pln->vnp_flowid, 0,
+				    csio_ln_wwnn(ln), csio_ln_wwpn(ln),
+				    NULL);
+
+	for (retry = 0; retry < 3; retry++) {
+		/* FW is expected to complete vnp cmd in immediate mode
+		 * without much delay.
+		 * Otherwise, there will be increase in IO latency since HW
+		 * lock is held till completion of vnp mbox cmd.
+		 */
+		ret = csio_mb_issue(hw, mbp);
+		if (ret != -EBUSY)
+			break;
+
+		/* Retry if mbox returns busy; sleep with the lock dropped */
+		spin_unlock_irq(&hw->lock);
+		msleep(2000);
+		spin_lock_irq(&hw->lock);
+	}
+
+	if (ret != 0) {
+		csio_ln_err(ln, "Failed to issue mbox FCoE VNP command\n");
+		goto out;
+	}
+
+	/* Process Mbox response of VNP command */
+	rsp = (struct fw_fcoe_vnp_cmd *)(mbp->mb);
+	if (FW_CMD_RETVAL_GET(ntohl(rsp->alloc_to_len16)) != FW_SUCCESS) {
+		csio_ln_err(ln, "FCOE VNP ALLOC cmd returned 0x%x!\n",
+			    FW_CMD_RETVAL_GET(ntohl(rsp->alloc_to_len16)));
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ln->vnp_flowid = FW_FCOE_VNP_CMD_VNPI_GET(
+				ntohl(rsp->gen_wwn_to_vnpi));
+	memcpy(csio_ln_wwnn(ln), rsp->vnport_wwnn, 8);
+	memcpy(csio_ln_wwpn(ln), rsp->vnport_wwpn, 8);
+
+	csio_ln_dbg(ln, "FCOE VNPI: 0x%x\n", ln->vnp_flowid);
+	csio_ln_dbg(ln, "\tWWNN: %02x%02x%02x%02x%02x%02x%02x%02x\n",
+		    ln->ln_sparm.wwnn[0], ln->ln_sparm.wwnn[1],
+		    ln->ln_sparm.wwnn[2], ln->ln_sparm.wwnn[3],
+		    ln->ln_sparm.wwnn[4], ln->ln_sparm.wwnn[5],
+		    ln->ln_sparm.wwnn[6], ln->ln_sparm.wwnn[7]);
+	csio_ln_dbg(ln, "\tWWPN: %02x%02x%02x%02x%02x%02x%02x%02x\n",
+		    ln->ln_sparm.wwpn[0], ln->ln_sparm.wwpn[1],
+		    ln->ln_sparm.wwpn[2], ln->ln_sparm.wwpn[3],
+		    ln->ln_sparm.wwpn[4], ln->ln_sparm.wwpn[5],
+		    ln->ln_sparm.wwpn[6], ln->ln_sparm.wwpn[7]);
+
+out:
+	mempool_free(mbp, hw->mb_mempool);
+	spin_unlock_irq(&hw->lock);
+	return ret;
+}
+
+/*
+ * csio_fcoe_free_vnp - Issue a FCoE VNP free mailbox for virtual lnode @ln.
+ */
+static int
+csio_fcoe_free_vnp(struct csio_hw *hw, struct csio_lnode *ln)
+{
+	struct csio_mb  *mbp;
+	struct fw_fcoe_vnp_cmd *rsp;
+	int ret;
+	int retry = 0;
+
+	/* Issue VNP cmd to free vport */
+	/* Allocate Mbox request */
+
+	spin_lock_irq(&hw->lock);
+	mbp = mempool_alloc(hw->mb_mempool, GFP_ATOMIC);
+	if (!mbp) {
+		CSIO_INC_STATS(hw, n_err_nomem);
+		spin_unlock_irq(&hw->lock);
+		return -ENOMEM;
+	}
+
+	csio_fcoe_vnp_free_init_mb(ln, mbp, CSIO_MB_DEFAULT_TMO,
+				   ln->fcf_flowid, ln->vnp_flowid,
+				   NULL);
+
+	for (retry = 0; retry < 3; retry++) {
+		ret = csio_mb_issue(hw, mbp);
+		if (ret != -EBUSY)
+			break;
+
+		/* Retry if mbox returns busy; sleep with the lock dropped */
+		spin_unlock_irq(&hw->lock);
+		msleep(2000);
+		spin_lock_irq(&hw->lock);
+	}
+
+	if (ret) {
+		csio_ln_err(ln, "Failed to issue mbox FCoE VNP command\n");
+		mempool_free(mbp, hw->mb_mempool);
+		spin_unlock_irq(&hw->lock);
+		return -EINVAL;
+	}
+
+	/* Process Mbox response of VNP command */
+	rsp = (struct fw_fcoe_vnp_cmd *)(mbp->mb);
+	if (FW_CMD_RETVAL_GET(ntohl(rsp->alloc_to_len16)) != FW_SUCCESS) {
+		csio_ln_err(ln, "FCOE VNP FREE cmd returned 0x%x!\n",
+			    FW_CMD_RETVAL_GET(ntohl(rsp->alloc_to_len16)));
+		mempool_free(mbp, hw->mb_mempool);
+		spin_unlock_irq(&hw->lock);
+		return -ENOMEM;
+	}
+
+	mempool_free(mbp, hw->mb_mempool);
+	spin_unlock_irq(&hw->lock);
+
+	return 0;
+}
+
+static int
+csio_vport_create(struct fc_vport *fc_vport, bool disable)
+{
+	struct Scsi_Host *shost = fc_vport->shost;
+	struct csio_lnode *pln = shost_priv(shost);
+	struct csio_lnode *ln = NULL;
+	struct csio_hw *hw = csio_lnode_to_hw(pln);
+	uint8_t wwn[8];
+	int ret = -1;	/* returned unchanged on every error path */
+
+	ln = csio_shost_init(hw, &fc_vport->dev, false, pln);
+	if (!ln)
+		goto error;
+
+	if (fc_vport->node_name != 0) {	/* caller supplied a WWNN */
+		u64_to_wwn(fc_vport->node_name, wwn);
+
+		if (!CSIO_VALID_WWN(wwn)) {
+			csio_ln_err(ln,
+				    "vport create failed. Invalid wwnn\n");
+			goto error;
+		}
+		memcpy(csio_ln_wwnn(ln), wwn, 8);
+	}
+
+	if (fc_vport->port_name != 0) {	/* caller supplied a WWPN */
+		u64_to_wwn(fc_vport->port_name, wwn);
+
+		if (!CSIO_VALID_WWN(wwn)) {
+			csio_ln_err(ln,
+				    "vport create failed. Invalid wwpn\n");
+			goto error;
+		}
+
+		if (csio_lnode_lookup_by_wwpn(hw, wwn)) {
+			csio_ln_err(ln,
+			    "vport create failed. wwpn already exists\n");
+			goto error;
+		}
+		memcpy(csio_ln_wwpn(ln), wwn, 8);
+	}
+
+	fc_vport_set_state(fc_vport, FC_VPORT_INITIALIZING);
+
+	if (csio_fcoe_alloc_vnp(hw, ln))
+		goto error;
+
+	*(struct csio_lnode **)fc_vport->dd_data = ln;
+	ln->fc_vport = fc_vport;
+	if (!fc_vport->node_name)	/* report the name now held by ln */
+		fc_vport->node_name = wwn_to_u64(csio_ln_wwnn(ln));
+	if (!fc_vport->port_name)	/* report the name now held by ln */
+		fc_vport->port_name = wwn_to_u64(csio_ln_wwpn(ln));
+	csio_fchost_attr_init(ln);
+	return 0;
+error:
+	if (ln)
+		csio_shost_exit(ln);
+
+	return ret;
+}
+
+static int
+csio_vport_delete(struct fc_vport *fc_vport)
+{
+	struct csio_lnode *ln = *(struct csio_lnode **)fc_vport->dd_data;
+	struct Scsi_Host *shost = csio_ln_to_shost(ln);
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	spin_lock_irq(&hw->lock);
+	if (csio_is_hw_removing(hw)) {	/* skip quiesce and vnp free */
+		spin_unlock_irq(&hw->lock);
+		csio_shost_exit(ln);
+		return 0;
+	}
+	spin_unlock_irq(&hw->lock);
+
+	/* Quiesce ios and send remove event to lnode */
+	scsi_block_requests(shost);
+	spin_lock_irq(&hw->lock);
+	csio_scsim_cleanup_io_lnode(csio_hw_to_scsim(hw), ln);
+	csio_lnode_close(ln);
+	spin_unlock_irq(&hw->lock);
+	scsi_unblock_requests(shost);
+
+	/* Free vnp, unless the vport is in the disabled state */
+	if (fc_vport->vport_state !=  FC_VPORT_DISABLED)
+		csio_fcoe_free_vnp(hw, ln);
+	csio_ln_err(ln, "vport deleted\n");
+	csio_shost_exit(ln);
+	return 0;
+}
+
+static int
+csio_vport_disable(struct fc_vport *fc_vport, bool disable)
+{
+	struct csio_lnode *ln = *(struct csio_lnode **)fc_vport->dd_data;
+	struct Scsi_Host *shost = csio_ln_to_shost(ln);
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	/* disable vport */
+	if (disable) {
+		/* Quiesce ios and send stop event to lnode */
+		scsi_block_requests(shost);
+		spin_lock_irq(&hw->lock);
+		csio_scsim_cleanup_io_lnode(csio_hw_to_scsim(hw), ln);
+		csio_lnode_stop(ln);
+		spin_unlock_irq(&hw->lock);
+		scsi_unblock_requests(shost);
+
+		/* Free vnp; the result of the free is not checked here */
+		csio_fcoe_free_vnp(hw, ln);
+		fc_vport_set_state(fc_vport, FC_VPORT_DISABLED);
+		csio_ln_err(ln, "vport disabled\n");
+		return 0;
+	} else {
+		/* enable vport by allocating a new vnp */
+		fc_vport_set_state(fc_vport, FC_VPORT_INITIALIZING);
+		if (csio_fcoe_alloc_vnp(hw, ln)) {
+			csio_ln_err(ln, "vport enabled failed.\n");
+			return -1;
+		}
+		csio_ln_err(ln, "vport enabled\n");
+		return 0;
+	}
+}
+
+static void
+csio_dev_loss_tmo_callbk(struct fc_rport *rport)
+{
+	struct csio_rnode *rn;
+	struct csio_hw *hw;
+	struct csio_lnode *ln;
+
+	rn = *((struct csio_rnode **)rport->dd_data);
+	ln = csio_rnode_to_lnode(rn);
+	hw = csio_lnode_to_hw(ln);
+
+	spin_lock_irq(&hw->lock);
+
+	/* nothing to do if hw is being removed or the rnode is ready again */
+	if (csio_is_hw_removing(hw) || csio_is_rnode_ready(rn)) {
+		spin_unlock_irq(&hw->lock);
+		return;
+	}
+
+	csio_ln_dbg(ln, "devloss timeout on rnode:%p portid:x%x flowid:x%x\n",
+		    rn, rn->nport_id, csio_rn_flowid(rn));
+
+	CSIO_INC_STATS(ln, n_dev_loss_tmo);
+
+	/*
+	 * enqueue devloss event to event worker thread to serialize all
+	 * rnode events. A failed enqueue is only counted as a dropped event.
+	 */
+	if (csio_enqueue_evt(hw, CSIO_EVT_DEV_LOSS, &rn, sizeof(rn))) {
+		CSIO_INC_STATS(hw, n_evt_drop);
+		spin_unlock_irq(&hw->lock);
+		return;
+	}
+
+	if (!(hw->flags & CSIO_HWF_FWEVT_PENDING)) { /* not yet scheduled */
+		hw->flags |= CSIO_HWF_FWEVT_PENDING;
+		spin_unlock_irq(&hw->lock);
+		schedule_work(&hw->evtq_work);
+		return;
+	}
+
+	spin_unlock_irq(&hw->lock);
+}
+
+/* FC transport functions template - Physical port (with vport ops) */
+struct fc_function_template csio_fc_transport_funcs = {
+	.show_host_node_name = 1,
+	.show_host_port_name = 1,
+	.show_host_supported_classes = 1,
+	.show_host_supported_fc4s = 1,
+	.show_host_maxframe_size = 1,
+
+	.get_host_port_id = csio_get_host_port_id,
+	.show_host_port_id = 1,
+
+	.get_host_port_type = csio_get_host_port_type,
+	.show_host_port_type = 1,
+
+	.get_host_port_state = csio_get_host_port_state,
+	.show_host_port_state = 1,
+
+	.show_host_active_fc4s = 1,
+	.get_host_speed = csio_get_host_speed,
+	.show_host_speed = 1,
+	.get_host_fabric_name = csio_get_host_fabric_name,
+	.show_host_fabric_name = 1,
+
+	.get_fc_host_stats = csio_get_stats,
+
+	.dd_fcrport_size = sizeof(struct csio_rnode *),
+	.show_rport_maxframe_size = 1,
+	.show_rport_supported_classes = 1,
+
+	.set_rport_dev_loss_tmo = csio_set_rport_loss_tmo,
+	.show_rport_dev_loss_tmo = 1,
+
+	.show_starget_port_id = 1,
+	.show_starget_node_name = 1,
+	.show_starget_port_name = 1,
+
+	.dev_loss_tmo_callbk = csio_dev_loss_tmo_callbk,
+	.dd_fcvport_size = sizeof(struct csio_lnode *),
+
+	.vport_create = csio_vport_create,
+	.vport_disable = csio_vport_disable,
+	.vport_delete = csio_vport_delete,
+};
+
+/* FC transport functions template - Virtual port (no vport ops set) */
+struct fc_function_template csio_fc_transport_vport_funcs = {
+	.show_host_node_name = 1,
+	.show_host_port_name = 1,
+	.show_host_supported_classes = 1,
+	.show_host_supported_fc4s = 1,
+	.show_host_maxframe_size = 1,
+
+	.get_host_port_id = csio_get_host_port_id,
+	.show_host_port_id = 1,
+
+	.get_host_port_type = csio_get_host_port_type,
+	.show_host_port_type = 1,
+
+	.get_host_port_state = csio_get_host_port_state,
+	.show_host_port_state = 1,
+	.show_host_active_fc4s = 1,
+
+	.get_host_speed = csio_get_host_speed,
+	.show_host_speed = 1,
+
+	.get_host_fabric_name = csio_get_host_fabric_name,
+	.show_host_fabric_name = 1,
+
+	.get_fc_host_stats = csio_get_stats,
+
+	.dd_fcrport_size = sizeof(struct csio_rnode *),
+	.show_rport_maxframe_size = 1,
+	.show_rport_supported_classes = 1,
+
+	.set_rport_dev_loss_tmo = csio_set_rport_loss_tmo,
+	.show_rport_dev_loss_tmo = 1,
+
+	.show_starget_port_id = 1,
+	.show_starget_node_name = 1,
+	.show_starget_port_name = 1,
+
+	.dev_loss_tmo_callbk = csio_dev_loss_tmo_callbk,
+
+};
diff --git a/drivers/scsi/csiostor/csio_mb.c b/drivers/scsi/csiostor/csio_mb.c
new file mode 100644
index 0000000..100afd1
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_mb.c
@@ -0,0 +1,1769 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/delay.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <scsi/scsi_transport_fc.h>
+
+#include "csio_hw.h"
+#include "csio_lnode.h"
+#include "csio_rnode.h"
+#include "csio_mb.h"
+#include "csio_wr.h"
+
+#define csio_mb_is_host_owner(__owner)		((__owner) == CSIO_MBOWNER_PL)
+
+/* MB Command/Response Helpers */
+/*
+ * csio_mb_fw_retval - FW return value from a mailbox response.
+ * @mbp: Mailbox structure
+ * Extracts the retval field from the 'lo' word of the response header.
+ */
+enum fw_retval
+csio_mb_fw_retval(struct csio_mb *mbp)
+{
+	struct fw_cmd_hdr *hdr;
+
+	hdr = (struct fw_cmd_hdr *)(mbp->mb);
+
+	return FW_CMD_RETVAL_GET(ntohl(hdr->lo));
+}
+
+/*
+ * csio_mb_hello - FW HELLO command helper
+ * @hw: The HW structure
+ * @mbp: Mailbox structure
+ * @tmo: Command timeout.
+ * @m_mbox: Master mailbox number, if any.
+ * @a_mbox: Mailbox number for async notifications.
+ * @master: Device mastership.
+ * @cbfn: Callback, if any.
+ */
+void
+csio_mb_hello(struct csio_hw *hw, struct csio_mb *mbp, uint32_t tmo,
+	      uint32_t m_mbox, uint32_t a_mbox, enum csio_dev_master master,
+	      void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_hello_cmd *cmdp = (struct fw_hello_cmd *)(mbp->mb);
+
+	CSIO_INIT_MBP(mbp, cmdp, tmo, hw, cbfn, 1);
+
+	cmdp->op_to_write = htonl(FW_CMD_OP(FW_HELLO_CMD) |
+				       FW_CMD_REQUEST | FW_CMD_WRITE);
+	cmdp->retval_len16 = htonl(FW_CMD_LEN16(sizeof(*cmdp) / 16));
+	cmdp->err_to_clearinit = htonl(
+		FW_HELLO_CMD_MASTERDIS(master == CSIO_MASTER_CANT)	|
+		FW_HELLO_CMD_MASTERFORCE(master == CSIO_MASTER_MUST)	|
+		FW_HELLO_CMD_MBMASTER(master == CSIO_MASTER_MUST ?
+				m_mbox : FW_HELLO_CMD_MBMASTER_MASK)	|
+		FW_HELLO_CMD_MBASYNCNOT(a_mbox) |
+		FW_HELLO_CMD_STAGE(FW_HELLO_CMD_STAGE_OS) |
+		FW_HELLO_CMD_CLEARINIT);
+
+}
+
+/*
+ * csio_mb_process_hello_rsp - FW HELLO response processing helper
+ * @hw: The HW structure
+ * @mbp: Mailbox structure
+ * @retval: Mailbox return value from Firmware
+ * @state: State that the function is in.
+ * @mpfn: Master pfn
+ * hw->fwrev, @state and @mpfn are written only when @retval is FW_SUCCESS.
+ */
+void
+csio_mb_process_hello_rsp(struct csio_hw *hw, struct csio_mb *mbp,
+			  enum fw_retval *retval, enum csio_dev_state *state,
+			  uint8_t *mpfn)
+{
+	struct fw_hello_cmd *rsp = (struct fw_hello_cmd *)(mbp->mb);
+	uint32_t value;
+
+	*retval = FW_CMD_RETVAL_GET(ntohl(rsp->retval_len16));
+
+	if (*retval == FW_SUCCESS) {
+		hw->fwrev = ntohl(rsp->fwrev);
+
+		value = ntohl(rsp->err_to_clearinit);
+		*mpfn = FW_HELLO_CMD_MBMASTER_GET(value);
+
+		if (value & FW_HELLO_CMD_INIT)
+			*state = CSIO_DEV_STATE_INIT;
+		else if (value & FW_HELLO_CMD_ERR)
+			*state = CSIO_DEV_STATE_ERR;
+		else
+			*state = CSIO_DEV_STATE_UNINIT;
+	}
+}
+
+/*
+ * csio_mb_bye - FW BYE command helper
+ * @hw: The HW structure
+ * @mbp: Mailbox structure
+ * @tmo: Command timeout.
+ * @cbfn: Callback, if any.
+ */
+void
+csio_mb_bye(struct csio_hw *hw, struct csio_mb *mbp, uint32_t tmo,
+	    void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_bye_cmd *cmdp = (struct fw_bye_cmd *)(mbp->mb);
+
+	CSIO_INIT_MBP(mbp, cmdp, tmo, hw, cbfn, 1);
+
+	cmdp->op_to_write = htonl(FW_CMD_OP(FW_BYE_CMD) |
+				       FW_CMD_REQUEST | FW_CMD_WRITE);
+	cmdp->retval_len16 = htonl(FW_CMD_LEN16(sizeof(*cmdp) / 16));
+
+}
+
+/*
+ * csio_mb_reset - FW RESET command helper
+ * @hw: The HW structure
+ * @mbp: Mailbox structure
+ * @tmo: Command timeout.
+ * @reset/@halt: Values stored in the command's val and halt_pkd fields.
+ * @cbfn: Callback, if any.
+ */
+void
+csio_mb_reset(struct csio_hw *hw, struct csio_mb *mbp, uint32_t tmo,
+	      int reset, int halt,
+	      void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_reset_cmd *cmdp = (struct fw_reset_cmd *)(mbp->mb);
+
+	CSIO_INIT_MBP(mbp, cmdp, tmo, hw, cbfn, 1);
+
+	cmdp->op_to_write = htonl(FW_CMD_OP(FW_RESET_CMD) |
+				  FW_CMD_REQUEST | FW_CMD_WRITE);
+	cmdp->retval_len16 = htonl(FW_CMD_LEN16(sizeof(*cmdp) / 16));
+	cmdp->val = htonl(reset);
+	cmdp->halt_pkd = htonl(halt);
+
+}
+
+/*
+ * csio_mb_params - FW PARAMS command helper
+ * @hw: The HW structure
+ * @mbp: Mailbox structure
+ * @tmo: Command timeout.
+ * @pf: PF number.
+ * @vf: VF number.
+ * @nparams: Number of parameters
+ * @params: Parameter mnemonic array.
+ * @val: Parameter value array.
+ * @wr: Write/Read PARAMS.
+ * @cbfn: Callback, if any.
+ * For a read only the mnemonics are filled in; @val is not accessed.
+ */
+void
+csio_mb_params(struct csio_hw *hw, struct csio_mb *mbp, uint32_t tmo,
+	       unsigned int pf, unsigned int vf, unsigned int nparams,
+	       const u32 *params, u32 *val, bool wr,
+	       void (*cbfn)(struct csio_hw *, struct csio_mb *))
+{
+	uint32_t i;
+	uint32_t temp_params = 0, temp_val = 0;
+	struct fw_params_cmd *cmdp = (struct fw_params_cmd *)(mbp->mb);
+	__be32 *p = &cmdp->param[0].mnem;
+
+	CSIO_INIT_MBP(mbp, cmdp, tmo, hw, cbfn, 1);
+
+	cmdp->op_to_vfn = htonl(FW_CMD_OP(FW_PARAMS_CMD)		|
+				FW_CMD_REQUEST				|
+				(wr ? FW_CMD_WRITE : FW_CMD_READ)	|
+				FW_PARAMS_CMD_PFN(pf)			|
+				FW_PARAMS_CMD_VFN(vf));
+	cmdp->retval_len16 = htonl(FW_CMD_LEN16(sizeof(*cmdp) / 16));
+
+	/* Write Params: store consecutive mnemonic/value word pairs */
+	if (wr) {
+		while (nparams--) {
+			temp_params = *params++;
+			temp_val = *val++;
+
+			*p++ = htonl(temp_params);
+			*p++ = htonl(temp_val);
+		}
+	} else {
+		for (i = 0; i < nparams; i++, p += 2) {
+			temp_params = *params++;
+			*p = htonl(temp_params);
+		}
+	}
+
+}
+
+/*
+ * csio_mb_process_read_params_rsp - FW PARAMS response processing helper
+ * @hw: The HW structure
+ * @mbp: Mailbox structure
+ * @retval: Mailbox return value from Firmware
+ * @nparams: Number of parameters
+ * @val: Parameter value array.
+ * @val is filled in only when the firmware return value is FW_SUCCESS.
+ */
+void
+csio_mb_process_read_params_rsp(struct csio_hw *hw, struct csio_mb *mbp,
+			   enum fw_retval *retval, unsigned int nparams,
+			   u32 *val)
+{
+	struct fw_params_cmd *rsp = (struct fw_params_cmd *)(mbp->mb);
+	uint32_t i;
+	__be32 *p = &rsp->param[0].val;
+
+	*retval = FW_CMD_RETVAL_GET(ntohl(rsp->retval_len16));
+
+	if (*retval == FW_SUCCESS)
+		for (i = 0; i < nparams; i++, p += 2)
+			*val++ = ntohl(*p);
+}
+
+/*
+ * csio_mb_ldst - FW LDST command
+ * @hw: The HW structure
+ * @mbp: Mailbox structure
+ * @tmo: timeout
+ * @reg: register
+ * Builds a PCI-E address-space read of @reg; no callback is installed.
+ */
+void
+csio_mb_ldst(struct csio_hw *hw, struct csio_mb *mbp, uint32_t tmo, int reg)
+{
+	struct fw_ldst_cmd *ldst_cmd = (struct fw_ldst_cmd *)(mbp->mb);
+	CSIO_INIT_MBP(mbp, ldst_cmd, tmo, hw, NULL, 1);
+
+	/*
+	 * Construct and send the Firmware LDST Command to retrieve the
+	 * specified PCI-E Configuration Space register.
+	 */
+	ldst_cmd->op_to_addrspace =
+			htonl(FW_CMD_OP(FW_LDST_CMD)	|
+			FW_CMD_REQUEST			|
+			FW_CMD_READ			|
+			FW_LDST_CMD_ADDRSPACE(FW_LDST_ADDRSPC_FUNC_PCIE));
+	ldst_cmd->cycles_to_len16 = htonl(FW_LEN16(struct fw_ldst_cmd));
+	ldst_cmd->u.pcie.select_naccess = FW_LDST_CMD_NACCESS(1);
+	ldst_cmd->u.pcie.ctrl_to_fn =
+		(FW_LDST_CMD_LC | FW_LDST_CMD_FN(hw->pfn));
+	ldst_cmd->u.pcie.r = (uint8_t)reg;	/* truncated to 8 bits */
+}
+
+/*
+ * csio_mb_caps_config - FW Read/Write Capabilities command helper
+ * @hw: The HW structure
+ * @mbp: Mailbox structure
+ * @tmo: Command timeout.
+ * @wr: Write if 1, Read if 0
+ * @init: Turn on initiator mode.
+ * @tgt: Turn on target mode.
+ * @cofld:  If 1, Control Offload for FCoE
+ * @cbfn: Callback, if any.
+ *
+ * This helper assumes that cmdp has MB payload from a previous CAPS
+ * read command.
+ */
+void
+csio_mb_caps_config(struct csio_hw *hw, struct csio_mb *mbp, uint32_t tmo,
+		    bool wr, bool init, bool tgt, bool cofld,
+		    void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_caps_config_cmd *cmdp =
+				(struct fw_caps_config_cmd *)(mbp->mb);
+
+	CSIO_INIT_MBP(mbp, cmdp, tmo, hw, cbfn, wr ? 0 : 1);
+
+	cmdp->op_to_write = htonl(FW_CMD_OP(FW_CAPS_CONFIG_CMD) |
+				  FW_CMD_REQUEST		|
+				  (wr ? FW_CMD_WRITE : FW_CMD_READ));
+	cmdp->cfvalid_to_len16 = htonl(FW_CMD_LEN16(sizeof(*cmdp) / 16));
+
+	/* Read config: the header above is all that is needed */
+	if (!wr)
+		return;
+
+	/* Write config: rebuild fcoecaps from the requested modes */
+	cmdp->fcoecaps = 0;
+
+	if (cofld)
+		cmdp->fcoecaps |= htons(FW_CAPS_CONFIG_FCOE_CTRL_OFLD);
+	if (init)
+		cmdp->fcoecaps |= htons(FW_CAPS_CONFIG_FCOE_INITIATOR);
+	if (tgt)
+		cmdp->fcoecaps |= htons(FW_CAPS_CONFIG_FCOE_TARGET);
+}
+
+/*
+ * csio_rss_glb_config - FW RSS global configuration command helper
+ * @hw: The HW structure
+ * @mbp: Mailbox structure to initialize
+ * @tmo: Command timeout
+ * @mode: RSS global mode. Only the manual and basic-virtual modes fill in
+ *	  a payload; any other value leaves just the command header set.
+ * @flags: Written to the basic-virtual option word; unused in manual mode.
+ * @cbfn: Callback, if any.
+ */
+void
+csio_rss_glb_config(struct csio_hw *hw, struct csio_mb *mbp,
+		    uint32_t tmo, uint8_t mode, unsigned int flags,
+		    void (*cbfn)(struct csio_hw *, struct csio_mb *))
+{
+	struct fw_rss_glb_config_cmd *cmdp =
+				(struct fw_rss_glb_config_cmd *)(mbp->mb);
+	__be32 mode_word = htonl(FW_RSS_GLB_CONFIG_CMD_MODE(mode));
+
+	CSIO_INIT_MBP(mbp, cmdp, tmo, hw, cbfn, 1);
+
+	cmdp->op_to_write = htonl(FW_CMD_OP(FW_RSS_GLB_CONFIG_CMD) |
+				  FW_CMD_REQUEST | FW_CMD_WRITE);
+	cmdp->retval_len16 = htonl(FW_CMD_LEN16(sizeof(*cmdp) / 16));
+
+	if (mode == FW_RSS_GLB_CONFIG_CMD_MODE_MANUAL) {
+		cmdp->u.manual.mode_pkd = mode_word;
+		return;
+	}
+
+	if (mode == FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
+		cmdp->u.basicvirtual.mode_pkd = mode_word;
+		cmdp->u.basicvirtual.synmapen_to_hashtoeplitz = htonl(flags);
+	}
+}
+
+
+/*
+ * csio_mb_pfvf - FW Write PF/VF capabilities command helper.
+ * @hw: The HW structure
+ * @mbp: Mailbox structure
+ * @tmo: Command timeout
+ * @pf: PF number placed in the command header
+ * @vf: VF number placed in the command header
+ * @txq: Value for the NEQ field
+ * @txq_eth_ctrl: Value for the NETHCTRL field
+ * @rxqi: Value for the NIQFLINT field
+ * @rxq: Value for the NIQ field
+ * @tc: Value for the TC field
+ * @vi: Value for the NVI field
+ * @cmask: Value for the CMASK field
+ * @pmask: Value for the PMASK field
+ * @nexactf: Value for the NEXACTF field
+ * @rcaps: Value for the R_CAPS field
+ * @wxcaps: Value for the WX_CAPS field
+ * @cbfn: Callback, if any.
+ *
+ */
+void
+csio_mb_pfvf(struct csio_hw *hw, struct csio_mb *mbp, uint32_t tmo,
+	     unsigned int pf, unsigned int vf, unsigned int txq,
+	     unsigned int txq_eth_ctrl, unsigned int rxqi,
+	     unsigned int rxq, unsigned int tc, unsigned int vi,
+	     unsigned int cmask, unsigned int pmask, unsigned int nexactf,
+	     unsigned int rcaps, unsigned int wxcaps,
+	     void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_pfvf_cmd *cmdp = (struct fw_pfvf_cmd *)(mbp->mb);
+	uint32_t hdr, iq_word, eq_word, vi_word, caps_word;
+
+	CSIO_INIT_MBP(mbp, cmdp, tmo, hw, cbfn, 1);
+
+	/* Assemble each command word in host order ... */
+	hdr = FW_CMD_OP(FW_PFVF_CMD) | FW_CMD_REQUEST | FW_CMD_WRITE |
+	      FW_PFVF_CMD_PFN(pf) | FW_PFVF_CMD_VFN(vf);
+	iq_word = FW_PFVF_CMD_NIQFLINT(rxqi) | FW_PFVF_CMD_NIQ(rxq);
+	eq_word = FW_PFVF_CMD_TYPE | FW_PFVF_CMD_CMASK(cmask) |
+		  FW_PFVF_CMD_PMASK(pmask) | FW_PFVF_CMD_NEQ(txq);
+	vi_word = FW_PFVF_CMD_TC(tc) | FW_PFVF_CMD_NVI(vi) |
+		  FW_PFVF_CMD_NEXACTF(nexactf);
+	caps_word = FW_PFVF_CMD_R_CAPS(rcaps) | FW_PFVF_CMD_WX_CAPS(wxcaps) |
+		    FW_PFVF_CMD_NETHCTRL(txq_eth_ctrl);
+
+	/* ... then store them big-endian. */
+	cmdp->op_to_vfn = htonl(hdr);
+	cmdp->retval_len16 = htonl(FW_CMD_LEN16(sizeof(*cmdp) / 16));
+	cmdp->niqflint_niq = htonl(iq_word);
+	cmdp->type_to_neq = htonl(eq_word);
+	cmdp->tc_to_nexactf = htonl(vi_word);
+	cmdp->r_caps_to_nethctrl = htonl(caps_word);
+}
+
+#define CSIO_ADVERT_MASK     (FW_PORT_CAP_SPEED_100M | FW_PORT_CAP_SPEED_1G |\
+			      FW_PORT_CAP_SPEED_10G | FW_PORT_CAP_ANEG)
+
+/*
+ * csio_mb_port - FW PORT command helper
+ * @hw: The HW structure
+ * @mbp: Mailbox structure
+ * @tmo: Command timeout
+ * @portid: Port ID to get/set info
+ * @wr: Write/Read PORT information.
+ * @fc: Flow control (PAUSE_RX / PAUSE_TX bits); used for writes only.
+ * @caps: Port capabilities to set; used for writes only.
+ * @cbfn: Callback, if any.
+ *
+ * A read builds a GET_PORT_INFO request. A write builds an L1_CFG request
+ * whose requested capabilities are @caps masked with CSIO_ADVERT_MASK plus
+ * the flow-control bits; the auto MDI setting is added only when @caps has
+ * FW_PORT_CAP_ANEG set.
+ */
+void
+csio_mb_port(struct csio_hw *hw, struct csio_mb *mbp, uint32_t tmo,
+	     uint8_t portid, bool wr, uint32_t fc, uint16_t caps,
+	     void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_port_cmd *cmdp = (struct fw_port_cmd *)(mbp->mb);
+	unsigned int rcap;
+
+	CSIO_INIT_MBP(mbp, cmdp, tmo, hw, cbfn,  1);
+
+	cmdp->op_to_portid = htonl(FW_CMD_OP(FW_PORT_CMD)		|
+				   FW_CMD_REQUEST			|
+				   (wr ? FW_CMD_EXEC : FW_CMD_READ)	|
+				   FW_PORT_CMD_PORTID(portid));
+
+	/* Get port info */
+	if (!wr) {
+		cmdp->action_to_len16 = htonl(
+			FW_PORT_CMD_ACTION(FW_PORT_ACTION_GET_PORT_INFO) |
+			FW_CMD_LEN16(sizeof(*cmdp) / 16));
+		return;
+	}
+
+	/* Set port */
+	cmdp->action_to_len16 = htonl(
+			FW_PORT_CMD_ACTION(FW_PORT_ACTION_L1_CFG) |
+			FW_CMD_LEN16(sizeof(*cmdp) / 16));
+
+	rcap = caps & CSIO_ADVERT_MASK;
+	if (fc & PAUSE_RX)
+		rcap |= FW_PORT_CAP_FC_RX;
+	if (fc & PAUSE_TX)
+		rcap |= FW_PORT_CAP_FC_TX;
+	if (caps & FW_PORT_CAP_ANEG)
+		rcap |= FW_PORT_MDI(FW_PORT_MDI_AUTO);
+
+	cmdp->u.l1cfg.rcap = htonl(rcap);
+}
+
+/*
+ * csio_mb_process_read_port_rsp - FW PORT command response processing helper
+ * @hw: The HW structure
+ * @mbp: Mailbox structure holding the firmware response
+ * @retval: Receives the return value extracted from the response
+ * @caps: Receives the port capabilities, in host byte order
+ *
+ * @caps is written only when the firmware reports FW_SUCCESS.
+ */
+void
+csio_mb_process_read_port_rsp(struct csio_hw *hw, struct csio_mb *mbp,
+			 enum fw_retval *retval, uint16_t *caps)
+{
+	struct fw_port_cmd *rsp = (struct fw_port_cmd *)(mbp->mb);
+
+	*retval = FW_CMD_RETVAL_GET(ntohl(rsp->action_to_len16));
+	if (*retval != FW_SUCCESS)
+		return;
+
+	*caps = ntohs(rsp->u.info.pcap);
+}
+
+/*
+ * csio_mb_initialize - FW INITIALIZE command helper
+ * @hw: The HW structure
+ * @mbp: Mailbox structure to initialize
+ * @tmo: Command timeout
+ * @cbfn: Callback, if any.
+ *
+ * The command consists of the two header words only.
+ */
+void
+csio_mb_initialize(struct csio_hw *hw, struct csio_mb *mbp, uint32_t tmo,
+		   void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_initialize_cmd *cmdp = (struct fw_initialize_cmd *)(mbp->mb);
+	uint32_t op_word = FW_CMD_OP(FW_INITIALIZE_CMD) |
+			   FW_CMD_REQUEST | FW_CMD_WRITE;
+
+	CSIO_INIT_MBP(mbp, cmdp, tmo, hw, cbfn, 1);
+
+	cmdp->op_to_write = htonl(op_word);
+	cmdp->retval_len16 = htonl(FW_CMD_LEN16(sizeof(*cmdp) / 16));
+}
+
+/*
+ * csio_mb_iq_alloc - Initializes the mailbox to allocate an
+ *				Ingress DMA queue in the firmware.
+ *
+ * @hw: The hw structure
+ * @mbp: Mailbox structure to initialize
+ * @priv: Private object
+ * @mb_tmo: Mailbox time-out period (in ms).
+ * @iq_params: Ingress queue params needed for allocation.
+ * @cbfn: The call-back function
+ *
+ * Fills in the header words, the VI id / queue type / asynchronous flag,
+ * and the sizes of both free lists (FL0 and FL1).
+ */
+static void
+csio_mb_iq_alloc(struct csio_hw *hw, struct csio_mb *mbp, void *priv,
+		 uint32_t mb_tmo, struct csio_iq_params *iq_params,
+		 void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_iq_cmd *cmdp = (struct fw_iq_cmd *)(mbp->mb);
+
+	CSIO_INIT_MBP(mbp, cmdp, mb_tmo, priv, cbfn, 1);
+
+	cmdp->op_to_vfn = htonl(FW_CMD_OP(FW_IQ_CMD)		|
+				FW_CMD_REQUEST | FW_CMD_EXEC	|
+				FW_IQ_CMD_PFN(iq_params->pfn)	|
+				FW_IQ_CMD_VFN(iq_params->vfn));
+
+	cmdp->alloc_to_len16 = htonl(FW_IQ_CMD_ALLOC		|
+				FW_CMD_LEN16(sizeof(*cmdp) / 16));
+
+	cmdp->type_to_iqandstindex = htonl(
+				FW_IQ_CMD_VIID(iq_params->viid)	|
+				FW_IQ_CMD_TYPE(iq_params->type)	|
+				FW_IQ_CMD_IQASYNCH(iq_params->iqasynch));
+
+	/*
+	 * Each free list has its own size field. The FL1 size must not be
+	 * stored into fl0size: that would clobber the FL0 size and leave
+	 * fl1size unset.
+	 */
+	cmdp->fl0size = htons(iq_params->fl0size);
+	cmdp->fl1size = htons(iq_params->fl1size);
+
+} /* csio_mb_iq_alloc */
+
+/*
+ * csio_mb_iq_write - Initializes the mailbox for writing into an
+ *				Ingress DMA Queue.
+ *
+ * @hw: The HW structure
+ * @mbp: Mailbox structure to initialize
+ * @priv: Private object
+ * @mb_tmo: Mailbox time-out period (in ms).
+ * @cascaded_req: TRUE - if this request is cascaded with an iq-alloc request.
+ * @iq_params: Ingress queue params needed for writing.
+ * @cbfn: The call-back function
+ *
+ * NOTE: Every field is OR-ed into cmdp->XXX rather than assigned, because
+ * this IQ write request can be cascaded with a previous IQ alloc request
+ * and must not overwrite the bits that request set. The same code works
+ * for a non-cascaded request, since the cmdp structure is then zeroed out
+ * by CSIO_INIT_MBP.
+ */
+static void
+csio_mb_iq_write(struct csio_hw *hw, struct csio_mb *mbp, void *priv,
+		 uint32_t mb_tmo, bool cascaded_req,
+		 struct csio_iq_params *iq_params,
+		 void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_iq_cmd *cmdp = (struct fw_iq_cmd *)(mbp->mb);
+	uint32_t start_stop;
+	uint32_t hdr, len16, andst;
+
+	if (iq_params->iq_start)
+		start_stop = FW_IQ_CMD_IQSTART(1);
+	else
+		start_stop = FW_IQ_CMD_IQSTOP(1);
+
+	/*
+	 * A cascaded request continues the command built by the IQ alloc
+	 * helper, so the mailbox must not be re-initialized with 0's.
+	 */
+	if (!cascaded_req)
+		CSIO_INIT_MBP(mbp, cmdp, mb_tmo, priv, cbfn, 1);
+
+	hdr = FW_CMD_OP(FW_IQ_CMD) | FW_CMD_REQUEST | FW_CMD_WRITE |
+	      FW_IQ_CMD_PFN(iq_params->pfn) | FW_IQ_CMD_VFN(iq_params->vfn);
+	len16 = start_stop | FW_CMD_LEN16(sizeof(*cmdp) / 16);
+	andst = FW_IQ_CMD_IQANDST(iq_params->iqandst)	|
+		FW_IQ_CMD_IQANUS(iq_params->iqanus)	|
+		FW_IQ_CMD_IQANUD(iq_params->iqanud)	|
+		FW_IQ_CMD_IQANDSTINDEX(iq_params->iqandstindex);
+
+	cmdp->op_to_vfn |= htonl(hdr);
+	cmdp->alloc_to_len16 |= htonl(len16);
+	cmdp->iqid |= htons(iq_params->iqid);
+	cmdp->fl0id |= htons(iq_params->fl0id);
+	cmdp->fl1id |= htons(iq_params->fl1id);
+	cmdp->type_to_iqandstindex |= htonl(andst);
+	cmdp->iqdroprss_to_iqesize |= htons(
+			FW_IQ_CMD_IQPCIECH(iq_params->iqpciech)		|
+			FW_IQ_CMD_IQDCAEN(iq_params->iqdcaen)		|
+			FW_IQ_CMD_IQDCACPU(iq_params->iqdcacpu)		|
+			FW_IQ_CMD_IQINTCNTTHRESH(iq_params->iqintcntthresh) |
+			FW_IQ_CMD_IQCPRIO(iq_params->iqcprio)		|
+			FW_IQ_CMD_IQESIZE(iq_params->iqesize));
+
+	cmdp->iqsize |= htons(iq_params->iqsize);
+	cmdp->iqaddr |= cpu_to_be64(iq_params->iqaddr);
+
+	/* These two settings are applied for queue type 0 only. */
+	if (iq_params->type == 0) {
+		cmdp->iqns_to_fl0congen |= htonl(
+			FW_IQ_CMD_IQFLINTIQHSEN(iq_params->iqflintiqhsen)|
+			FW_IQ_CMD_IQFLINTCONGEN(iq_params->iqflintcongen));
+	}
+
+	/*
+	 * The FL0 section is filled in only when FL0 has a size, an address
+	 * and an id other than 0xFFFF.
+	 */
+	if (!iq_params->fl0size || !iq_params->fl0addr ||
+	    (iq_params->fl0id == 0xFFFF))
+		return;
+
+	cmdp->iqns_to_fl0congen |= htonl(
+		FW_IQ_CMD_FL0HOSTFCMODE(iq_params->fl0hostfcmode)|
+		FW_IQ_CMD_FL0CPRIO(iq_params->fl0cprio)		|
+		FW_IQ_CMD_FL0PADEN(iq_params->fl0paden)		|
+		FW_IQ_CMD_FL0PACKEN(iq_params->fl0packen));
+	cmdp->fl0dcaen_to_fl0cidxfthresh |= htons(
+		FW_IQ_CMD_FL0DCAEN(iq_params->fl0dcaen)		|
+		FW_IQ_CMD_FL0DCACPU(iq_params->fl0dcacpu)	|
+		FW_IQ_CMD_FL0FBMIN(iq_params->fl0fbmin)		|
+		FW_IQ_CMD_FL0FBMAX(iq_params->fl0fbmax)		|
+		FW_IQ_CMD_FL0CIDXFTHRESH(iq_params->fl0cidxfthresh));
+	cmdp->fl0size |= htons(iq_params->fl0size);
+	cmdp->fl0addr |= cpu_to_be64(iq_params->fl0addr);
+} /* csio_mb_iq_write */
+
+/*
+ * csio_mb_iq_alloc_write - Initializes the mailbox for allocating and
+ *				writing an Ingress DMA Queue in one command.
+ *
+ * @hw: The HW structure
+ * @mbp: Mailbox structure to initialize
+ * @priv: Private data.
+ * @mb_tmo: Mailbox time-out period (in ms).
+ * @iq_params: Ingress queue params needed for allocation & writing.
+ * @cbfn: The call-back function
+ *
+ * Runs the IQ alloc helper and then the IQ write helper on the same
+ * mailbox. The write helper is called with cascaded_req set to true, so
+ * it adds its fields to the command built by the alloc helper instead of
+ * re-initializing it.
+ */
+void
+csio_mb_iq_alloc_write(struct csio_hw *hw, struct csio_mb *mbp, void *priv,
+		       uint32_t mb_tmo, struct csio_iq_params *iq_params,
+		       void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	csio_mb_iq_alloc(hw, mbp, priv, mb_tmo, iq_params, cbfn);
+	csio_mb_iq_write(hw, mbp, priv, mb_tmo, true, iq_params, cbfn);
+} /* csio_mb_iq_alloc_write */
+
+/*
+ * csio_mb_iq_alloc_write_rsp - Process the response to an ingress DMA
+ *				queue allocate & write mailbox command.
+ *
+ * @hw: The HW structure.
+ * @mbp: Mailbox structure holding the firmware response.
+ * @ret_val: Receives the firmware return value.
+ * @iq_params: Ingress queue parameters, after allocation and write.
+ *
+ * On FW_SUCCESS the physical IQ id, IQ id and both free-list ids are
+ * copied from the response; on any other return value all four are
+ * set to 0.
+ */
+void
+csio_mb_iq_alloc_write_rsp(struct csio_hw *hw, struct csio_mb *mbp,
+			   enum fw_retval *ret_val,
+			   struct csio_iq_params *iq_params)
+{
+	struct fw_iq_cmd *rsp = (struct fw_iq_cmd *)(mbp->mb);
+
+	*ret_val = FW_CMD_RETVAL_GET(ntohl(rsp->alloc_to_len16));
+
+	if (*ret_val != FW_SUCCESS) {
+		iq_params->fl1id = 0;
+		iq_params->fl0id = 0;
+		iq_params->iqid = 0;
+		iq_params->physiqid = 0;
+		return;
+	}
+
+	iq_params->physiqid = ntohs(rsp->physiqid);
+	iq_params->iqid = ntohs(rsp->iqid);
+	iq_params->fl0id = ntohs(rsp->fl0id);
+	iq_params->fl1id = ntohs(rsp->fl1id);
+} /* csio_mb_iq_alloc_write_rsp */
+
+/*
+ * csio_mb_iq_free - Initializes the mailbox for freeing a
+ *				specified Ingress DMA Queue.
+ *
+ * @hw: The HW structure
+ * @mbp: Mailbox structure to initialize
+ * @priv: Private data
+ * @mb_tmo: Mailbox time-out period (in ms).
+ * @iq_params: Parameters of the ingress queue that is to be freed; the
+ *	       PF/VF numbers, queue type, IQ id and both free-list ids
+ *	       are used.
+ * @cbfn: The call-back function
+ *
+ */
+void
+csio_mb_iq_free(struct csio_hw *hw, struct csio_mb *mbp, void *priv,
+		uint32_t mb_tmo, struct csio_iq_params *iq_params,
+		void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_iq_cmd *cmdp = (struct fw_iq_cmd *)(mbp->mb);
+	uint32_t hdr, len16;
+
+	CSIO_INIT_MBP(mbp, cmdp, mb_tmo, priv, cbfn, 1);
+
+	hdr = FW_CMD_OP(FW_IQ_CMD) | FW_CMD_REQUEST | FW_CMD_EXEC |
+	      FW_IQ_CMD_PFN(iq_params->pfn) | FW_IQ_CMD_VFN(iq_params->vfn);
+	len16 = FW_IQ_CMD_FREE | FW_CMD_LEN16(sizeof(*cmdp) / 16);
+
+	cmdp->op_to_vfn = htonl(hdr);
+	cmdp->alloc_to_len16 = htonl(len16);
+	cmdp->type_to_iqandstindex = htonl(FW_IQ_CMD_TYPE(iq_params->type));
+
+	/* Identify the queue and its free lists. */
+	cmdp->iqid = htons(iq_params->iqid);
+	cmdp->fl0id = htons(iq_params->fl0id);
+	cmdp->fl1id = htons(iq_params->fl1id);
+
+} /* csio_mb_iq_free */
+
+/*
+ * csio_mb_eq_ofld_alloc - Initializes the mailbox for allocating
+ *				an offload-egress queue.
+ *
+ * @hw: The HW structure
+ * @mbp: Mailbox structure to initialize
+ * @priv: Private data
+ * @mb_tmo: Mailbox time-out period (in ms).
+ * @eq_ofld_params: (Offload) Egress queue parameters; only the PF and VF
+ *		    numbers are used here.
+ * @cbfn: The call-back function
+ *
+ */
+static void
+csio_mb_eq_ofld_alloc(struct csio_hw *hw, struct csio_mb *mbp, void *priv,
+		uint32_t mb_tmo, struct csio_eq_params *eq_ofld_params,
+		void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_eq_ofld_cmd *cmdp = (struct fw_eq_ofld_cmd *)(mbp->mb);
+	uint32_t hdr, len16;
+
+	CSIO_INIT_MBP(mbp, cmdp, mb_tmo, priv, cbfn, 1);
+
+	hdr = FW_CMD_OP(FW_EQ_OFLD_CMD) | FW_CMD_REQUEST | FW_CMD_EXEC |
+	      FW_EQ_OFLD_CMD_PFN(eq_ofld_params->pfn) |
+	      FW_EQ_OFLD_CMD_VFN(eq_ofld_params->vfn);
+	len16 = FW_EQ_OFLD_CMD_ALLOC | FW_CMD_LEN16(sizeof(*cmdp) / 16);
+
+	cmdp->op_to_vfn = htonl(hdr);
+	cmdp->alloc_to_len16 = htonl(len16);
+
+} /* csio_mb_eq_ofld_alloc */
+
+/*
+ * csio_mb_eq_ofld_write - Initializes the mailbox for writing
+ *				an allocated offload-egress queue.
+ *
+ * @hw: The HW structure
+ * @mbp: Mailbox structure to initialize
+ * @priv: Private data
+ * @mb_tmo: Mailbox time-out period (in ms).
+ * @cascaded_req: TRUE - if this request is cascaded with an EQ-alloc request.
+ * @eq_ofld_params: (Offload) Egress queue parameters.
+ * @cbfn: The call-back function
+ *
+ *
+ * NOTE: Every field is OR-ed into cmdp->XXX rather than assigned, because
+ * this EQ write request can be cascaded with a previous EQ alloc request
+ * and must not overwrite the bits that request set. The same code works
+ * for a non-cascaded request, since the cmdp structure is then zeroed out
+ * by CSIO_INIT_MBP.
+ */
+static void
+csio_mb_eq_ofld_write(struct csio_hw *hw, struct csio_mb *mbp, void *priv,
+		      uint32_t mb_tmo, bool cascaded_req,
+		      struct csio_eq_params *eq_ofld_params,
+		      void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_eq_ofld_cmd *cmdp = (struct fw_eq_ofld_cmd *)(mbp->mb);
+	uint32_t start_stop;
+	uint32_t hdr, len16, fetch_word, size_word;
+
+	if (eq_ofld_params->eqstart)
+		start_stop = FW_EQ_OFLD_CMD_EQSTART;
+	else
+		start_stop = FW_EQ_OFLD_CMD_EQSTOP;
+
+	/*
+	 * A cascaded request continues the command built by the EQ alloc
+	 * helper, so the mailbox must not be re-initialized with 0's.
+	 */
+	if (!cascaded_req)
+		CSIO_INIT_MBP(mbp, cmdp, mb_tmo, priv, cbfn, 1);
+
+	hdr = FW_CMD_OP(FW_EQ_OFLD_CMD) | FW_CMD_REQUEST | FW_CMD_WRITE |
+	      FW_EQ_OFLD_CMD_PFN(eq_ofld_params->pfn) |
+	      FW_EQ_OFLD_CMD_VFN(eq_ofld_params->vfn);
+	len16 = start_stop | FW_CMD_LEN16(sizeof(*cmdp) / 16);
+	fetch_word = FW_EQ_OFLD_CMD_HOSTFCMODE(eq_ofld_params->hostfcmode) |
+		     FW_EQ_OFLD_CMD_CPRIO(eq_ofld_params->cprio)	   |
+		     FW_EQ_OFLD_CMD_PCIECHN(eq_ofld_params->pciechn)	   |
+		     FW_EQ_OFLD_CMD_IQID(eq_ofld_params->iqid);
+	size_word = FW_EQ_OFLD_CMD_DCAEN(eq_ofld_params->dcaen)		  |
+		    FW_EQ_OFLD_CMD_DCACPU(eq_ofld_params->dcacpu)	  |
+		    FW_EQ_OFLD_CMD_FBMIN(eq_ofld_params->fbmin)		  |
+		    FW_EQ_OFLD_CMD_FBMAX(eq_ofld_params->fbmax)		  |
+		    FW_EQ_OFLD_CMD_CIDXFTHRESHO(eq_ofld_params->cidxfthresho) |
+		    FW_EQ_OFLD_CMD_CIDXFTHRESH(eq_ofld_params->cidxfthresh) |
+		    FW_EQ_OFLD_CMD_EQSIZE(eq_ofld_params->eqsize);
+
+	cmdp->op_to_vfn |= htonl(hdr);
+	cmdp->alloc_to_len16 |= htonl(len16);
+	cmdp->eqid_pkd |= htonl(FW_EQ_OFLD_CMD_EQID(eq_ofld_params->eqid));
+	cmdp->fetchszm_to_iqid |= htonl(fetch_word);
+	cmdp->dcaen_to_eqsize |= htonl(size_word);
+	cmdp->eqaddr |= cpu_to_be64(eq_ofld_params->eqaddr);
+
+} /* csio_mb_eq_ofld_write */
+
+/*
+ * csio_mb_eq_ofld_alloc_write - Initializes the mailbox for allocating and
+ *				writing an offload Egress DMA Queue in one
+ *				command.
+ *
+ * @hw: The HW structure
+ * @mbp: Mailbox structure to initialize
+ * @priv: Private data.
+ * @mb_tmo: Mailbox time-out period (in ms).
+ * @eq_ofld_params: (Offload) Egress queue parameters.
+ * @cbfn: The call-back function
+ *
+ * Runs the EQ alloc helper and then the EQ write helper on the same
+ * mailbox. The write helper is called with cascaded_req set to true, so
+ * it adds its fields to the command built by the alloc helper instead of
+ * re-initializing it.
+ */
+void
+csio_mb_eq_ofld_alloc_write(struct csio_hw *hw, struct csio_mb *mbp,
+			    void *priv, uint32_t mb_tmo,
+			    struct csio_eq_params *eq_ofld_params,
+			    void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	csio_mb_eq_ofld_alloc(hw, mbp, priv, mb_tmo, eq_ofld_params, cbfn);
+	csio_mb_eq_ofld_write(hw, mbp, priv, mb_tmo, true,
+			      eq_ofld_params, cbfn);
+} /* csio_mb_eq_ofld_alloc_write */
+
+/*
+ * csio_mb_eq_ofld_alloc_write_rsp - Process the response to an egress DMA
+ *				queue allocate & write mailbox command.
+ *
+ * @hw: The HW structure.
+ * @mbp: Mailbox structure holding the firmware response.
+ * @ret_val: Receives the firmware return value.
+ * @eq_ofld_params: (Offload) Egress queue parameters.
+ *
+ * On FW_SUCCESS the EQ id and physical EQ id are copied from the response.
+ * On any other return value only the EQ id is reset to 0; the physical
+ * EQ id is left as it was.
+ */
+void
+csio_mb_eq_ofld_alloc_write_rsp(struct csio_hw *hw,
+				struct csio_mb *mbp, enum fw_retval *ret_val,
+				struct csio_eq_params *eq_ofld_params)
+{
+	struct fw_eq_ofld_cmd *rsp = (struct fw_eq_ofld_cmd *)(mbp->mb);
+
+	*ret_val = FW_CMD_RETVAL_GET(ntohl(rsp->alloc_to_len16));
+
+	if (*ret_val != FW_SUCCESS) {
+		eq_ofld_params->eqid = 0;
+		return;
+	}
+
+	eq_ofld_params->eqid =
+		FW_EQ_OFLD_CMD_EQID_GET(ntohl(rsp->eqid_pkd));
+	eq_ofld_params->physeqid =
+		FW_EQ_OFLD_CMD_PHYSEQID_GET(ntohl(rsp->physeqid_pkd));
+
+} /* csio_mb_eq_ofld_alloc_write_rsp */
+
+/*
+ * csio_mb_eq_ofld_free - Initializes the mailbox for freeing a
+ *				specified Egress DMA Queue.
+ *
+ * @hw: The HW structure
+ * @mbp: Mailbox structure to initialize
+ * @priv: Private data area.
+ * @mb_tmo: Mailbox time-out period (in ms).
+ * @eq_ofld_params: Parameters of the (offload) egress queue that is to be
+ *		    freed; the PF/VF numbers and the EQ id are used.
+ * @cbfn: The call-back function
+ *
+ */
+void
+csio_mb_eq_ofld_free(struct csio_hw *hw, struct csio_mb *mbp, void *priv,
+		     uint32_t mb_tmo, struct csio_eq_params *eq_ofld_params,
+		     void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_eq_ofld_cmd *cmdp = (struct fw_eq_ofld_cmd *)(mbp->mb);
+	uint32_t hdr, len16;
+
+	CSIO_INIT_MBP(mbp, cmdp, mb_tmo, priv, cbfn, 1);
+
+	hdr = FW_CMD_OP(FW_EQ_OFLD_CMD) | FW_CMD_REQUEST | FW_CMD_EXEC |
+	      FW_EQ_OFLD_CMD_PFN(eq_ofld_params->pfn) |
+	      FW_EQ_OFLD_CMD_VFN(eq_ofld_params->vfn);
+	len16 = FW_EQ_OFLD_CMD_FREE | FW_CMD_LEN16(sizeof(*cmdp) / 16);
+
+	cmdp->op_to_vfn = htonl(hdr);
+	cmdp->alloc_to_len16 = htonl(len16);
+	cmdp->eqid_pkd = htonl(FW_EQ_OFLD_CMD_EQID(eq_ofld_params->eqid));
+
+} /* csio_mb_eq_ofld_free */
+
+/*
+ * csio_write_fcoe_link_cond_init_mb - Initialize Mailbox to write FCoE link
+ *				 condition.
+ *
+ * @ln: The Lnode structure
+ * @mbp: Mailbox structure to initialize
+ * @mb_tmo: Mailbox time-out period (in ms).
+ * @port_id: Port id placed in the command header.
+ * @sub_opcode: Sub-opcode of the link command.
+ * @cos: Not referenced by this helper.
+ * @link_status: Link status stored in the command.
+ * @fcfi: FCF index.
+ * @cbfn: The call back function.
+ *
+ */
+void
+csio_write_fcoe_link_cond_init_mb(struct csio_lnode *ln, struct csio_mb *mbp,
+			uint32_t mb_tmo, uint8_t port_id, uint32_t sub_opcode,
+			uint8_t cos, bool link_status, uint32_t fcfi,
+			void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_fcoe_link_cmd *cmdp =
+				(struct fw_fcoe_link_cmd *)(mbp->mb);
+	uint32_t hdr, subop_word;
+
+	CSIO_INIT_MBP(mbp, cmdp, mb_tmo, ln, cbfn, 1);
+
+	hdr = FW_CMD_OP(FW_FCOE_LINK_CMD) | FW_CMD_REQUEST | FW_CMD_WRITE |
+	      FW_FCOE_LINK_CMD_PORTID(port_id);
+	subop_word = FW_FCOE_LINK_CMD_SUB_OPCODE(sub_opcode) |
+		     FW_FCOE_LINK_CMD_FCFI(fcfi);
+
+	cmdp->op_to_portid = htonl(hdr);
+	cmdp->sub_opcode_fcfi = htonl(subop_word);
+	cmdp->lstatus = link_status;
+	cmdp->retval_len16 = htonl(FW_CMD_LEN16(sizeof(*cmdp) / 16));
+
+} /* csio_write_fcoe_link_cond_init_mb */
+
+/*
+ * csio_fcoe_read_res_info_init_mb - Initializes the mailbox for reading FCoE
+ *				resource information (FW_FCOE_RES_INFO_CMD).
+ *
+ * @hw: The HW structure
+ * @mbp: Mailbox structure to initialize
+ * @mb_tmo: Mailbox time-out period (in ms).
+ * @cbfn: The call-back function
+ *
+ * The request consists of the two header words only.
+ */
+void
+csio_fcoe_read_res_info_init_mb(struct csio_hw *hw, struct csio_mb *mbp,
+			uint32_t mb_tmo,
+			void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_fcoe_res_info_cmd *cmdp =
+			(struct fw_fcoe_res_info_cmd *)(mbp->mb);
+	uint32_t op_word = FW_CMD_OP(FW_FCOE_RES_INFO_CMD) |
+			   FW_CMD_REQUEST | FW_CMD_READ;
+
+	CSIO_INIT_MBP(mbp, cmdp, mb_tmo, hw, cbfn, 1);
+
+	cmdp->op_to_read = htonl(op_word);
+	cmdp->retval_len16 = htonl(FW_CMD_LEN16(sizeof(*cmdp) / 16));
+
+} /* csio_fcoe_read_res_info_init_mb */
+
+/*
+ * csio_fcoe_vnp_alloc_init_mb - Initializes the mailbox for allocating VNP
+ *				in the firmware (FW_FCOE_VNP_CMD).
+ *
+ * @ln: The Lnode structure.
+ * @mbp: Mailbox structure to initialize.
+ * @mb_tmo: Mailbox time-out period (in ms).
+ * @fcfi: FCF Index.
+ * @vnpi: vnpi
+ * @iqid: iqid
+ * @vnport_wwnn: vnport WWNN (8 bytes); may be NULL.
+ * @vnport_wwpn: vnport WWPN (8 bytes); may be NULL.
+ * @cbfn: The call-back function.
+ *
+ * The GEN_WWN flag is set when neither name is supplied, where a NULL
+ * pointer and an all-zero name both count as "not supplied". Each
+ * non-NULL name is copied into the command.
+ */
+void
+csio_fcoe_vnp_alloc_init_mb(struct csio_lnode *ln, struct csio_mb *mbp,
+		uint32_t mb_tmo, uint32_t fcfi, uint32_t vnpi, uint16_t iqid,
+		uint8_t vnport_wwnn[8],	uint8_t vnport_wwpn[8],
+		void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_fcoe_vnp_cmd *cmdp =
+			(struct fw_fcoe_vnp_cmd *)(mbp->mb);
+
+	CSIO_INIT_MBP(mbp, cmdp, mb_tmo, ln, cbfn, 1);
+
+	cmdp->op_to_fcfi = htonl((FW_CMD_OP(FW_FCOE_VNP_CMD)		|
+				  FW_CMD_REQUEST			|
+				  FW_CMD_EXEC				|
+				  FW_FCOE_VNP_CMD_FCFI(fcfi)));
+
+	cmdp->alloc_to_len16 = htonl(FW_FCOE_VNP_CMD_ALLOC		|
+				     FW_CMD_LEN16(sizeof(*cmdp) / 16));
+
+	cmdp->gen_wwn_to_vnpi = htonl(FW_FCOE_VNP_CMD_VNPI(vnpi));
+
+	cmdp->iqid = htons(iqid);
+
+	/*
+	 * Check each pointer before handing it to wwn_to_u64(), which
+	 * dereferences its argument; a NULL name is treated like a zero one.
+	 */
+	if ((!vnport_wwnn || !wwn_to_u64(vnport_wwnn)) &&
+	    (!vnport_wwpn || !wwn_to_u64(vnport_wwpn)))
+		cmdp->gen_wwn_to_vnpi |= htonl(FW_FCOE_VNP_CMD_GEN_WWN);
+
+	if (vnport_wwnn)
+		memcpy(cmdp->vnport_wwnn, vnport_wwnn, 8);
+	if (vnport_wwpn)
+		memcpy(cmdp->vnport_wwpn, vnport_wwpn, 8);
+
+} /* csio_fcoe_vnp_alloc_init_mb */
+
+/*
+ * csio_fcoe_vnp_read_init_mb - Prepares a VNP read command
+ *				(FW_FCOE_VNP_CMD).
+ * @ln: The Lnode structure.
+ * @mbp: Mailbox structure to initialize.
+ * @mb_tmo: Mailbox time-out period (in ms).
+ * @fcfi: FCF Index.
+ * @vnpi: vnpi
+ * @cbfn: The call-back handler.
+ */
+void
+csio_fcoe_vnp_read_init_mb(struct csio_lnode *ln, struct csio_mb *mbp,
+		uint32_t mb_tmo, uint32_t fcfi, uint32_t vnpi,
+		void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_fcoe_vnp_cmd *cmdp =
+			(struct fw_fcoe_vnp_cmd *)(mbp->mb);
+	uint32_t hdr = FW_CMD_OP(FW_FCOE_VNP_CMD) | FW_CMD_REQUEST |
+		       FW_CMD_READ | FW_FCOE_VNP_CMD_FCFI(fcfi);
+
+	CSIO_INIT_MBP(mbp, cmdp, mb_tmo, ln, cbfn, 1);
+
+	cmdp->op_to_fcfi = htonl(hdr);
+	cmdp->alloc_to_len16 = htonl(FW_CMD_LEN16(sizeof(*cmdp) / 16));
+	cmdp->gen_wwn_to_vnpi = htonl(FW_FCOE_VNP_CMD_VNPI(vnpi));
+}
+
+/*
+ * csio_fcoe_vnp_free_init_mb - Initializes the mailbox for freeing an
+ *			allocated VNP in the firmware (FW_FCOE_VNP_CMD).
+ *
+ * @ln: The Lnode structure.
+ * @mbp: Mailbox structure to initialize.
+ * @mb_tmo: Mailbox time-out period (in ms).
+ * @fcfi: FCF flow id
+ * @vnpi: VNP flow id
+ * @cbfn: The call-back function.
+ * Return: None
+ */
+void
+csio_fcoe_vnp_free_init_mb(struct csio_lnode *ln, struct csio_mb *mbp,
+		uint32_t mb_tmo, uint32_t fcfi, uint32_t vnpi,
+		void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_fcoe_vnp_cmd *cmdp =
+			(struct fw_fcoe_vnp_cmd *)(mbp->mb);
+	uint32_t hdr, len16;
+
+	CSIO_INIT_MBP(mbp, cmdp, mb_tmo, ln, cbfn, 1);
+
+	hdr = FW_CMD_OP(FW_FCOE_VNP_CMD) | FW_CMD_REQUEST | FW_CMD_EXEC |
+	      FW_FCOE_VNP_CMD_FCFI(fcfi);
+	len16 = FW_FCOE_VNP_CMD_FREE | FW_CMD_LEN16(sizeof(*cmdp) / 16);
+
+	cmdp->op_to_fcfi = htonl(hdr);
+	cmdp->alloc_to_len16 = htonl(len16);
+	cmdp->gen_wwn_to_vnpi = htonl(FW_FCOE_VNP_CMD_VNPI(vnpi));
+}
+
+/*
+ * csio_fcoe_read_fcf_init_mb - Initializes the mailbox to read the
+ *				FCF records.
+ *
+ * @ln: The Lnode structure
+ * @mbp: Mailbox structure to initialize
+ * @mb_tmo: Mailbox time-out period (in ms).
+ * @portid: Not referenced by this helper.
+ * @fcfi: FCF index placed in the command header.
+ * @cbfn: The call-back function
+ *
+ */
+void
+csio_fcoe_read_fcf_init_mb(struct csio_lnode *ln, struct csio_mb *mbp,
+		uint32_t mb_tmo, uint32_t portid, uint32_t fcfi,
+		void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct fw_fcoe_fcf_cmd *cmdp =
+			(struct fw_fcoe_fcf_cmd *)(mbp->mb);
+	uint32_t hdr = FW_CMD_OP(FW_FCOE_FCF_CMD) | FW_CMD_REQUEST |
+		       FW_CMD_READ | FW_FCOE_FCF_CMD_FCFI(fcfi);
+
+	CSIO_INIT_MBP(mbp, cmdp, mb_tmo, ln, cbfn, 1);
+
+	cmdp->op_to_fcfi = htonl(hdr);
+	cmdp->retval_len16 = htonl(FW_CMD_LEN16(sizeof(*cmdp) / 16));
+
+} /* csio_fcoe_read_fcf_init_mb */
+
+/*
+ * csio_fcoe_read_portparams_init_mb - Initializes the mailbox for reading
+ *				FCoE port statistics (FW_FCOE_STATS_CMD).
+ *
+ * @hw: The HW structure
+ * @mbp: Mailbox structure to initialize
+ * @mb_tmo: Mailbox time-out period.
+ * @portparams: Supplies the port id, the number of statistics to read and
+ *		the index of the first one.
+ * @cbfn: The call-back function
+ *
+ * The mailbox size is set to 64 after CSIO_INIT_MBP, and the length
+ * written into the command header is CSIO_MAX_MB_SIZE / 16 rather than
+ * the size of the command structure.
+ */
+void
+csio_fcoe_read_portparams_init_mb(struct csio_hw *hw, struct csio_mb *mbp,
+				uint32_t mb_tmo,
+				struct fw_fcoe_port_cmd_params *portparams,
+				void (*cbfn)(struct csio_hw *,
+					     struct csio_mb *))
+{
+	struct fw_fcoe_stats_cmd *stats_cmd =
+				(struct fw_fcoe_stats_cmd *)(mbp->mb);
+	uint32_t op_word = FW_CMD_OP(FW_FCOE_STATS_CMD) |
+			   FW_CMD_REQUEST | FW_CMD_READ;
+
+	CSIO_INIT_MBP(mbp, stats_cmd, mb_tmo, hw, cbfn, 1);
+	mbp->mb_size = 64;
+
+	stats_cmd->op_to_flowid = htonl(op_word);
+	stats_cmd->free_to_len16 = htonl(FW_CMD_LEN16(CSIO_MAX_MB_SIZE/16));
+
+	/* Which port, and how many statistics starting from which index. */
+	stats_cmd->u.ctl.nstats_port =
+			FW_FCOE_STATS_CMD_NSTATS(portparams->nstats) |
+			FW_FCOE_STATS_CMD_PORT(portparams->portid);
+	stats_cmd->u.ctl.port_valid_ix =
+			FW_FCOE_STATS_CMD_IX(portparams->idx) |
+			FW_FCOE_STATS_CMD_PORT_VALID;
+
+} /* csio_fcoe_read_portparams_init_mb */
+
+/*
+ * csio_mb_process_portparams_rsp - Decode a FW_FCOE_STATS_CMD port
+ *				statistics response.
+ *
+ * @hw: The HW structure
+ * @mbp: Mailbox structure holding the firmware response
+ * @retval: Receives the firmware return value
+ * @portparams: The parameters used for the request; idx selects which
+ *		group of counters this response carries and nstats how
+ *		many 8-byte counters are copied.
+ * @portstats: Accumulated statistics, updated in host byte order.
+ *
+ * Nothing in @portstats is touched unless the firmware reports FW_SUCCESS
+ * and idx is 1, 7 or 13.
+ */
+void
+csio_mb_process_portparams_rsp(
+				struct csio_hw *hw,
+				struct csio_mb *mbp,
+				enum fw_retval *retval,
+				struct fw_fcoe_port_cmd_params *portparams,
+				struct fw_fcoe_port_stats  *portstats
+			     )
+{
+	struct fw_fcoe_stats_cmd *rsp = (struct fw_fcoe_stats_cmd *)(mbp->mb);
+	struct fw_fcoe_port_stats stats;
+	uint8_t *from;
+	uint8_t *to;
+
+	*retval = FW_CMD_RETVAL_GET(ntohl(rsp->free_to_len16));
+
+	memset(&stats, 0, sizeof(struct fw_fcoe_port_stats));
+
+	if (*retval != FW_SUCCESS)
+		return;
+
+	/*
+	 * Drop the raw counters into a scratch copy of the stats structure
+	 * at the slot matching the 1-based start index.
+	 */
+	to = (uint8_t *)(&stats) + ((portparams->idx - 1) * 8);
+	from = (uint8_t *)rsp + (CSIO_STATS_OFFSET * 8);
+	memcpy(to, from, (portparams->nstats * 8));
+
+	switch (portparams->idx) {
+	case 1:
+		/* Get the first 6 flits from the Mailbox */
+		portstats->tx_bcast_bytes = be64_to_cpu(stats.tx_bcast_bytes);
+		portstats->tx_bcast_frames =
+					be64_to_cpu(stats.tx_bcast_frames);
+		portstats->tx_mcast_bytes = be64_to_cpu(stats.tx_mcast_bytes);
+		portstats->tx_mcast_frames =
+					be64_to_cpu(stats.tx_mcast_frames);
+		portstats->tx_ucast_bytes = be64_to_cpu(stats.tx_ucast_bytes);
+		portstats->tx_ucast_frames =
+					be64_to_cpu(stats.tx_ucast_frames);
+		break;
+	case 7:
+		/* Get the second 6 flits from the Mailbox */
+		portstats->tx_drop_frames = be64_to_cpu(stats.tx_drop_frames);
+		portstats->tx_offload_bytes =
+					be64_to_cpu(stats.tx_offload_bytes);
+		portstats->tx_offload_frames =
+					be64_to_cpu(stats.tx_offload_frames);
+#if 0
+		portstats->rx_pf_bytes = be64_to_cpu(stats.rx_pf_bytes);
+		portstats->rx_pf_frames = be64_to_cpu(stats.rx_pf_frames);
+#endif
+		portstats->rx_bcast_bytes = be64_to_cpu(stats.rx_bcast_bytes);
+		portstats->rx_bcast_frames =
+					be64_to_cpu(stats.rx_bcast_frames);
+		portstats->rx_mcast_bytes = be64_to_cpu(stats.rx_mcast_bytes);
+		break;
+	case 13:
+		/* Get the last 4 flits from the Mailbox */
+		portstats->rx_mcast_frames =
+					be64_to_cpu(stats.rx_mcast_frames);
+		portstats->rx_ucast_bytes = be64_to_cpu(stats.rx_ucast_bytes);
+		portstats->rx_ucast_frames =
+					be64_to_cpu(stats.rx_ucast_frames);
+		portstats->rx_err_frames = be64_to_cpu(stats.rx_err_frames);
+		break;
+	default:
+		break;
+	}
+}
+
+/* Entry points/APIs for MB module					     */
+/*
+ * csio_mb_intr_enable - Enable Interrupts from mailboxes.
+ * @hw: The HW structure
+ *
+ * Writes MBMSGRDYINTEN(1) to this PF's CIM_PF_HOST_INT_ENABLE register and
+ * then reads the register back; the value read is discarded.
+ */
+void
+csio_mb_intr_enable(struct csio_hw *hw)
+{
+	csio_wr_reg32(hw, MBMSGRDYINTEN(1), MYPF_REG(CIM_PF_HOST_INT_ENABLE));
+	csio_rd_reg32(hw, MYPF_REG(CIM_PF_HOST_INT_ENABLE));
+}
+
+/*
+ * csio_mb_intr_disable - Disable Interrupts from mailboxes.
+ * @hw: The HW structure
+ *
+ * Writes MBMSGRDYINTEN(0) to this PF's CIM_PF_HOST_INT_ENABLE register and
+ * then reads the register back; the value read is discarded.
+ */
+void
+csio_mb_intr_disable(struct csio_hw *hw)
+{
+	csio_wr_reg32(hw, MBMSGRDYINTEN(0), MYPF_REG(CIM_PF_HOST_INT_ENABLE));
+	csio_rd_reg32(hw, MYPF_REG(CIM_PF_HOST_INT_ENABLE));
+}
+
+/*
+ * csio_mb_dump_fw_dbg - Log the contents of a FW DEBUG command.
+ * @hw: The HW structure
+ * @cmd: Mailbox words holding the debug command
+ *
+ * A command of type 1 is logged as a firmware print message (string index
+ * plus four parameters); any other type is reported as a firmware
+ * assertion.
+ */
+static void
+csio_mb_dump_fw_dbg(struct csio_hw *hw, __be64 *cmd)
+{
+	struct fw_debug_cmd *dbg = (struct fw_debug_cmd *)cmd;
+
+	if ((FW_DEBUG_CMD_TYPE_GET(ntohl(dbg->op_type))) != 1) {
+		/* This is a FW assertion */
+		csio_fatal(hw, "FW assertion at %.16s:%u, val0 %#x, val1 %#x\n",
+			    dbg->u.assert.filename_0_7,
+			    ntohl(dbg->u.assert.line),
+			    ntohl(dbg->u.assert.x),
+			    ntohl(dbg->u.assert.y));
+		return;
+	}
+
+	csio_info(hw, "FW print message:\n");
+	csio_info(hw, "\tdebug->dprtstridx = %d\n",
+		    ntohs(dbg->u.prt.dprtstridx));
+	csio_info(hw, "\tdebug->dprtstrparam0 = 0x%x\n",
+		    ntohl(dbg->u.prt.dprtstrparam0));
+	csio_info(hw, "\tdebug->dprtstrparam1 = 0x%x\n",
+		    ntohl(dbg->u.prt.dprtstrparam1));
+	csio_info(hw, "\tdebug->dprtstrparam2 = 0x%x\n",
+		    ntohl(dbg->u.prt.dprtstrparam2));
+	csio_info(hw, "\tdebug->dprtstrparam3 = 0x%x\n",
+		    ntohl(dbg->u.prt.dprtstrparam3));
+}
+
+/*
+ * csio_mb_debug_cmd_handler - Handle a FW DEBUG command found in the mailbox.
+ * @hw: The HW structure
+ *
+ * Copies sizeof(struct fw_debug_cmd) bytes out of this PF's mailbox data
+ * registers, logs them, and then writes the control register with the
+ * owner set to firmware.
+ */
+static void
+csio_mb_debug_cmd_handler(struct csio_hw *hw)
+{
+	__be64 cmd[CSIO_MB_MAX_REGS];
+	__be64 *slot = cmd;
+	uint32_t ctl_reg = PF_REG(hw->pfn, CIM_PF_MAILBOX_CTRL);
+	uint32_t data_reg = PF_REG(hw->pfn, CIM_PF_MAILBOX_DATA);
+	int nbytes = sizeof(struct fw_debug_cmd);
+	int off;
+
+	/* Copy mailbox data, one 64-bit register at a time */
+	for (off = 0; off < nbytes; off += 8)
+		*slot++ = cpu_to_be64(csio_rd_reg64(hw, data_reg + off));
+
+	csio_mb_dump_fw_dbg(hw, cmd);
+
+	/* Notify FW of mailbox by setting owner as UP */
+	csio_wr_reg32(hw, MBMSGVALID | MBINTREQ | MBOWNER(CSIO_MBOWNER_FW),
+		      ctl_reg);
+
+	/* Read the control register back after the write. */
+	csio_rd_reg32(hw, ctl_reg);
+	wmb();
+}
+
+/*
+ * csio_mb_issue - generic routine for issuing Mailbox commands.
+ * @hw: The HW structure
+ * @mbp: Mailbox command to issue
+ *
+ * A command without a completion callback (mbp->mb_cbfn == NULL) is issued
+ * in immediate mode: the mailbox is polled every CSIO_MB_POLL_FREQ ms for
+ * up to mbp->tmo ms and the response is copied back into mbp->mb before
+ * returning. A command with a callback is issued in interrupt mode: it is
+ * either written to the mailbox with the completion timer armed, or put on
+ * the request queue when another command is already outstanding.
+ *
+ * Caller should hold hw lock across this call.
+ *
+ * Returns 0 on success (or when the command was queued), -EINVAL for an
+ * invalid timeout, for disabled interrupts in interrupt mode or when the
+ * mailbox cannot be owned in interrupt mode, -EBUSY or -ETIMEDOUT when the
+ * mailbox cannot be owned in immediate mode, and -ETIMEDOUT when no
+ * response shows up within mbp->tmo.
+ */
+int
+csio_mb_issue(struct csio_hw *hw, struct csio_mb *mbp)
+{
+	uint32_t owner, ctl;
+	int i;
+	uint32_t ii;
+	__be64 *cmd = mbp->mb;
+	__be64 hdr;
+	struct csio_mbm	*mbm = &hw->mbm;
+	uint32_t ctl_reg = PF_REG(hw->pfn, CIM_PF_MAILBOX_CTRL);
+	uint32_t data_reg = PF_REG(hw->pfn, CIM_PF_MAILBOX_DATA);
+	int size = mbp->mb_size;
+	int rv = -EINVAL;
+	struct fw_cmd_hdr *fw_hdr;
+
+	/* Determine mode */
+	if (mbp->mb_cbfn == NULL) {
+		/* Need to issue/get results in the same context */
+		if (mbp->tmo < CSIO_MB_POLL_FREQ) {
+			csio_err(hw, "Invalid tmo: 0x%x\n", mbp->tmo);
+			goto error_out;
+		}
+	} else if (!csio_is_host_intr_enabled(hw) ||
+		   !csio_is_hw_intr_enabled(hw)) {
+		csio_err(hw, "Cannot issue mailbox in interrupt mode 0x%x\n",
+			 *((uint8_t *)mbp->mb));
+		goto error_out;
+	}
+
+	if (mbm->mcurrent != NULL) {
+		/* Queue mbox cmd, if another mbox cmd is active */
+		if (mbp->mb_cbfn == NULL) {
+			rv = -EBUSY;
+			csio_dbg(hw, "Couldnt own Mailbox %x op:0x%x\n",
+				    hw->pfn, *((uint8_t *)mbp->mb));
+
+			goto error_out;
+		} else {
+			list_add_tail(&mbp->list, &mbm->req_q);
+			CSIO_INC_STATS(mbm, n_activeq);
+
+			return 0;
+		}
+	}
+
+	/* Now get ownership of mailbox */
+	owner = MBOWNER_GET(csio_rd_reg32(hw, ctl_reg));
+
+	if (!csio_mb_is_host_owner(owner)) {
+
+		/* Re-read the owner a few times while nobody owns the mailbox */
+		for (i = 0; (owner == CSIO_MBOWNER_NONE) && (i < 3); i++)
+			owner = MBOWNER_GET(csio_rd_reg32(hw, ctl_reg));
+		/*
+		 * Mailbox unavailable. In immediate mode, fail the command.
+		 * In interrupt mode there is no outstanding driver command
+		 * at this point (checked above), so fail it as well.
+		 */
+		if (!csio_mb_is_host_owner(owner)) {
+			if (mbp->mb_cbfn == NULL) {
+				rv = owner ? -EBUSY : -ETIMEDOUT;
+
+				csio_dbg(hw,
+					 "Couldnt own Mailbox %x op:0x%x "
+					 "owner:%x\n",
+					 hw->pfn, *((uint8_t *)mbp->mb), owner);
+				goto error_out;
+			} else {
+				if (mbm->mcurrent == NULL) {
+					csio_err(hw,
+						 "Couldnt own Mailbox %x "
+						 "op:0x%x owner:%x\n",
+						 hw->pfn, *((uint8_t *)mbp->mb),
+						 owner);
+					csio_err(hw,
+						 "No outstanding driver"
+						 " mailbox as well\n");
+					goto error_out;
+				}
+			}
+		}
+	}
+
+	/* Mailbox is available, copy mailbox data into it */
+	for (i = 0; i < size; i += 8) {
+		csio_wr_reg64(hw, be64_to_cpu(*cmd), data_reg + i);
+		cmd++;
+	}
+
+	CSIO_DUMP_MB(hw, hw->pfn, data_reg);
+
+	/* Start completion timers in non-immediate modes and notify FW */
+	if (mbp->mb_cbfn != NULL) {
+		mbm->mcurrent = mbp;
+		mod_timer(&mbm->timer, jiffies + msecs_to_jiffies(mbp->tmo));
+		csio_wr_reg32(hw, MBMSGVALID | MBINTREQ |
+			      MBOWNER(CSIO_MBOWNER_FW), ctl_reg);
+	} else
+		csio_wr_reg32(hw, MBMSGVALID | MBOWNER(CSIO_MBOWNER_FW),
+			      ctl_reg);
+
+	/* Flush posted writes */
+	csio_rd_reg32(hw, ctl_reg);
+	wmb();
+
+	CSIO_INC_STATS(mbm, n_req);
+
+	/* Interrupt mode: the completion is delivered asynchronously */
+	if (mbp->mb_cbfn)
+		return 0;
+
+	/* Poll for completion in immediate mode */
+	cmd = mbp->mb;
+
+	for (ii = 0; ii < mbp->tmo; ii += CSIO_MB_POLL_FREQ) {
+		mdelay(CSIO_MB_POLL_FREQ);
+
+		/* Check for response */
+		ctl = csio_rd_reg32(hw, ctl_reg);
+		if (csio_mb_is_host_owner(MBOWNER_GET(ctl))) {
+
+			/* Host owns the mailbox but it holds no message */
+			if (!(ctl & MBMSGVALID)) {
+				csio_wr_reg32(hw, 0, ctl_reg);
+				continue;
+			}
+
+			CSIO_DUMP_MB(hw, hw->pfn, data_reg);
+
+			hdr = cpu_to_be64(csio_rd_reg64(hw, data_reg));
+			fw_hdr = (struct fw_cmd_hdr *)&hdr;
+
+			/* A FW debug message is not our response; keep polling */
+			switch (FW_CMD_OP_GET(ntohl(fw_hdr->hi))) {
+			case FW_DEBUG_CMD:
+				csio_mb_debug_cmd_handler(hw);
+				continue;
+			}
+
+			/* Copy response */
+			for (i = 0; i < size; i += 8)
+				*cmd++ = cpu_to_be64(csio_rd_reg64
+							  (hw, data_reg + i));
+			csio_wr_reg32(hw, 0, ctl_reg);
+
+			/*
+			 * The response is kept in big-endian wire format, so
+			 * the FW return value has to be extracted from the
+			 * header's 'lo' word via csio_mb_fw_retval() instead
+			 * of from the raw 64-bit word.
+			 */
+			if (csio_mb_fw_retval(mbp) != FW_SUCCESS)
+				CSIO_INC_STATS(mbm, n_err);
+
+			CSIO_INC_STATS(mbm, n_rsp);
+			return 0;
+		}
+	}
+
+	CSIO_INC_STATS(mbm, n_tmo);
+
+	csio_err(hw, "Mailbox %x op:0x%x timed out!\n",
+		 hw->pfn, *((uint8_t *)cmd));
+
+	return -ETIMEDOUT;
+
+error_out:
+	CSIO_INC_STATS(mbm, n_err);
+	return rv;
+}
+
+/*
+ * csio_mb_completions - Run the completion callbacks of mailbox commands.
+ * @hw: The HW structure
+ * @cbfn_q: List of finished mailbox commands; emptied by this call.
+ *
+ * Every command is unlinked from @cbfn_q, accounted in the mailbox
+ * statistics according to its FW return value and then passed to its
+ * callback, if it has one.
+ */
+void
+csio_mb_completions(struct csio_hw *hw, struct list_head *cbfn_q)
+{
+	struct csio_mbm *mbm = &hw->mbm;
+
+	for (;;) {
+		struct csio_mb *done;
+		enum fw_retval status;
+
+		if (list_empty(cbfn_q))
+			break;
+
+		done = list_first_entry(cbfn_q, struct csio_mb, list);
+		list_del_init(&done->list);
+
+		/*
+		 * Success counts as a response and FW_HOSTERROR is not
+		 * counted at all; every other value counts as an error.
+		 */
+		status = csio_mb_fw_retval(done);
+		if (status == FW_SUCCESS) {
+			CSIO_INC_STATS(mbm, n_rsp);
+		} else if (status != FW_HOSTERROR) {
+			CSIO_INC_STATS(mbm, n_err);
+		}
+
+		if (done->mb_cbfn)
+			done->mb_cbfn(hw, done);
+	}
+}
+
+/*
+ * csio_mb_portmod_changed - Log the new transceiver module type of a port.
+ * @hw: The HW structure
+ * @port_id: Index into hw->pport[] of the port whose module type changed
+ */
+static void
+csio_mb_portmod_changed(struct csio_hw *hw, uint8_t port_id)
+{
+	/* Module names, indexed by module type; the first slot has no name */
+	static char *mod_str[] = {
+		NULL, "LR", "SR", "ER", "TWINAX", "active TWINAX", "LRM"
+	};
+	struct csio_pport *pp = &hw->pport[port_id];
+
+	if (pp->mod_type == FW_PORT_MOD_TYPE_NONE) {
+		csio_info(hw, "Port:%d - port module unplugged\n", port_id);
+		return;
+	}
+
+	/* Types covered by the name table are checked before special ones */
+	if (pp->mod_type < ARRAY_SIZE(mod_str)) {
+		csio_info(hw, "Port:%d - %s port module inserted\n", port_id,
+			  mod_str[pp->mod_type]);
+		return;
+	}
+
+	if (pp->mod_type == FW_PORT_MOD_TYPE_NOTSUPPORTED) {
+		csio_info(hw,
+			  "Port:%d - unsupported optical port module "
+			  "inserted\n", port_id);
+		return;
+	}
+
+	if (pp->mod_type == FW_PORT_MOD_TYPE_UNKNOWN) {
+		csio_info(hw,
+			  "Port:%d - unknown port module inserted, forcing "
+			  "TWINAX\n", port_id);
+		return;
+	}
+
+	if (pp->mod_type == FW_PORT_MOD_TYPE_ERROR) {
+		csio_info(hw, "Port:%d - transceiver module error\n", port_id);
+		return;
+	}
+
+	csio_info(hw, "Port:%d - unknown module type %d inserted\n",
+		  port_id, pp->mod_type);
+}
+
+/*
+ * csio_mb_fwevt_handler - Handle a mailbox command received as a FW event.
+ * @hw: The HW structure
+ * @cmd: The command; its fields are read in network byte order
+ *
+ * A FW_PORT_CMD carrying port information updates the cached link status,
+ * link speed and module type of the port. A FW_DEBUG_CMD is dumped to the
+ * log.
+ *
+ * Returns 0 when the command was handled, -EINVAL for any other opcode
+ * or for a FW_PORT_CMD with any other action.
+ */
+int
+csio_mb_fwevt_handler(struct csio_hw *hw, __be64 *cmd)
+{
+	uint8_t opcode = *(uint8_t *)cmd;
+	struct fw_port_cmd *pcmd;
+	uint8_t port_id;
+	uint32_t link_status;
+	uint16_t action;
+	uint8_t mod_type;
+
+	if (opcode != FW_PORT_CMD) {
+		if (opcode == FW_DEBUG_CMD) {
+			csio_mb_dump_fw_dbg(hw, cmd);
+			return 0;
+		}
+
+		csio_dbg(hw, "Gen MB can't handle op:0x%x on evtq.\n", opcode);
+		return -EINVAL;
+	}
+
+	pcmd = (struct fw_port_cmd *)cmd;
+
+	/* Only port information updates are understood */
+	action = FW_PORT_CMD_ACTION_GET(ntohl(pcmd->action_to_len16));
+	if (action != FW_PORT_ACTION_GET_PORT_INFO) {
+		csio_err(hw, "Unhandled FW_PORT_CMD action: %u\n",
+			action);
+		return -EINVAL;
+	}
+
+	port_id = FW_PORT_CMD_PORTID_GET(ntohl(pcmd->op_to_portid));
+	link_status = ntohl(pcmd->u.info.lstatus_to_modtype);
+	mod_type = FW_PORT_CMD_MODTYPE_GET(link_status);
+
+	hw->pport[port_id].link_status = FW_PORT_CMD_LSTATUS_GET(link_status);
+	hw->pport[port_id].link_speed = FW_PORT_CMD_LSPEED_GET(link_status);
+
+	csio_info(hw, "Port:%x - LINK %s\n", port_id,
+		FW_PORT_CMD_LSTATUS_GET(link_status) ? "UP" : "DOWN");
+
+	/* Report the module only when its type differs from the cached one */
+	if (hw->pport[port_id].mod_type != mod_type) {
+		hw->pport[port_id].mod_type = mod_type;
+		csio_mb_portmod_changed(hw, port_id);
+	}
+
+	return 0;
+}
+
+/*
+ * csio_mb_isr_handler - Handle mailboxes related interrupts.
+ * @hw: The HW structure
+ *
+ * Called from the ISR to handle Mailbox related interrupts.
+ * HW Lock should be held across this call.
+ *
+ * When the host owns the mailbox and it holds a valid response, the
+ * response is copied into the current mailbox command (mbm->mcurrent),
+ * the command is moved to the completion queue and a CSIO_EVT_MBX event
+ * is enqueued.
+ *
+ * Returns 0 when a response was picked up. Returns -EINVAL when the
+ * interrupt cause bits are not set, when the mailbox data is not valid,
+ * when the mailbox held a FW_DEBUG_CMD (which is consumed here), or when
+ * the host does not own the mailbox.
+ */
+int
+csio_mb_isr_handler(struct csio_hw *hw)
+{
+	struct csio_mbm		*mbm = &hw->mbm;
+	struct csio_mb		*mbp =  mbm->mcurrent;
+	__be64			*cmd;
+	uint32_t		ctl, cim_cause, pl_cause;
+	int			i;
+	uint32_t		ctl_reg = PF_REG(hw->pfn, CIM_PF_MAILBOX_CTRL);
+	uint32_t		data_reg = PF_REG(hw->pfn, CIM_PF_MAILBOX_DATA);
+	int			size;
+	__be64			hdr;
+	struct fw_cmd_hdr	*fw_hdr;
+
+	pl_cause = csio_rd_reg32(hw, MYPF_REG(PL_PF_INT_CAUSE));
+	cim_cause = csio_rd_reg32(hw, MYPF_REG(CIM_PF_HOST_INT_CAUSE));
+
+	/* Both cause registers must flag a mailbox message */
+	if (!(pl_cause & PFCIM) || !(cim_cause & MBMSGRDYINT)) {
+		CSIO_INC_STATS(hw, n_mbint_unexp);
+		return -EINVAL;
+	}
+
+	/*
+	 * The cause registers below HAVE to be cleared in the SAME
+	 * order as below: The low level cause register followed by
+	 * the upper level cause register. In other words, CIM-cause
+	 * first followed by PL-Cause next.
+	 */
+	csio_wr_reg32(hw, MBMSGRDYINT, MYPF_REG(CIM_PF_HOST_INT_CAUSE));
+	csio_wr_reg32(hw, PFCIM, MYPF_REG(PL_PF_INT_CAUSE));
+
+	ctl = csio_rd_reg32(hw, ctl_reg);
+
+	if (csio_mb_is_host_owner(MBOWNER_GET(ctl))) {
+
+		CSIO_DUMP_MB(hw, hw->pfn, data_reg);
+
+		if (!(ctl & MBMSGVALID)) {
+			csio_warn(hw,
+				  "Stray mailbox interrupt recvd,"
+				  " mailbox data not valid\n");
+			csio_wr_reg32(hw, 0, ctl_reg);
+			/* Flush */
+			csio_rd_reg32(hw, ctl_reg);
+			return -EINVAL;
+		}
+
+		/* Peek at the first 64-bit word to find the command opcode */
+		hdr = cpu_to_be64(csio_rd_reg64(hw, data_reg));
+		fw_hdr = (struct fw_cmd_hdr *)&hdr;
+
+		switch (FW_CMD_OP_GET(ntohl(fw_hdr->hi))) {
+		case FW_DEBUG_CMD:
+			/* Not a response to the current command */
+			csio_mb_debug_cmd_handler(hw);
+			return -EINVAL;
+#if 0
+		case FW_ERROR_CMD:
+		case FW_INITIALIZE_CMD: /* When we are not master */
+#endif
+		}
+
+		/* A response is only expected while a command is current */
+		CSIO_ASSERT(mbp != NULL);
+
+		cmd = mbp->mb;
+		size = mbp->mb_size;
+		/* Get response */
+		for (i = 0; i < size; i += 8)
+			*cmd++ = cpu_to_be64(csio_rd_reg64
+						  (hw, data_reg + i));
+
+		csio_wr_reg32(hw, 0, ctl_reg);
+		/* Flush */
+		csio_rd_reg32(hw, ctl_reg);
+
+		mbm->mcurrent = NULL;
+
+		/* Add completion to tail of cbfn queue */
+		list_add_tail(&mbp->list, &mbm->cbfn_q);
+		CSIO_INC_STATS(mbm, n_cbfnq);
+
+		/*
+		 * Enqueue event to EventQ. Events processing happens
+		 * in Event worker thread context
+		 */
+		/* NOTE(review): sizeof(mbp) is the size of the pointer, not
+		 * of struct csio_mb - confirm this is what the event queue
+		 * expects.
+		 */
+		if (csio_enqueue_evt(hw, CSIO_EVT_MBX, mbp, sizeof(mbp)))
+			CSIO_INC_STATS(hw, n_evt_drop);
+
+		return 0;
+
+	} else {
+		/*
+		 * We can get here if mailbox MSIX vector is shared,
+		 * or in INTx case. Or a stray interrupt.
+		 */
+		csio_dbg(hw, "Host not owner, no mailbox interrupt\n");
+		CSIO_INC_STATS(hw, n_int_stray);
+		return -EINVAL;
+	}
+}
+
+/*
+ * csio_mb_tmo_handler - Retire the current mailbox command after a timeout.
+ * @hw: The HW structure
+ *
+ * Marks the current command as timed out by storing FW_ETIMEDOUT as its
+ * FW return value and detaches it from the mailbox module.
+ *
+ * Returns the timed out command, or NULL when no command is current.
+ */
+struct csio_mb *
+csio_mb_tmo_handler(struct csio_hw *hw)
+{
+	struct csio_mbm *mbm = &hw->mbm;
+	struct csio_mb *expired = mbm->mcurrent;
+	struct fw_cmd_hdr *hdr;
+
+	if (!expired) {
+		/*
+		 * The timer may have raced with the completion handler,
+		 * which then already took the command away.
+		 */
+		CSIO_DB_ASSERT(0);
+		return NULL;
+	}
+
+	hdr = (struct fw_cmd_hdr *)(expired->mb);
+	csio_dbg(hw, "Mailbox num:%x op:0x%x timed out\n", hw->pfn,
+		    FW_CMD_OP_GET(ntohl(hdr->hi)));
+
+	/* The timeout is reported through the command's FW return value */
+	hdr->lo = htonl(FW_CMD_RETVAL(FW_ETIMEDOUT));
+	CSIO_INC_STATS(mbm, n_tmo);
+	mbm->mcurrent = NULL;
+
+	return expired;
+}
+
+/*
+ * csio_mb_cancel_all - Cancel all waiting commands.
+ * @hw: The HW structure
+ * @cbfn_q: List that receives every cancelled command.
+ *
+ * Moves the current command (after stopping the completion timer), all
+ * queued requests and all not yet processed completions onto @cbfn_q and
+ * stamps every command on @cbfn_q with the FW_HOSTERROR return value.
+ * The callbacks are not invoked here.
+ *
+ * Caller should hold hw lock across this call.
+ *
+ * NOTE(review): del_timer_sync() waits for a running timer handler; if
+ * that handler takes the hw lock this could deadlock - confirm against
+ * the timer function.
+ */
+void
+csio_mb_cancel_all(struct csio_hw *hw, struct list_head *cbfn_q)
+{
+	struct csio_mb *mbp;
+	struct csio_mbm *mbm = &hw->mbm;
+	struct fw_cmd_hdr *hdr;
+
+	if (mbm->mcurrent) {
+		mbp = mbm->mcurrent;
+
+		/* Stop mailbox completion timer */
+		del_timer_sync(&mbm->timer);
+
+		/* Add completion to tail of cbfn queue */
+		list_add_tail(&mbp->list, cbfn_q);
+		mbm->mcurrent = NULL;
+	}
+
+	/* Requests that were never written to the mailbox */
+	if (!list_empty(&mbm->req_q)) {
+		list_splice_tail_init(&mbm->req_q, cbfn_q);
+		mbm->stats.n_activeq = 0;
+	}
+
+	/* Completions whose callbacks have not run yet */
+	if (!list_empty(&mbm->cbfn_q)) {
+		list_splice_tail_init(&mbm->cbfn_q, cbfn_q);
+		mbm->stats.n_cbfnq = 0;
+	}
+
+	/*
+	 * Walk the commands through their 'list' member instead of casting
+	 * the list node, so the loop does not depend on where 'list' sits
+	 * inside struct csio_mb. An empty list simply skips the loop.
+	 */
+	list_for_each_entry(mbp, cbfn_q, list) {
+		hdr = (struct fw_cmd_hdr *)(mbp->mb);
+
+		csio_dbg(hw, "Cancelling pending mailbox num %x op:%x\n",
+			    hw->pfn, FW_CMD_OP_GET(ntohl(hdr->hi)));
+
+		CSIO_INC_STATS(mbm, n_cancel);
+		hdr->lo = htonl(FW_CMD_RETVAL(FW_HOSTERROR));
+	}
+}
+
+/*
+ * csio_mbm_init - Initialize Mailbox module
+ * @mbm: Mailbox module
+ * @hw: The HW structure
+ * @timer_fn: Function installed as the handler of the mailbox timer
+ *
+ * Empties the request and completion queues, sets the mailbox interrupt
+ * index to -1 and prepares the timer with @hw as its argument.
+ *
+ * Always returns 0.
+ */
+int
+csio_mbm_init(struct csio_mbm *mbm, struct csio_hw *hw,
+	      void (*timer_fn)(uintptr_t))
+{
+	INIT_LIST_HEAD(&mbm->req_q);
+	INIT_LIST_HEAD(&mbm->cbfn_q);
+	csio_set_mb_intr_idx(mbm, -1);
+
+	init_timer(&mbm->timer);
+	mbm->timer.function = timer_fn;
+	mbm->timer.data = (unsigned long)hw;
+
+	return 0;
+}
+
+/*
+ * csio_mbm_exit - Uninitialize mailbox module
+ * @mbm: Mailbox module
+ *
+ * Stop timer. By this time no command may be current and both the
+ * request and the completion queue must be empty; this is only checked
+ * through CSIO_DB_ASSERT().
+ */
+void
+csio_mbm_exit(struct csio_mbm *mbm)
+{
+	del_timer_sync(&mbm->timer);
+
+	CSIO_DB_ASSERT(mbm->mcurrent == NULL);
+	CSIO_DB_ASSERT(list_empty(&mbm->req_q));
+	CSIO_DB_ASSERT(list_empty(&mbm->cbfn_q));
+}
-- 
1.7.1


^ permalink raw reply related

* [V4 PATCH 4/8] csiostor: Chelsio FCoE offload driver submission (sources part 1).
From: Naresh Kumar Inna @ 2012-09-12 17:18 UTC (permalink / raw)
  To: JBottomley, linux-scsi, dm, leedom; +Cc: netdev, naresh, chethan
In-Reply-To: <1347470328-32490-1-git-send-email-naresh@chelsio.com>

This patch contains code for driver initialization, driver resource
allocation and the Work Request module functionality. Driver initialization
includes module entry/exit points, registration with PCI, FC transport and
SCSI mid layer subsystems. The Work Request module provides services for
allocation of DMA queues, posting Work Requests on them and processing
completions.

Signed-off-by: Naresh Kumar Inna <naresh@chelsio.com>
---
V2: Removed module parameters.
V3: Corrected comment.

 drivers/scsi/csiostor/csio_init.c | 1272 +++++++++++++++++++++++++++++
 drivers/scsi/csiostor/csio_wr.c   | 1632 +++++++++++++++++++++++++++++++++++++
 2 files changed, 2904 insertions(+), 0 deletions(-)
 create mode 100644 drivers/scsi/csiostor/csio_init.c
 create mode 100644 drivers/scsi/csiostor/csio_wr.c

diff --git a/drivers/scsi/csiostor/csio_init.c b/drivers/scsi/csiostor/csio_init.c
new file mode 100644
index 0000000..1864250
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_init.c
@@ -0,0 +1,1272 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <linux/mm.h>
+#include <linux/notifier.h>
+#include <linux/kdebug.h>
+#include <linux/version.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
+#include <linux/string.h>
+#include <linux/export.h>
+
+#include "csio_init.h"
+#include "csio_defs.h"
+
+/* Minimum number of elements kept in the mailbox and rnode mempools */
+#define CSIO_MIN_MEMPOOL_SZ	64
+
+/* Module-wide debugfs directory; parent of the per-hw directories */
+static struct dentry *csio_debugfs_root;
+
+/* FC transport templates for physical-port and vport SCSI hosts */
+static struct scsi_transport_template *csio_fcoe_transport;
+static struct scsi_transport_template *csio_fcoe_transport_vport;
+
+/*
+ * debugfs support
+ */
+
+/*
+ * csio_mem_open - open handler of the adapter memory debugfs files.
+ *
+ * Copies the inode's private pointer to the file so that csio_mem_read()
+ * can decode it. Always returns 0.
+ */
+static int
+csio_mem_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	return 0;
+}
+
+/*
+ * csio_mem_read - read handler of the adapter memory debugfs files.
+ * @file: File being read; private_data holds the hw pointer with the
+ *	  memory index stored in its two low bits
+ * @buf: User buffer to fill
+ * @count: Number of bytes requested
+ * @ppos: File position; advanced by the number of bytes read
+ *
+ * The request is clipped to the inode size. Memory is fetched through
+ * csio_hw_mc_read() or csio_hw_edc_read() into a 64 byte bounce buffer and
+ * copied to user space from the matching offset within that buffer.
+ *
+ * Returns the number of bytes read, 0 at or past the end of the file,
+ * -EINVAL for a negative position, -EFAULT if the copy to user space
+ * fails, or the error of the memory read helper.
+ */
+static ssize_t
+csio_mem_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+	loff_t cur = *ppos;
+	loff_t fsize = file->f_path.dentry->d_inode->i_size;
+	unsigned int mem = (uintptr_t)file->private_data & 3;
+	struct csio_hw *hw = file->private_data - mem;
+	__be32 chunk[16];
+	size_t len;
+	int ofst;
+	int ret;
+
+	if (cur < 0)
+		return -EINVAL;
+	if (cur >= fsize)
+		return 0;
+	if (count > fsize - cur)
+		count = fsize - cur;
+
+	for (; count; buf += len, cur += len, count -= len) {
+		ret = (mem == MEM_MC) ?
+			csio_hw_mc_read(hw, cur, chunk, NULL) :
+			csio_hw_edc_read(hw, mem, cur, chunk, NULL);
+		if (ret)
+			return ret;
+
+		/* Copy from the current position's offset inside the chunk */
+		ofst = cur % sizeof(chunk);
+		len = min(count, sizeof(chunk) - ofst);
+		if (copy_to_user(buf, (u8 *)chunk + ofst, len))
+			return -EFAULT;
+	}
+
+	count = cur - *ppos;
+	*ppos = cur;
+	return count;
+}
+
+/* File operations of the read-only adapter memory debugfs files */
+static const struct file_operations csio_mem_debugfs_fops = {
+	.owner   = THIS_MODULE,
+	.open    = csio_mem_open,
+	.read    = csio_mem_read,
+	.llseek  = default_llseek,
+};
+
+/*
+ * csio_add_debugfs_mem - Create a debugfs file for one adapter memory.
+ * @hw: HW module.
+ * @name: Name of the file below hw->debugfs_root
+ * @idx: Memory index; added to the hw pointer that is stored as the
+ *	 file's private data
+ * @size_mb: Size of the memory in MB, used as the file size
+ */
+static void __devinit
+csio_add_debugfs_mem(struct csio_hw *hw, const char *name,
+		     unsigned int idx, unsigned int size_mb)
+{
+	struct dentry *entry = debugfs_create_file(name, S_IRUSR,
+						   hw->debugfs_root,
+						   (void *)hw + idx,
+						   &csio_mem_debugfs_fops);
+
+	if (!entry || !entry->d_inode)
+		return;
+
+	entry->d_inode->i_size = size_mb << 20;
+}
+
+/*
+ * csio_setup_debugfs - Populate the per-hw debugfs directory.
+ * @hw: HW module.
+ *
+ * Adds one file for every memory that MA_TARGET_MEM_ENABLE reports as
+ * enabled: "edc0", "edc1" and "mc".
+ *
+ * Returns 0, or -1 when the per-hw debugfs directory does not exist.
+ */
+static int __devinit
+csio_setup_debugfs(struct csio_hw *hw)
+{
+	int mem_enable;
+
+	if (IS_ERR_OR_NULL(hw->debugfs_root))
+		return -1;
+
+	mem_enable = csio_rd_reg32(hw, MA_TARGET_MEM_ENABLE);
+
+	if (mem_enable & EDRAM0_ENABLE)
+		csio_add_debugfs_mem(hw, "edc0", MEM_EDC0, 5);
+
+	if (mem_enable & EDRAM1_ENABLE)
+		csio_add_debugfs_mem(hw, "edc1", MEM_EDC1, 5);
+
+	/* The size of the external memory is taken from its BAR register */
+	if (mem_enable & EXT_MEM_ENABLE)
+		csio_add_debugfs_mem(hw, "mc", MEM_MC,
+		      EXT_MEM_SIZE_GET(csio_rd_reg32(hw, MA_EXT_MEMORY_BAR)));
+
+	return 0;
+}
+
+/*
+ * csio_dfs_create - Creates and sets up per-hw debugfs.
+ * @hw: HW module.
+ *
+ * The directory is named after the PCI device and is only created when
+ * the module-wide debugfs directory exists. Always returns 0.
+ */
+static int
+csio_dfs_create(struct csio_hw *hw)
+{
+	if (!csio_debugfs_root)
+		return 0;
+
+	hw->debugfs_root = debugfs_create_dir(pci_name(hw->pdev),
+					      csio_debugfs_root);
+	csio_setup_debugfs(hw);
+
+	return 0;
+}
+
+/*
+ * csio_dfs_destroy - Destroys per-hw debugfs.
+ * @hw: HW module.
+ *
+ * Removes the per-hw directory and everything below it, if it was
+ * created. Always returns 0.
+ */
+static int
+csio_dfs_destroy(struct csio_hw *hw)
+{
+	if (!hw->debugfs_root)
+		return 0;
+
+	debugfs_remove_recursive(hw->debugfs_root);
+	return 0;
+}
+
+/*
+ * csio_dfs_init - Debug filesystem initialization for the module.
+ *
+ * Creates the module-wide debugfs directory. A failure is only logged;
+ * the function always returns 0.
+ */
+static int
+csio_dfs_init(void)
+{
+	struct dentry *root = debugfs_create_dir(KBUILD_MODNAME, NULL);
+
+	csio_debugfs_root = root;
+	if (root == NULL)
+		pr_warn("Could not create debugfs entry, continuing\n");
+
+	return 0;
+}
+
+/*
+ * csio_dfs_exit - debugfs cleanup for the module.
+ *
+ * Removes the module-wide debugfs directory created by csio_dfs_init().
+ */
+static void
+csio_dfs_exit(void)
+{
+	debugfs_remove(csio_debugfs_root);
+}
+
+/*
+ * csio_pci_init - PCI initialization.
+ * @pdev: PCI device.
+ * @bars: Output: bitmask of the memory BARs that were selected.
+ *
+ * Enables the memory resources of the PCI function, requests the selected
+ * BARs, enables bus mastering and sets a 64-bit DMA mask, falling back to
+ * a 32-bit one.
+ *
+ * Returns 0 on success and -ENODEV on any failure, in which case
+ * everything acquired so far has been released again.
+ */
+static int
+csio_pci_init(struct pci_dev *pdev, int *bars)
+{
+	*bars = pci_select_bars(pdev, IORESOURCE_MEM);
+
+	if (pci_enable_device_mem(pdev) != 0)
+		return -ENODEV;
+
+	if (pci_request_selected_regions(pdev, *bars, KBUILD_MODNAME) != 0)
+		goto disable_dev;
+
+	pci_set_master(pdev);
+	pci_try_set_mwi(pdev);
+
+	/* Try 64-bit DMA addressing first, then 32-bit */
+	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) {
+		pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
+		return 0;
+	}
+
+	if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) == 0) {
+		pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+		return 0;
+	}
+
+	dev_err(&pdev->dev, "No suitable DMA available.\n");
+	pci_release_selected_regions(pdev, *bars);
+disable_dev:
+	pci_disable_device(pdev);
+	return -ENODEV;
+}
+
+/*
+ * csio_pci_exit - PCI uninitialization.
+ * @pdev: PCI device.
+ * @bars: Bars to be released.
+ *
+ * Releases the BAR regions in *@bars and disables the PCI device.
+ */
+static void
+csio_pci_exit(struct pci_dev *pdev, int *bars)
+{
+	pci_release_selected_regions(pdev, *bars);
+	pci_disable_device(pdev);
+}
+
+/*
+ * csio_hw_init_workers - Initialize the HW module's worker threads.
+ * @hw: HW module.
+ *
+ * Sets up the event queue work item with csio_evtq_worker() as handler.
+ */
+static void
+csio_hw_init_workers(struct csio_hw *hw)
+{
+	INIT_WORK(&hw->evtq_work, csio_evtq_worker);
+}
+
+/*
+ * csio_hw_exit_workers - Stop the HW module's worker threads.
+ * @hw: HW module.
+ *
+ * Cancels the event queue work item, waiting for it if it is running,
+ * and then flushes the scheduled work.
+ */
+static void
+csio_hw_exit_workers(struct csio_hw *hw)
+{
+	cancel_work_sync(&hw->evtq_work);
+	flush_scheduled_work();
+}
+
+/*
+ * csio_create_queues - Create the allocated queues in the firmware.
+ * @hw: HW module.
+ *
+ * Creates the forward interrupt IQ (unless MSI-X is in use), the FW event
+ * IQ, the management EQ and an IQ/EQ pair for every SCSI queue set.
+ * Nothing is done when CSIO_HWF_Q_FW_ALLOCED is already set.
+ *
+ * Returns 0 on success. A failure of one of the first two IQs is returned
+ * unchanged; any later failure destroys the queues and returns -EINVAL.
+ */
+static int
+csio_create_queues(struct csio_hw *hw)
+{
+	struct csio_mgmtm *mgmtm = csio_hw_to_mgmtm(hw);
+	struct csio_scsi_cpu_info *cpu_info;
+	struct csio_scsi_qset *qset;
+	int port, cpu;
+	int rv;
+
+	if (hw->flags & CSIO_HWF_Q_FW_ALLOCED)
+		return 0;
+
+	/* The forward interrupt IQ only exists outside of MSI-X mode */
+	if (hw->intr_mode != CSIO_IM_MSIX) {
+		rv = csio_wr_iq_create(hw, NULL, hw->intr_iq_idx,
+					0, hw->pport[0].portid, false, NULL);
+		if (rv) {
+			csio_err(hw, " Forward Interrupt IQ failed!: %d\n", rv);
+			return rv;
+		}
+	}
+
+	/* FW event queue */
+	rv = csio_wr_iq_create(hw, NULL, hw->fwevt_iq_idx,
+			       csio_get_fwevt_intr_idx(hw),
+			       hw->pport[0].portid, true, NULL);
+	if (rv) {
+		csio_err(hw, "FW event IQ config failed!: %d\n", rv);
+		return rv;
+	}
+
+	/* Management egress queue */
+	rv = csio_wr_eq_create(hw, NULL, mgmtm->eq_idx,
+			mgmtm->iq_idx, hw->pport[0].portid, NULL);
+	if (rv) {
+		csio_err(hw, "Mgmt EQ create failed!: %d\n", rv);
+		goto destroy_queues;
+	}
+
+	/* One ingress/egress pair per SCSI queue set of every port */
+	for (port = 0; port < hw->num_pports; port++) {
+		cpu_info = &hw->scsi_cpu_info[port];
+
+		for (cpu = 0; cpu < cpu_info->max_cpus; cpu++) {
+			qset = &hw->sqset[port][cpu];
+
+			rv = csio_wr_iq_create(hw, NULL, qset->iq_idx,
+					       qset->intr_idx, port, false,
+					       NULL);
+			if (rv) {
+				csio_err(hw,
+				   "SCSI module IQ config failed [%d][%d]:%d\n",
+				   port, cpu, rv);
+				goto destroy_queues;
+			}
+
+			rv = csio_wr_eq_create(hw, NULL, qset->eq_idx,
+					       qset->iq_idx, port, NULL);
+			if (rv) {
+				csio_err(hw,
+				   "SCSI module EQ config failed [%d][%d]:%d\n",
+				   port, cpu, rv);
+				goto destroy_queues;
+			}
+		}
+	}
+
+	hw->flags |= CSIO_HWF_Q_FW_ALLOCED;
+	return 0;
+
+destroy_queues:
+	csio_wr_destroy_queues(hw, true);
+	return -EINVAL;
+}
+
+/*
+ * csio_config_queues - Configure the DMA queues.
+ * @hw: HW module.
+ *
+ * Allocates memory for queues and registers them with FW.
+ *
+ * If the queue memory has been allocated before (CSIO_HWF_Q_MEM_ALLOCED),
+ * only csio_create_queues() is run. Otherwise interrupts are enabled,
+ * the forward interrupt (non MSI-X only), FW event, management and SCSI
+ * queues are allocated, the queues are created and the IRQs requested.
+ *
+ * Returns 0 on success, -EINVAL on failure (or the result of
+ * csio_create_queues() on the early path).
+ */
+int
+csio_config_queues(struct csio_hw *hw)
+{
+	int i, j, idx, k = 0;
+	int rv;
+	struct csio_scsi_qset *sqset;
+	struct csio_mgmtm *mgmtm = csio_hw_to_mgmtm(hw);
+	struct csio_scsi_qset *orig;
+	struct csio_scsi_cpu_info *info;
+
+	if (hw->flags & CSIO_HWF_Q_MEM_ALLOCED)
+		return csio_create_queues(hw);
+
+	/* Calculate number of SCSI queues for MSIX we would like */
+	hw->num_scsi_msix_cpus = num_online_cpus();
+	hw->num_sqsets = num_online_cpus() * hw->num_pports;
+
+	/* Clamp to the supported maximum */
+	if (hw->num_sqsets > CSIO_MAX_SCSI_QSETS) {
+		hw->num_sqsets = CSIO_MAX_SCSI_QSETS;
+		hw->num_scsi_msix_cpus = CSIO_MAX_SCSI_CPU;
+	}
+
+	/* Initialize max_cpus, may get reduced during msix allocations */
+	for (i = 0; i < hw->num_pports; i++)
+		hw->scsi_cpu_info[i].max_cpus = hw->num_scsi_msix_cpus;
+
+	csio_dbg(hw, "nsqsets:%d scpus:%d\n",
+		    hw->num_sqsets, hw->num_scsi_msix_cpus);
+
+	csio_intr_enable(hw);
+
+	if (hw->intr_mode != CSIO_IM_MSIX) {
+
+		/* Allocate Forward interrupt iq. */
+		hw->intr_iq_idx = csio_wr_alloc_q(hw, CSIO_INTR_IQSIZE,
+						CSIO_INTR_WRSIZE, CSIO_INGRESS,
+						(void *)hw, 0, 0, NULL);
+		if (hw->intr_iq_idx == -1) {
+			csio_err(hw,
+				 "Forward interrupt queue creation failed\n");
+			goto intr_disable;
+		}
+	}
+
+	/* Allocate the FW evt queue */
+	hw->fwevt_iq_idx = csio_wr_alloc_q(hw, CSIO_FWEVT_IQSIZE,
+					   CSIO_FWEVT_WRSIZE,
+					   CSIO_INGRESS, (void *)hw,
+					   CSIO_FWEVT_FLBUFS, 0,
+					   csio_fwevt_intx_handler);
+	if (hw->fwevt_iq_idx == -1) {
+		csio_err(hw, "FW evt queue creation failed\n");
+		goto intr_disable;
+	}
+
+	/* Allocate the mgmt queue */
+	mgmtm->eq_idx = csio_wr_alloc_q(hw, CSIO_MGMT_EQSIZE,
+				      CSIO_MGMT_EQ_WRSIZE,
+				      CSIO_EGRESS, (void *)hw, 0, 0, NULL);
+	if (mgmtm->eq_idx == -1) {
+		csio_err(hw, "Failed to alloc egress queue for mgmt module\n");
+		goto intr_disable;
+	}
+
+	/* Use FW IQ for MGMT req completion */
+	mgmtm->iq_idx = hw->fwevt_iq_idx;
+
+	/* Allocate SCSI queues */
+	for (i = 0; i < hw->num_pports; i++) {
+		info = &hw->scsi_cpu_info[i];
+
+		for (j = 0; j < hw->num_scsi_msix_cpus; j++) {
+			sqset = &hw->sqset[i][j];
+
+			/*
+			 * Beyond max_cpus no new queues are allocated; the
+			 * queue set reuses the queues of an earlier set.
+			 */
+			if (j >= info->max_cpus) {
+				k = j % info->max_cpus;
+				orig = &hw->sqset[i][k];
+				sqset->eq_idx = orig->eq_idx;
+				sqset->iq_idx = orig->iq_idx;
+				continue;
+			}
+
+			idx = csio_wr_alloc_q(hw, csio_scsi_eqsize, 0,
+					      CSIO_EGRESS, (void *)hw, 0, 0,
+					      NULL);
+			if (idx == -1) {
+				csio_err(hw, "EQ creation failed for idx:%d\n",
+					    idx);
+				goto intr_disable;
+			}
+
+			sqset->eq_idx = idx;
+
+			idx = csio_wr_alloc_q(hw, CSIO_SCSI_IQSIZE,
+					     CSIO_SCSI_IQ_WRSZ, CSIO_INGRESS,
+					     (void *)hw, 0, 0,
+					     csio_scsi_intx_handler);
+			if (idx == -1) {
+				csio_err(hw, "IQ creation failed for idx:%d\n",
+					    idx);
+				goto intr_disable;
+			}
+			sqset->iq_idx = idx;
+		} /* for all CPUs */
+	} /* For all ports */
+
+	hw->flags |= CSIO_HWF_Q_MEM_ALLOCED;
+
+	rv = csio_create_queues(hw);
+	if (rv != 0)
+		goto intr_disable;
+
+	/*
+	 * Now request IRQs for the vectors. In the event of a failure,
+	 * cleanup is handled internally by this function.
+	 */
+	rv = csio_request_irqs(hw);
+	if (rv != 0)
+		return -EINVAL;
+
+	return 0;
+
+intr_disable:
+	csio_intr_disable(hw, false);
+
+	return -EINVAL;
+}
+
+/*
+ * csio_resource_alloc - Allocate the memory pools of the HW module.
+ * @hw: HW module.
+ *
+ * Sets the number of queues of the Work Request module and creates the
+ * mailbox mempool, the rnode mempool and the SCSI PCI pool.
+ *
+ * Returns 0 on success, or -ENOMEM after destroying the pools that had
+ * already been created.
+ */
+static int
+csio_resource_alloc(struct csio_hw *hw)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+
+	wrm->num_q = ((CSIO_MAX_SCSI_QSETS * 2) + CSIO_HW_NIQ +
+		       CSIO_HW_NEQ + CSIO_HW_NFLQ + CSIO_HW_NINTXQ);
+
+	hw->mb_mempool = mempool_create_kmalloc_pool(CSIO_MIN_MEMPOOL_SZ,
+						     sizeof(struct csio_mb));
+	if (hw->mb_mempool == NULL)
+		return -ENOMEM;
+
+	hw->rnode_mempool = mempool_create_kmalloc_pool(CSIO_MIN_MEMPOOL_SZ,
+						sizeof(struct csio_rnode));
+	if (hw->rnode_mempool == NULL)
+		goto free_mb_pool;
+
+	hw->scsi_pci_pool = pci_pool_create("csio_scsi_pci_pool", hw->pdev,
+					    CSIO_SCSI_RSP_LEN, 8, 0);
+	if (hw->scsi_pci_pool != NULL)
+		return 0;
+
+	/* Unwind in reverse order of creation */
+	mempool_destroy(hw->rnode_mempool);
+	hw->rnode_mempool = NULL;
+free_mb_pool:
+	mempool_destroy(hw->mb_mempool);
+	hw->mb_mempool = NULL;
+	return -ENOMEM;
+}
+
+/*
+ * csio_resource_free - Free the memory pools of the HW module.
+ * @hw: HW module.
+ *
+ * Destroys the pools in the reverse order of csio_resource_alloc() and
+ * clears the pointers to them.
+ */
+static void
+csio_resource_free(struct csio_hw *hw)
+{
+	pci_pool_destroy(hw->scsi_pci_pool);
+	hw->scsi_pci_pool = NULL;
+	mempool_destroy(hw->rnode_mempool);
+	hw->rnode_mempool = NULL;
+	mempool_destroy(hw->mb_mempool);
+	hw->mb_mempool = NULL;
+}
+
+/*
+ * csio_hw_alloc - Allocate and initialize the HW module.
+ * @pdev: PCI device.
+ *
+ * Allocates HW structure, DMA, memory resources, maps BARS to
+ * host memory and initializes HW module.
+ *
+ * Returns the new HW module, or NULL on failure after everything that
+ * had been set up so far has been undone.
+ */
+static struct csio_hw * __devinit
+csio_hw_alloc(struct pci_dev *pdev)
+{
+	struct csio_hw *hw;
+
+	hw = kzalloc(sizeof(struct csio_hw), GFP_KERNEL);
+	if (!hw)
+		goto err;
+
+	hw->pdev = pdev;
+	/*
+	 * strncpy() does not terminate the destination when the source
+	 * fills it completely. Copy one byte less than the field length
+	 * used so far, so that the zero byte left behind by kzalloc()
+	 * always terminates the version string.
+	 */
+	strncpy(hw->drv_version, CSIO_DRV_VERSION, 32 - 1);
+
+	/* memory pool/DMA pool allocation */
+	if (csio_resource_alloc(hw))
+		goto err_free_hw;
+
+	/* Get the start address of registers from BAR 0 */
+	hw->regstart = ioremap_nocache(pci_resource_start(pdev, 0),
+				       pci_resource_len(pdev, 0));
+	if (!hw->regstart) {
+		csio_err(hw, "Could not map BAR 0, regstart = %p\n",
+			 hw->regstart);
+		goto err_resource_free;
+	}
+
+	csio_hw_init_workers(hw);
+
+	if (csio_hw_init(hw))
+		goto err_unmap_bar;
+
+	/* debugfs is optional; csio_dfs_create() always returns 0 */
+	csio_dfs_create(hw);
+
+	csio_dbg(hw, "hw:%p\n", hw);
+
+	return hw;
+
+err_unmap_bar:
+	csio_hw_exit_workers(hw);
+	iounmap(hw->regstart);
+err_resource_free:
+	csio_resource_free(hw);
+err_free_hw:
+	kfree(hw);
+err:
+	return NULL;
+}
+
+/*
+ * csio_hw_free - Uninitialize and free the HW module.
+ * @hw: The HW module
+ *
+ * Disable interrupts, stop the workers, uninit the HW module, unmap the
+ * registers, remove the per-hw debugfs entries, free resources, free hw.
+ * @hw must not be used after this call.
+ */
+static void
+csio_hw_free(struct csio_hw *hw)
+{
+	csio_intr_disable(hw, true);
+	csio_hw_exit_workers(hw);
+	csio_hw_exit(hw);
+	iounmap(hw->regstart);
+	csio_dfs_destroy(hw);
+	csio_resource_free(hw);
+	kfree(hw);
+}
+
+/**
+ * csio_shost_init - Create and initialize the lnode module.
+ * @hw:		The HW module.
+ * @dev:	The device associated with this invocation.
+ * @probe:	Called from probe context or not? (not used here)
+ * @pln:	Parent lnode if any.
+ *
+ * Allocates the lnode as private data of a new SCSI host, fills in the
+ * host limits, initializes the lnode module and adds the host to the
+ * SCSI mid layer. The physical port template and transport are used when
+ * @dev is the PCI function's own device, the vport ones otherwise.
+ *
+ * Returns the new lnode, or NULL on failure.
+ */
+struct csio_lnode *
+csio_shost_init(struct csio_hw *hw, struct device *dev,
+		  bool probe, struct csio_lnode *pln)
+{
+	/*
+	 * hw->pdev is the physical port's PCI dev structure,
+	 * which will be different from the NPIV dev structure.
+	 */
+	bool is_phys = (dev == &hw->pdev->dev);
+	struct Scsi_Host *shost;
+	struct csio_lnode *ln;
+
+	csio_fcoe_shost_template.cmd_per_lun = csio_lun_qdepth;
+	csio_fcoe_shost_vport_template.cmd_per_lun = csio_lun_qdepth;
+
+	shost = scsi_host_alloc(is_phys ? &csio_fcoe_shost_template :
+					  &csio_fcoe_shost_vport_template,
+				sizeof(struct csio_lnode));
+	if (!shost)
+		return NULL;
+
+	ln = shost_priv(shost);
+	memset(ln, 0, sizeof(struct csio_lnode));
+
+	/* The host number is kept in the upper 16 bits of dev_num */
+	ln->dev_num = (shost->host_no << 16);
+
+	shost->can_queue = CSIO_MAX_QUEUE;
+	shost->this_id = -1;
+	shost->unique_id = shost->host_no;
+	shost->max_cmd_len = 16; /* Max CDB length supported */
+	shost->max_id = min_t(uint32_t, csio_fcoe_rnodes,
+			      hw->fres_info.max_ssns);
+	shost->max_lun = CSIO_MAX_LUN;
+	shost->transportt = is_phys ? csio_fcoe_transport :
+				      csio_fcoe_transport_vport;
+
+	/* The first lnode created becomes the root lnode */
+	if (!hw->rln)
+		hw->rln = ln;
+
+	/* Other initialization here: Common, Transport specific */
+	if (csio_lnode_init(ln, hw, pln))
+		goto put_shost;
+
+	if (scsi_add_host(shost, dev)) {
+		csio_lnode_exit(ln);
+		goto put_shost;
+	}
+
+	return ln;
+
+put_shost:
+	scsi_host_put(shost);
+	return NULL;
+}
+
+/**
+ * csio_shost_exit - De-instantiate the shost.
+ * @ln:		The lnode module corresponding to the shost.
+ *
+ * Removes the host from the FC transport and the SCSI mid layer, flushes
+ * the event queue under the hw lock, uninitializes the lnode and drops
+ * the host reference. @ln lives in the host's private data, so it must
+ * not be used after this call.
+ */
+void
+csio_shost_exit(struct csio_lnode *ln)
+{
+	struct Scsi_Host *shost = csio_ln_to_shost(ln);
+	struct csio_hw *hw = csio_lnode_to_hw(ln);
+
+	/* Inform transport */
+	fc_remove_host(shost);
+
+	/* Inform SCSI ML */
+	scsi_remove_host(shost);
+
+	/* Flush all the events, so that any rnode removal events
+	 * already queued are all handled, before we remove the lnode.
+	 */
+	spin_lock_irq(&hw->lock);
+	csio_evtq_flush(hw);
+	spin_unlock_irq(&hw->lock);
+
+	csio_lnode_exit(ln);
+	scsi_host_put(shost);
+}
+
+/*
+ * csio_lnode_alloc - Create an lnode on the physical port's PCI device.
+ * @hw: The HW module.
+ *
+ * Wrapper around csio_shost_init() for a non-probe context and without a
+ * parent lnode. Returns the new lnode, or NULL on failure.
+ */
+struct csio_lnode *
+csio_lnode_alloc(struct csio_hw *hw)
+{
+	return csio_shost_init(hw, &hw->pdev->dev, false, NULL);
+}
+
+/*
+ * csio_lnodes_block_request - Block SCSI requests on all lnodes of the HW.
+ * @hw: The HW module.
+ *
+ * Collects all sibling lnodes and their children under the hw lock and
+ * calls scsi_block_requests() on the host of each of them once the lock
+ * has been dropped. Nothing is blocked if the temporary lnode array
+ * cannot be allocated.
+ */
+void
+csio_lnodes_block_request(struct csio_hw *hw)
+{
+	struct Scsi_Host  *shost;
+	struct csio_lnode *sln;
+	struct csio_lnode *ln;
+	struct list_head *cur_ln, *cur_cln;
+	struct csio_lnode **lnode_list;
+	int cur_cnt = 0, ii;
+
+	/* kcalloc() checks the count * size multiplication for overflow */
+	lnode_list = kcalloc(hw->num_lns, sizeof(*lnode_list), GFP_KERNEL);
+	if (!lnode_list) {
+		csio_err(hw, "Failed to allocate lnodes_list\n");
+		return;
+	}
+
+	spin_lock_irq(&hw->lock);
+	/* Traverse sibling lnodes */
+	list_for_each(cur_ln, &hw->sln_head) {
+		/* The list node is cast directly to its lnode */
+		sln = (struct csio_lnode *) cur_ln;
+		lnode_list[cur_cnt++] = sln;
+
+		/* Traverse children lnodes */
+		list_for_each(cur_cln, &sln->cln_head)
+			lnode_list[cur_cnt++] = (struct csio_lnode *) cur_cln;
+	}
+	spin_unlock_irq(&hw->lock);
+
+	/* Block the hosts from the snapshot, outside of the hw lock */
+	for (ii = 0; ii < cur_cnt; ii++) {
+		csio_dbg(hw, "Blocking IOs on lnode: %p\n", lnode_list[ii]);
+		ln = lnode_list[ii];
+		shost = csio_ln_to_shost(ln);
+		scsi_block_requests(shost);
+
+	}
+	kfree(lnode_list);
+}
+
+/*
+ * csio_lnodes_unblock_request - Unblock SCSI requests on all lnodes.
+ * @hw: The HW module.
+ *
+ * Collects all sibling lnodes and their children under the hw lock and
+ * calls scsi_unblock_requests() on the host of each of them once the lock
+ * has been dropped. Nothing is unblocked if the temporary lnode array
+ * cannot be allocated.
+ */
+void
+csio_lnodes_unblock_request(struct csio_hw *hw)
+{
+	struct csio_lnode *ln;
+	struct Scsi_Host  *shost;
+	struct csio_lnode *sln;
+	struct list_head *cur_ln, *cur_cln;
+	struct csio_lnode **lnode_list;
+	int cur_cnt = 0, ii;
+
+	/* kcalloc() checks the count * size multiplication for overflow */
+	lnode_list = kcalloc(hw->num_lns, sizeof(*lnode_list), GFP_KERNEL);
+	if (!lnode_list) {
+		csio_err(hw, "Failed to allocate lnodes_list\n");
+		return;
+	}
+
+	spin_lock_irq(&hw->lock);
+	/* Traverse sibling lnodes */
+	list_for_each(cur_ln, &hw->sln_head) {
+		/* The list node is cast directly to its lnode */
+		sln = (struct csio_lnode *) cur_ln;
+		lnode_list[cur_cnt++] = sln;
+
+		/* Traverse children lnodes */
+		list_for_each(cur_cln, &sln->cln_head)
+			lnode_list[cur_cnt++] = (struct csio_lnode *) cur_cln;
+	}
+	spin_unlock_irq(&hw->lock);
+
+	/* Unblock the hosts from the snapshot, outside of the hw lock */
+	for (ii = 0; ii < cur_cnt; ii++) {
+		csio_dbg(hw, "unblocking IOs on lnode: %p\n", lnode_list[ii]);
+		ln = lnode_list[ii];
+		shost = csio_ln_to_shost(ln);
+		scsi_unblock_requests(shost);
+	}
+	kfree(lnode_list);
+}
+
+/*
+ * csio_lnodes_block_by_port - Block SCSI requests on the lnodes of one port.
+ * @hw: HW module.
+ * @portid: Port whose lnodes are to be blocked.
+ *
+ * Collects each sibling lnode whose portid equals @portid, together with
+ * all of its children, into a temporary array while holding hw->lock.
+ * scsi_block_requests() is then called on the Scsi_Host of each collected
+ * lnode once the lock has been released. If the temporary array cannot be
+ * allocated, an error is logged and no lnode is blocked.
+ */
+void
+csio_lnodes_block_by_port(struct csio_hw *hw, uint8_t portid)
+{
+	struct csio_lnode *ln;
+	struct Scsi_Host  *shost;
+	struct csio_lnode *sln;
+	struct list_head *cur_ln, *cur_cln;
+	struct csio_lnode **lnode_list;
+	int cur_cnt = 0, ii;
+
+	/* kcalloc() zeroes the array and checks the count * size product */
+	lnode_list = kcalloc(hw->num_lns, sizeof(*lnode_list), GFP_KERNEL);
+	if (!lnode_list) {
+		csio_err(hw, "Failed to allocate lnodes_list\n");
+		return;
+	}
+
+	spin_lock_irq(&hw->lock);
+	/* Traverse sibling lnodes */
+	list_for_each(cur_ln, &hw->sln_head) {
+		/* The list node pointer is used directly as the lnode pointer */
+		sln = (struct csio_lnode *) cur_ln;
+
+		/* Children are only visited for siblings on the given port */
+		if (sln->portid != portid)
+			continue;
+
+		lnode_list[cur_cnt++] = sln;
+
+		/* Traverse children lnodes */
+		list_for_each(cur_cln, &sln->cln_head)
+			lnode_list[cur_cnt++] = (struct csio_lnode *) cur_cln;
+	}
+	spin_unlock_irq(&hw->lock);
+
+	/* Work on the snapshot with hw->lock released */
+	for (ii = 0; ii < cur_cnt; ii++) {
+		csio_dbg(hw, "Blocking IOs on lnode: %p\n", lnode_list[ii]);
+		ln = lnode_list[ii];
+		shost = csio_ln_to_shost(ln);
+		scsi_block_requests(shost);
+	}
+	kfree(lnode_list);
+}
+
+/*
+ * csio_lnodes_unblock_by_port - Unblock SCSI requests on the lnodes of a port.
+ * @hw: HW module.
+ * @portid: Port whose lnodes are to be unblocked.
+ *
+ * Collects each sibling lnode whose portid equals @portid, together with
+ * all of its children, into a temporary array while holding hw->lock.
+ * scsi_unblock_requests() is then called on the Scsi_Host of each collected
+ * lnode once the lock has been released. If the temporary array cannot be
+ * allocated, an error is logged and no lnode is unblocked.
+ */
+void
+csio_lnodes_unblock_by_port(struct csio_hw *hw, uint8_t portid)
+{
+	struct csio_lnode *ln;
+	struct Scsi_Host  *shost;
+	struct csio_lnode *sln;
+	struct list_head *cur_ln, *cur_cln;
+	struct csio_lnode **lnode_list;
+	int cur_cnt = 0, ii;
+
+	/* kcalloc() zeroes the array and checks the count * size product */
+	lnode_list = kcalloc(hw->num_lns, sizeof(*lnode_list), GFP_KERNEL);
+	if (!lnode_list) {
+		csio_err(hw, "Failed to allocate lnodes_list\n");
+		return;
+	}
+
+	spin_lock_irq(&hw->lock);
+	/* Traverse sibling lnodes */
+	list_for_each(cur_ln, &hw->sln_head) {
+		/* The list node pointer is used directly as the lnode pointer */
+		sln = (struct csio_lnode *) cur_ln;
+
+		/* Children are only visited for siblings on the given port */
+		if (sln->portid != portid)
+			continue;
+		lnode_list[cur_cnt++] = sln;
+
+		/* Traverse children lnodes */
+		list_for_each(cur_cln, &sln->cln_head)
+			lnode_list[cur_cnt++] = (struct csio_lnode *) cur_cln;
+	}
+	spin_unlock_irq(&hw->lock);
+
+	/* Work on the snapshot with hw->lock released */
+	for (ii = 0; ii < cur_cnt; ii++) {
+		csio_dbg(hw, "unblocking IOs on lnode: %p\n", lnode_list[ii]);
+		ln = lnode_list[ii];
+		shost = csio_ln_to_shost(ln);
+		scsi_unblock_requests(shost);
+	}
+	kfree(lnode_list);
+}
+
+/*
+ * csio_lnodes_exit - Delete the lnodes of a HW instance.
+ * @hw: HW module.
+ * @npiv: If true, delete only the child (NPIV) lnodes.
+ *
+ * Works in two passes, each of which snapshots lnode pointers into a
+ * temporary array under hw->lock and then acts on the snapshot with the
+ * lock released:
+ *  1. every child lnode is handed to fc_vport_terminate();
+ *  2. unless @npiv is set, every sibling (physical) lnode is handed to
+ *     csio_shost_exit().
+ * If the temporary array cannot be allocated, an error is logged and
+ * nothing is deleted.
+ */
+void
+csio_lnodes_exit(struct csio_hw *hw, bool npiv)
+{
+	struct csio_lnode *sln;
+	struct csio_lnode *ln;
+	struct list_head *cur_ln, *cur_cln;
+	struct csio_lnode **lnode_list;
+	int cur_cnt = 0, ii;
+
+	/* kcalloc() zeroes the array and checks the count * size product */
+	lnode_list = kcalloc(hw->num_lns, sizeof(*lnode_list), GFP_KERNEL);
+	if (!lnode_list) {
+		csio_err(hw, "lnodes_exit: Failed to allocate lnodes_list.\n");
+		return;
+	}
+
+	/* Get all child lnodes(NPIV ports) */
+	spin_lock_irq(&hw->lock);
+	list_for_each(cur_ln, &hw->sln_head) {
+		/* The list node pointer is used directly as the lnode pointer */
+		sln = (struct csio_lnode *) cur_ln;
+
+		/* Traverse children lnodes */
+		list_for_each(cur_cln, &sln->cln_head)
+			lnode_list[cur_cnt++] = (struct csio_lnode *) cur_cln;
+	}
+	spin_unlock_irq(&hw->lock);
+
+	/* Delete NPIV lnodes */
+	for (ii = 0; ii < cur_cnt; ii++) {
+		csio_dbg(hw, "Deleting child lnode: %p\n", lnode_list[ii]);
+		ln = lnode_list[ii];
+		fc_vport_terminate(ln->fc_vport);
+	}
+
+	/* Delete only npiv lnodes */
+	if (npiv)
+		goto free_lnodes;
+
+	/* The same array is reused for the second pass */
+	cur_cnt = 0;
+	/* Get all physical lnodes */
+	spin_lock_irq(&hw->lock);
+	/* Traverse sibling lnodes */
+	list_for_each(cur_ln, &hw->sln_head) {
+		sln = (struct csio_lnode *) cur_ln;
+		lnode_list[cur_cnt++] = sln;
+	}
+	spin_unlock_irq(&hw->lock);
+
+	/* Delete physical lnodes */
+	for (ii = 0; ii < cur_cnt; ii++) {
+		csio_dbg(hw, "Deleting parent lnode: %p\n", lnode_list[ii]);
+		csio_shost_exit(lnode_list[ii]);
+	}
+
+free_lnodes:
+	kfree(lnode_list);
+}
+
+/*
+ * csio_lnode_init_post - Finish lnode setup once the HW has been started.
+ * @ln: lnode.
+ *
+ * Initializes the FC host attributes of @ln and then starts a scan of
+ * the SCSI host that backs it.
+ */
+static void
+csio_lnode_init_post(struct csio_lnode *ln)
+{
+	struct Scsi_Host *host;
+
+	host = csio_ln_to_shost(ln);
+
+	csio_fchost_attr_init(ln);
+	scsi_scan_host(host);
+}
+
+/*
+ * csio_probe_one - Probe and bring up one PCI function.
+ * @pdev: PCI device
+ * @id: Device ID
+ *
+ * This is the .probe() callback of the driver. This function:
+ * - Initializes the PCI function by enabling MMIO, setting bus
+ *   mastership and setting DMA mask.
+ * - Allocates HW structure, DMA, memory resources, maps BARS to
+ *   host memory and initializes HW module.
+ * - Allocates lnode structure via scsi_host_alloc, initializes
+ *   shost, initialized lnode module and registers with SCSI ML
+ *   via scsi_host_add.
+ * - Enables interrupts, and starts the chip by kicking off the
+ *   HW state machine.
+ * - Once hardware is ready, initiated scan of the host via
+ *   scsi_scan_host.
+ *
+ * Returns 0 on success. It also returns 0 when csio_hw_start() fails: in
+ * that case the HW structure stays allocated and attached as drvdata, but
+ * no lnodes are created ("debug mode"). Otherwise returns the error from
+ * csio_pci_init(), or -ENODEV when HW allocation or lnode setup fails.
+ */
+static int __devinit
+csio_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	int rv;
+	int bars;
+	int i;
+	struct csio_hw *hw;
+	struct csio_lnode *ln;
+
+	rv = csio_pci_init(pdev, &bars);
+	if (rv)
+		goto err;
+
+	hw = csio_hw_alloc(pdev);
+	if (!hw) {
+		rv = -ENODEV;
+		goto err_pci_exit;
+	}
+
+	pci_set_drvdata(pdev, hw);
+
+	/* A FW start failure is deliberately not treated as a probe error */
+	if (csio_hw_start(hw) != 0) {
+		dev_err(&pdev->dev,
+			"Failed to start FW, continuing in debug mode.\n");
+		return 0;
+	}
+
+	/* Format the firmware revision as major.minor.micro.build */
+	sprintf(hw->fwrev_str, "%u.%u.%u.%u\n",
+		    FW_HDR_FW_VER_MAJOR_GET(hw->fwrev),
+		    FW_HDR_FW_VER_MINOR_GET(hw->fwrev),
+		    FW_HDR_FW_VER_MICRO_GET(hw->fwrev),
+		    FW_HDR_FW_VER_BUILD_GET(hw->fwrev));
+
+	/* Create, start and scan one lnode per physical port */
+	for (i = 0; i < hw->num_pports; i++) {
+		ln = csio_shost_init(hw, &pdev->dev, true, NULL);
+		if (!ln) {
+			rv = -ENODEV;
+			break;
+		}
+		/* Initialize portid */
+		ln->portid = hw->pport[i].portid;
+
+		/* csio_lnode_start() is called with hw->lock held */
+		spin_lock_irq(&hw->lock);
+		if (csio_lnode_start(ln) != 0)
+			rv = -ENODEV;
+		spin_unlock_irq(&hw->lock);
+
+		if (rv)
+			break;
+
+		csio_lnode_init_post(ln);
+	}
+
+	if (rv)
+		goto err_lnode_exit;
+
+	return 0;
+
+err_lnode_exit:
+	/* Requests are blocked only for the duration of the HW stop */
+	csio_lnodes_block_request(hw);
+	spin_lock_irq(&hw->lock);
+	csio_hw_stop(hw);
+	spin_unlock_irq(&hw->lock);
+	csio_lnodes_unblock_request(hw);
+	pci_set_drvdata(hw->pdev, NULL);
+	csio_lnodes_exit(hw, 0);
+	csio_hw_free(hw);
+err_pci_exit:
+	csio_pci_exit(pdev, &bars);
+err:
+	dev_err(&pdev->dev, "probe of device failed: %d\n", rv);
+	return rv;
+}
+
+/*
+ * csio_remove_one - Remove one instance of the driver at this PCI function.
+ * @pdev: PCI device
+ *
+ * Used during hotplug operation.
+ *
+ * Teardown order: block requests on all lnodes, stop the HW under
+ * hw->lock, unblock requests, delete all lnodes (NPIV and physical),
+ * free the HW structure, clear drvdata and finally release the PCI
+ * resources.
+ */
+static void __devexit
+csio_remove_one(struct pci_dev *pdev)
+{
+	struct csio_hw *hw = pci_get_drvdata(pdev);
+	/* Recomputed here because the mask used at probe time is not kept */
+	int bars = pci_select_bars(pdev, IORESOURCE_MEM);
+
+	csio_lnodes_block_request(hw);
+	spin_lock_irq(&hw->lock);
+
+	/* Stops lnode, Rnode s/m
+	 * Quiesce IOs.
+	 * All sessions with remote ports are unregistered.
+	 */
+	csio_hw_stop(hw);
+	spin_unlock_irq(&hw->lock);
+	csio_lnodes_unblock_request(hw);
+
+	csio_lnodes_exit(hw, 0);
+	csio_hw_free(hw);
+	pci_set_drvdata(pdev, NULL);
+	csio_pci_exit(pdev, &bars);
+}
+
+/*
+ * csio_pci_error_detected - PCI error recovery callback: an error was seen.
+ * @pdev: PCI device
+ * @state: Channel state reported by the PCI core.
+ *
+ * Posts CSIO_HWE_PCIERR_DETECTED to the HW state machine, removes all
+ * lnodes, disables interrupts and disables the PCI device.
+ *
+ * Returns PCI_ERS_RESULT_DISCONNECT if @state is
+ * pci_channel_io_perm_failure, PCI_ERS_RESULT_NEED_RESET otherwise.
+ */
+static pci_ers_result_t
+csio_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+	struct csio_hw *hw = pci_get_drvdata(pdev);
+
+	csio_lnodes_block_request(hw);
+
+	/*
+	 * Hand the error to the HW state machine. It reacts by quiescing
+	 * IOs, unregistering rports and taking the device offline.
+	 */
+	spin_lock_irq(&hw->lock);
+	csio_post_event(&hw->sm, CSIO_HWE_PCIERR_DETECTED);
+	spin_unlock_irq(&hw->lock);
+
+	csio_lnodes_unblock_request(hw);
+	csio_lnodes_exit(hw, 0);
+	csio_intr_disable(hw, true);
+	pci_disable_device(pdev);
+
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/*
+ * csio_pci_slot_reset - PCI error recovery callback: slot has been reset.
+ * @pdev: PCI device
+ *
+ * Re-enables the device, restores its PCI state and posts
+ * CSIO_HWE_PCIERR_SLOT_RESET to the HW state machine.
+ *
+ * Returns PCI_ERS_RESULT_RECOVERED if the HW reports ready afterwards,
+ * PCI_ERS_RESULT_DISCONNECT if the device cannot be re-enabled or the HW
+ * is not ready.
+ */
+static pci_ers_result_t
+csio_pci_slot_reset(struct pci_dev *pdev)
+{
+	struct csio_hw *hw = pci_get_drvdata(pdev);
+	int hw_ready;
+
+	if (pci_enable_device(pdev) != 0) {
+		dev_err(&pdev->dev, "cannot re-enable device in slot reset\n");
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	pci_set_master(pdev);
+	pci_restore_state(pdev);
+	pci_save_state(pdev);
+	pci_cleanup_aer_uncorrect_error_status(pdev);
+
+	/*
+	 * Drive the HW s/m back to the ready state; IOs are not resumed
+	 * here. Readiness is sampled while hw->lock is still held.
+	 */
+	spin_lock_irq(&hw->lock);
+	csio_post_event(&hw->sm, CSIO_HWE_PCIERR_SLOT_RESET);
+	hw_ready = csio_is_hw_ready(hw) ? 1 : 0;
+	spin_unlock_irq(&hw->lock);
+
+	if (hw_ready)
+		return PCI_ERS_RESULT_RECOVERED;
+
+	dev_err(&pdev->dev, "Can't initialize HW when in slot reset\n");
+	return PCI_ERS_RESULT_DISCONNECT;
+}
+
+/*
+ * csio_pci_resume - Resume normal operations
+ * @pdev: PCI device
+ *
+ * PCI error recovery .resume callback. Re-creates one lnode per physical
+ * port, starts it under hw->lock and kicks off a host scan, mirroring the
+ * lnode loop in csio_probe_one(). On any failure the HW is stopped, all
+ * lnodes are deleted and the HW structure is freed.
+ *
+ * NOTE(review): the failure path calls csio_hw_free(hw) but, unlike the
+ * probe error path, does not clear the PCI drvdata. If csio_hw_free()
+ * releases @hw, a later csio_remove_one() would presumably operate on a
+ * stale pointer -- confirm and handle together with csio_remove_one().
+ */
+static void
+csio_pci_resume(struct pci_dev *pdev)
+{
+	struct csio_hw *hw = pci_get_drvdata(pdev);
+	struct csio_lnode *ln;
+	int rv = 0;
+	int i;
+
+	/* Bring the LINK UP and Resume IO */
+
+	for (i = 0; i < hw->num_pports; i++) {
+		ln = csio_shost_init(hw, &pdev->dev, true, NULL);
+		if (!ln) {
+			rv = -ENODEV;
+			break;
+		}
+		/* Initialize portid */
+		ln->portid = hw->pport[i].portid;
+
+		/* csio_lnode_start() is called with hw->lock held */
+		spin_lock_irq(&hw->lock);
+		if (csio_lnode_start(ln) != 0)
+			rv = -ENODEV;
+		spin_unlock_irq(&hw->lock);
+
+		if (rv)
+			break;
+
+		csio_lnode_init_post(ln);
+	}
+
+	if (rv)
+		goto err_resume_exit;
+
+	return;
+
+err_resume_exit:
+	/* Requests are blocked only for the duration of the HW stop */
+	csio_lnodes_block_request(hw);
+	spin_lock_irq(&hw->lock);
+	csio_hw_stop(hw);
+	spin_unlock_irq(&hw->lock);
+	csio_lnodes_unblock_request(hw);
+	csio_lnodes_exit(hw, 0);
+	csio_hw_free(hw);
+	dev_err(&pdev->dev, "resume of device failed: %d\n", rv);
+}
+
+/* PCI error recovery callbacks, hooked up through csio_pci_driver. */
+static struct pci_error_handlers csio_err_handler = {
+	.error_detected = csio_pci_error_detected,
+	.slot_reset	= csio_pci_slot_reset,
+	.resume		= csio_pci_resume,
+};
+
+/*
+ * PCI device IDs handled by this driver. The all-zero entry terminates
+ * the table.
+ */
+static DEFINE_PCI_DEVICE_TABLE(csio_pci_tbl) = {
+	CSIO_DEVICE(CSIO_DEVID_T440DBG_FCOE, 0),	/* T440DBG FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T420CR_FCOE, 0),		/* T420CR FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T422CR_FCOE, 0),		/* T422CR FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T440CR_FCOE, 0),		/* T440CR FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T420BCH_FCOE, 0),	/* T420BCH FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T440BCH_FCOE, 0),	/* T440BCH FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T440CH_FCOE, 0),		/* T440CH FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T420SO_FCOE, 0),		/* T420SO FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T420CX_FCOE, 0),		/* T420CX FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T420BT_FCOE, 0),		/* T420BT FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T404BT_FCOE, 0),		/* T404BT FCOE */
+	CSIO_DEVICE(CSIO_DEVID_B420_FCOE, 0),		/* B420 FCOE */
+	CSIO_DEVICE(CSIO_DEVID_B404_FCOE, 0),		/* B404 FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T480CR_FCOE, 0),		/* T480 CR FCOE */
+	CSIO_DEVICE(CSIO_DEVID_T440LPCR_FCOE, 0),	/* T440 LP-CR FCOE */
+	CSIO_DEVICE(CSIO_DEVID_PE10K, 0),		/* PE10K FCOE */
+	CSIO_DEVICE(CSIO_DEVID_PE10K_PF1, 0),	/* PE10K FCOE on PF1 */
+	{ 0, 0, 0, 0, 0, 0, 0 }
+};
+
+
+/*
+ * PCI driver descriptor: ties the device ID table to the probe/remove
+ * entry points and the PCI error recovery handlers.
+ */
+static struct pci_driver csio_pci_driver = {
+	.name		= KBUILD_MODNAME,
+	.driver		= {
+		.owner	= THIS_MODULE,
+	},
+	.id_table	= csio_pci_tbl,
+	.probe		= csio_probe_one,
+	.remove		= csio_remove_one,
+	.err_handler	= &csio_err_handler,
+};
+
+/*
+ * csio_init - Module load entry point of the Chelsio storage driver.
+ *
+ * Sets up debugfs, attaches the FC transport templates for physical and
+ * virtual ports, and registers the PCI driver. Everything done so far is
+ * undone if a later step fails.
+ *
+ * Returns 0 on success, -ENOMEM if a transport template cannot be
+ * attached, or the error returned by pci_register_driver().
+ */
+static int __init
+csio_init(void)
+{
+	int err;
+
+	pr_info("%s %s\n", CSIO_DRV_DESC, CSIO_DRV_VERSION);
+
+	csio_dfs_init();
+
+	csio_fcoe_transport = fc_attach_transport(&csio_fc_transport_funcs);
+	if (csio_fcoe_transport == NULL) {
+		err = -ENOMEM;
+		goto out_dfs;
+	}
+
+	csio_fcoe_transport_vport =
+			fc_attach_transport(&csio_fc_transport_vport_funcs);
+	if (csio_fcoe_transport_vport == NULL) {
+		err = -ENOMEM;
+		goto out_transport;
+	}
+
+	err = pci_register_driver(&csio_pci_driver);
+	if (err == 0)
+		return 0;
+
+	/* Unwind in reverse order of setup */
+	fc_release_transport(csio_fcoe_transport_vport);
+out_transport:
+	fc_release_transport(csio_fcoe_transport);
+out_dfs:
+	csio_dfs_exit();
+	return err;
+}
+
+/*
+ * csio_exit - Chelsio storage driver uninitialization.
+ *
+ * Function that gets called in the unload path. Unregisters the PCI
+ * driver first, then tears down debugfs and releases both FC transport
+ * templates attached in csio_init().
+ */
+static void __exit
+csio_exit(void)
+{
+	pci_unregister_driver(&csio_pci_driver);
+	csio_dfs_exit();
+	fc_release_transport(csio_fcoe_transport_vport);
+	fc_release_transport(csio_fcoe_transport);
+}
+
+/* Module entry/exit points and module metadata. */
+module_init(csio_init);
+module_exit(csio_exit);
+MODULE_AUTHOR(CSIO_DRV_AUTHOR);
+MODULE_DESCRIPTION(CSIO_DRV_DESC);
+MODULE_LICENSE(CSIO_DRV_LICENSE);
+MODULE_DEVICE_TABLE(pci, csio_pci_tbl);
+MODULE_VERSION(CSIO_DRV_VERSION);
+MODULE_FIRMWARE(CSIO_FW_FNAME);
diff --git a/drivers/scsi/csiostor/csio_wr.c b/drivers/scsi/csiostor/csio_wr.c
new file mode 100644
index 0000000..329c6df
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_wr.c
@@ -0,0 +1,1632 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/compiler.h>
+#include <linux/slab.h>
+#include <asm/page.h>
+#include <linux/cache.h>
+
+#include "csio_hw.h"
+#include "csio_wr.h"
+#include "csio_mb.h"
+#include "csio_defs.h"
+
+/*
+ * Interrupt coalescing knobs.
+ *
+ * A non-zero csio_intr_coalesce_cnt makes csio_wr_iq_create() select
+ * counter-based update scheduling (X_UPDATESCHEDULING_COUNTER_OPTTIMER)
+ * instead of the timer-only mode. csio_sge_thresh_reg is passed to FW as
+ * the ingress queue interrupt count threshold (iqintcntthresh).
+ *
+ * NOTE(review): csio_intr_coalesce_time and csio_sge_timer_reg presumably
+ * hold the coalescing timer value and the index of the SGE timer register
+ * in use -- confirm units against the code that programs the SGE.
+ */
+int csio_intr_coalesce_cnt;		/* value:SGE_INGRESS_RX_THRESHOLD[0] */
+static int csio_sge_thresh_reg;		/* SGE_INGRESS_RX_THRESHOLD[0] */
+
+int csio_intr_coalesce_time = 10;	/* value:SGE_TIMER_VALUE_1 */
+static int csio_sge_timer_reg = 1;
+
+/*
+ * Write _val to the register named SGE_FL_BUFFER_SIZE<_reg>; _reg is
+ * token-pasted onto the register name, so it must be a literal suffix.
+ */
+#define CSIO_SET_FLBUF_SIZE(_hw, _reg, _val)				\
+	csio_wr_reg32((_hw), (_val), SGE_FL_BUFFER_SIZE##_reg)
+
+/*
+ * Read FL buffer size register number @reg and cache its value in
+ * sge->sge_fl_buf_size[reg]. Registers are addressed as consecutive
+ * 32-bit words starting at SGE_FL_BUFFER_SIZE0.
+ */
+static void
+csio_get_flbuf_size(struct csio_hw *hw, struct csio_sge *sge, uint32_t reg)
+{
+	size_t reg_off = reg * sizeof(uint32_t);
+
+	sge->sge_fl_buf_size[reg] =
+		csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE0 + reg_off);
+}
+
+/*
+ * Size of a free list buffer. The low four bits of the buffer's bus
+ * address select the entry of sge->sge_fl_buf_size[] to return.
+ */
+static inline uint32_t
+csio_wr_fl_bufsz(struct csio_sge *sge, struct csio_dma_buf *buf)
+{
+	unsigned int size_idx = buf->paddr & 0xF;
+
+	return sge->sge_fl_buf_size[size_idx];
+}
+
+/*
+ * Egress queue status page size in bytes: 128 when the
+ * EGRSTATUSPAGESIZE bit is set in the cached SGE control value,
+ * 64 otherwise.
+ */
+static inline uint32_t
+csio_wr_qstat_pgsz(struct csio_hw *hw)
+{
+	if (hw->wrm.sge.sge_control & EGRSTATUSPAGESIZE(1))
+		return 128;
+
+	return 64;
+}
+
+/*
+ * Ring the freelist doorbell.
+ *
+ * The doorbell is written only once at least 8 freelist buffer pointers
+ * (CSIO_QCREDIT_SZ bytes, each pointer being 8 bytes) are pending. The
+ * producer index is advanced in units of 8 pointers; whatever is left
+ * over (fewer than 8) stays in flq->inc_idx for the next call.
+ */
+static inline void
+csio_wr_ring_fldb(struct csio_hw *hw, struct csio_q *flq)
+{
+	/* Not a full credit yet: nothing to tell the hardware */
+	if (flq->inc_idx < 8)
+		return;
+
+	csio_wr_reg32(hw,
+		      DBPRIO(1) | QID(flq->un.fl.flid) | PIDX(flq->inc_idx / 8),
+		      MYPF_REG(SGE_PF_KDOORBELL));
+	flq->inc_idx &= 7;
+}
+
+/*
+ * Enable SGE interrupts for ingress queue @iqid by writing a GTS value
+ * with a cidx increment of zero.
+ */
+static void
+csio_wr_sge_intr_enable(struct csio_hw *hw, uint16_t iqid)
+{
+	uint32_t gts_val;
+
+	gts_val = CIDXINC(0) | INGRESSQID(iqid) |
+		  TIMERREG(X_TIMERREG_RESTART_COUNTER);
+
+	csio_wr_reg32(hw, gts_val, MYPF_REG(SGE_PF_GTS));
+}
+
+/*
+ * csio_wr_fill_fl - Populate the FL buffers of a FL queue.
+ * @hw: HW module.
+ * @flq: Freelist queue.
+ *
+ * Allocates one DMA-coherent buffer per queue credit, of the size cached
+ * for the queue's buffer size register (flq->un.fl.sreg), and writes each
+ * buffer's bus address into successive freelist entries. The low four
+ * bits of every entry carry the size register index.
+ *
+ * Returns 0 on success or -ENOMEM if a buffer cannot be allocated.
+ * Buffers allocated before the failure are left recorded in
+ * flq->un.fl.bufs[] and are not freed here.
+ */
+static int
+csio_wr_fill_fl(struct csio_hw *hw, struct csio_q *flq)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_sge *sge = &wrm->sge;
+	__be64 *d = (__be64 *)(flq->vstart);
+	struct csio_dma_buf *buf = &flq->un.fl.bufs[0];
+	uint64_t paddr;
+	int sreg = flq->un.fl.sreg;
+	int total = flq->credits;
+	int filled;
+
+	for (filled = 0; filled < total; filled++) {
+		buf->len = sge->sge_fl_buf_size[sreg];
+		buf->vaddr = pci_alloc_consistent(hw->pdev, buf->len,
+						  &buf->paddr);
+		if (!buf->vaddr) {
+			/* Report how many buffers were actually populated */
+			csio_err(hw, "Could only fill %d buffers!\n", filled);
+			return -ENOMEM;
+		}
+
+		/* Encode the buffer size register in the low address bits */
+		paddr = buf->paddr | (sreg & 0xF);
+
+		*d++ = cpu_to_be64(paddr);
+		buf++;
+	}
+
+	return 0;
+}
+
+/*
+ * csio_wr_update_fl - Account for freelist buffers handed back to the FL.
+ * @hw: HW module.
+ * @flq: Freelist queue.
+ * @n: Number of entries to advance by.
+ *
+ * Advances the producer index by @n, wrapping it at the queue's credit
+ * count, adds @n to the pending doorbell increment and bumps the refill
+ * statistic. The doorbell itself is not rung here.
+ */
+static inline void
+csio_wr_update_fl(struct csio_hw *hw, struct csio_q *flq, uint16_t n)
+{
+	flq->pidx += n;
+	if (unlikely(flq->pidx >= flq->credits))
+		flq->pidx -= (uint16_t)flq->credits;
+
+	flq->inc_idx += n;
+
+	CSIO_INC_STATS(flq, n_flq_refill);
+}
+
+/*
+ * csio_wr_alloc_q - Allocate a WR queue and initialize it.
+ * @hw: HW module
+ * @qsize: Size of the queue in bytes
+ * @wrsize: Size of WR in this queue, if fixed.
+ * @type: Type of queue (Ingress/Egress/Freelist)
+ * @owner: Module that owns this queue.
+ * @nflb: Number of freelist buffers for FL.
+ * @sreg: What is the FL buffer size register?
+ * @iq_intx_handler: Ingress queue handler in INTx mode.
+ *
+ * This function allocates and sets up a queue for the caller
+ * of size qsize, aligned at the required boundary. This is subject to
+ * free entries being available in the queue array. If one is found,
+ * it is initialized with the allocated queue, marked as being used (owner),
+ * and a handle returned to the caller in form of the queue's index
+ * into the q_arr array.
+ * If user has indicated a freelist (by specifying nflb > 0), create
+ * another queue (with its own index into q_arr) for the freelist. Allocate
+ * memory for DMA buffer metadata (vaddr, len etc). Save off the freelist
+ * idx in the ingress queue's flq.idx. This is how a Freelist is associated
+ * with its owning ingress queue.
+ *
+ * Returns the index of the new queue in q_arr on success, or -1 on any
+ * failure. Nothing allocated before a failure is released here.
+ */
+int
+csio_wr_alloc_q(struct csio_hw *hw, uint32_t qsize, uint32_t wrsize,
+		uint16_t type, void *owner, uint32_t nflb, int sreg,
+		iq_handler_t iq_intx_handler)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_q	*q, *flq;
+	int		free_idx = wrm->free_qidx;
+	int		ret_idx = free_idx;
+	uint32_t	qsz;
+	int flq_idx;
+
+	if (free_idx >= wrm->num_q) {
+		csio_err(hw, "No more free queues.\n");
+		return -1;
+	}
+
+	/* Work out the real allocation size for this queue type */
+	switch (type) {
+	case CSIO_EGRESS:
+		qsz = ALIGN(qsize, CSIO_QCREDIT_SZ) + csio_wr_qstat_pgsz(hw);
+		break;
+	case CSIO_INGRESS:
+		/* Only these fixed WR sizes are accepted for ingress queues */
+		switch (wrsize) {
+		case 16:
+		case 32:
+		case 64:
+		case 128:
+			break;
+		default:
+			csio_err(hw, "Invalid Ingress queue WR size:%d\n",
+				    wrsize);
+			return -1;
+		}
+
+		/*
+		 * Number of elements must be a multiple of 16
+		 * So this includes status page size
+		 */
+		qsz = ALIGN(qsize/wrsize, 16) * wrsize;
+
+		break;
+	case CSIO_FREELIST:
+		qsz = ALIGN(qsize/wrsize, 8) * wrsize + csio_wr_qstat_pgsz(hw);
+		break;
+	default:
+		csio_err(hw, "Invalid queue type: 0x%x\n", type);
+		return -1;
+	}
+
+	q = wrm->q_arr[free_idx];
+
+	q->vstart = pci_alloc_consistent(hw->pdev, qsz, &q->pstart);
+	if (!q->vstart) {
+		csio_err(hw,
+			 "Failed to allocate DMA memory for "
+			 "queue at id: %d size: %d\n", free_idx, qsize);
+		return -1;
+	}
+
+	/*
+	 * We need to zero out the contents, importantly for ingress,
+	 * since we start with a generation bit of 1 for ingress.
+	 */
+	memset(q->vstart, 0, qsz);
+
+	q->type		= type;
+	q->owner	= owner;
+	q->pidx		= q->cidx = q->inc_idx = 0;
+	q->size		= qsz;
+	q->wr_sz	= wrsize;	/* If using fixed size WRs */
+
+	/*
+	 * Claim the slot now: the nested freelist allocation below must
+	 * pick up the next free index.
+	 */
+	wrm->free_qidx++;
+
+	if (type == CSIO_INGRESS) {
+		/* Since queue area is set to zero */
+		q->un.iq.genbit	= 1;
+
+		/*
+		 * Ingress queue status page size is always the size of
+		 * the ingress queue entry.
+		 */
+		q->credits	= (qsz - q->wr_sz) / q->wr_sz;
+		q->vwrap	= (void *)((uintptr_t)(q->vstart) + qsz
+							- q->wr_sz);
+
+		/* Allocate memory for FL if requested */
+		if (nflb > 0) {
+			/* Recursive call; nflb == 0 keeps it one level deep */
+			flq_idx = csio_wr_alloc_q(hw, nflb * sizeof(__be64),
+						  sizeof(__be64), CSIO_FREELIST,
+						  owner, 0, sreg, NULL);
+			if (flq_idx == -1) {
+				csio_err(hw,
+					 "Failed to allocate FL queue"
+					 " for IQ idx:%d\n", free_idx);
+				return -1;
+			}
+
+			/* Associate the new FL with the Ingress queue */
+			q->un.iq.flq_idx = flq_idx;
+
+			flq = wrm->q_arr[q->un.iq.flq_idx];
+			flq->un.fl.bufs = kzalloc(flq->credits *
+						  sizeof(struct csio_dma_buf),
+						  GFP_KERNEL);
+			if (!flq->un.fl.bufs) {
+				csio_err(hw,
+					 "Failed to allocate FL queue bufs"
+					 " for IQ idx:%d\n", free_idx);
+				return -1;
+			}
+
+			flq->un.fl.packen = 0;
+			flq->un.fl.offset = 0;
+			flq->un.fl.sreg = sreg;
+
+			/* Fill up the free list buffers */
+			if (csio_wr_fill_fl(hw, flq))
+				return -1;
+
+			/*
+			 * Make sure in a FLQ, at least 1 credit (8 FL buffers)
+			 * remains unpopulated, otherwise HW thinks
+			 * FLQ is empty.
+			 */
+			flq->pidx = flq->inc_idx = flq->credits - 8;
+		} else {
+			/* -1 marks an ingress queue without a freelist */
+			q->un.iq.flq_idx = -1;
+		}
+
+		/* Associate the IQ INTx handler. */
+		q->un.iq.iq_intx_handler = iq_intx_handler;
+
+		/* Placeholder id until FW assigns the real one */
+		csio_q_iqid(hw, ret_idx) = CSIO_MAX_QID;
+
+	} else if (type == CSIO_EGRESS) {
+		/* The status page sits at the end and is not usable space */
+		q->credits = (qsz - csio_wr_qstat_pgsz(hw)) / CSIO_QCREDIT_SZ;
+		q->vwrap   = (void *)((uintptr_t)(q->vstart) + qsz
+						- csio_wr_qstat_pgsz(hw));
+		csio_q_eqid(hw, ret_idx) = CSIO_MAX_QID;
+	} else { /* Freelist */
+		q->credits = (qsz - csio_wr_qstat_pgsz(hw)) / sizeof(__be64);
+		q->vwrap   = (void *)((uintptr_t)(q->vstart) + qsz
+						- csio_wr_qstat_pgsz(hw));
+		csio_q_flid(hw, ret_idx) = CSIO_MAX_QID;
+	}
+
+	return ret_idx;
+}
+
+/*
+ * csio_wr_iq_create_rsp - Response handler for IQ creation.
+ * @hw: The HW module.
+ * @mbp: Mailbox.
+ * @iq_idx: Ingress queue that got created.
+ *
+ * Handle FW_IQ_CMD mailbox completion. Save off the assigned IQ/FL ids,
+ * record the IQ in the interrupt map, re-enable interrupts for the queue
+ * and, if a freelist is attached, hand the pre-filled buffers to the SGE.
+ *
+ * @mbp is returned to hw->mb_mempool on every path.
+ * Returns 0 on success, -EINVAL if FW reported a failure or the relative
+ * IQ id is outside the supported range.
+ */
+static int
+csio_wr_iq_create_rsp(struct csio_hw *hw, struct csio_mb *mbp, int iq_idx)
+{
+	struct csio_iq_params iqp;
+	enum fw_retval retval;
+	uint32_t iq_id;
+	int flq_idx;
+
+	memset(&iqp, 0, sizeof(struct csio_iq_params));
+
+	csio_mb_iq_alloc_write_rsp(hw, mbp, &retval, &iqp);
+
+	if (retval != FW_SUCCESS) {
+		csio_err(hw, "IQ cmd returned 0x%x!\n", retval);
+		mempool_free(mbp, hw->mb_mempool);
+		return -EINVAL;
+	}
+
+	csio_q_iqid(hw, iq_idx)		= iqp.iqid;
+	csio_q_physiqid(hw, iq_idx)	= iqp.physiqid;
+	csio_q_pidx(hw, iq_idx)		= csio_q_cidx(hw, iq_idx) = 0;
+	csio_q_inc_idx(hw, iq_idx)	= 0;
+
+	/* Actual iq-id, i.e. relative to the first IQ id owned by FW. */
+	iq_id = iqp.iqid - hw->wrm.fw_iq_start;
+
+	/* Set the iq-id to iq map table. */
+	if (iq_id >= CSIO_MAX_IQ) {
+		/* iqid: is the absolute id from FW, rel_iqid: the offset */
+		csio_err(hw,
+			 "Exceeding MAX_IQ(%d) supported!"
+			 " iqid:%d rel_iqid:%d FW iq_start:%d\n",
+			 CSIO_MAX_IQ, iqp.iqid, iq_id, hw->wrm.fw_iq_start);
+		mempool_free(mbp, hw->mb_mempool);
+		return -EINVAL;
+	}
+	csio_q_set_intr_map(hw, iq_idx, iq_id);
+
+	/*
+	 * During FW_IQ_CMD, FW sets interrupt_sent bit to 1 in the SGE
+	 * ingress context of this queue. This will block interrupts to
+	 * this queue until the next GTS write. Therefore, we do a
+	 * 0-cidx increment GTS write for this queue just to clear the
+	 * interrupt_sent bit. This will re-enable interrupts to this
+	 * queue.
+	 */
+	csio_wr_sge_intr_enable(hw, iqp.physiqid);
+
+	flq_idx = csio_q_iq_flq_idx(hw, iq_idx);
+	if (flq_idx != -1) {
+		struct csio_q *flq = hw->wrm.q_arr[flq_idx];
+
+		csio_q_flid(hw, flq_idx) = iqp.fl0id;
+		csio_q_cidx(hw, flq_idx) = 0;
+		/* Leave one credit (8 buffers) unpopulated */
+		csio_q_pidx(hw, flq_idx)    = csio_q_credits(hw, flq_idx) - 8;
+		csio_q_inc_idx(hw, flq_idx) = csio_q_credits(hw, flq_idx) - 8;
+
+		/* Now update SGE about the buffers allocated during init */
+		csio_wr_ring_fldb(hw, flq);
+	}
+
+	mempool_free(mbp, hw->mb_mempool);
+
+	return 0;
+}
+
+/*
+ * csio_wr_iq_create - Configure an Ingress queue with FW.
+ * @hw: The HW module.
+ * @priv: Private data object.
+ * @iq_idx: Ingress queue index in the WR module.
+ * @vec: MSIX vector.
+ * @portid: PCIE Channel to be associated with this queue.
+ * @async: Is this a FW asynchronous message handling queue?
+ * @cbfn: Completion callback.
+ *
+ * This API configures an ingress queue with FW by issuing a FW_IQ_CMD mailbox
+ * with alloc/write bits set.
+ *
+ * With a non-NULL @cbfn the function returns 0 as soon as the mailbox has
+ * been issued and does not free the mailbox itself. With a NULL @cbfn the
+ * response is processed inline by csio_wr_iq_create_rsp(), which frees the
+ * mailbox.
+ *
+ * Returns 0 on success, -ENOMEM if no mailbox could be allocated, -EINVAL
+ * if interrupts are not configured (CSIO_IM_NONE), the mailbox could not
+ * be issued, or the response handler reported a failure.
+ */
+int
+csio_wr_iq_create(struct csio_hw *hw, void *priv, int iq_idx,
+		  uint32_t vec, uint8_t portid, bool async,
+		  void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct csio_mb  *mbp;
+	struct csio_iq_params iqp;
+	int flq_idx;
+
+	memset(&iqp, 0, sizeof(struct csio_iq_params));
+	csio_q_portid(hw, iq_idx) = portid;
+
+	mbp = mempool_alloc(hw->mb_mempool, GFP_ATOMIC);
+	if (!mbp) {
+		csio_err(hw, "IQ command out of memory!\n");
+		return -ENOMEM;
+	}
+
+	/* Pick the interrupt destination according to the interrupt mode */
+	switch (hw->intr_mode) {
+	case CSIO_IM_INTX:
+	case CSIO_IM_MSI:
+		/* For interrupt forwarding queue only */
+		if (hw->intr_iq_idx == iq_idx)
+			iqp.iqandst	= X_INTERRUPTDESTINATION_PCIE;
+		else
+			iqp.iqandst	= X_INTERRUPTDESTINATION_IQ;
+		iqp.iqandstindex	=
+			csio_q_physiqid(hw, hw->intr_iq_idx);
+		break;
+	case CSIO_IM_MSIX:
+		iqp.iqandst		= X_INTERRUPTDESTINATION_PCIE;
+		iqp.iqandstindex	= (uint16_t)vec;
+		break;
+	case CSIO_IM_NONE:
+		mempool_free(mbp, hw->mb_mempool);
+		return -EINVAL;
+	}
+
+	/* Pass in the ingress queue cmd parameters */
+	iqp.pfn			= hw->pfn;
+	iqp.vfn			= 0;
+	iqp.iq_start		= 1;
+	iqp.viid		= 0;
+	iqp.type		= FW_IQ_TYPE_FL_INT_CAP;
+	iqp.iqasynch		= async;
+	/* Counter-based scheduling only when a coalesce count is set */
+	if (csio_intr_coalesce_cnt)
+		iqp.iqanus	= X_UPDATESCHEDULING_COUNTER_OPTTIMER;
+	else
+		iqp.iqanus	= X_UPDATESCHEDULING_TIMER;
+	iqp.iqanud		= X_UPDATEDELIVERY_INTERRUPT;
+	iqp.iqpciech		= portid;
+	iqp.iqintcntthresh	= (uint8_t)csio_sge_thresh_reg;
+
+	/*
+	 * Encode the entry size as log2(size / 16). Any other WR size
+	 * leaves iqesize at the 0 it got from the memset above.
+	 */
+	switch (csio_q_wr_sz(hw, iq_idx)) {
+	case 16:
+		iqp.iqesize = 0; break;
+	case 32:
+		iqp.iqesize = 1; break;
+	case 64:
+		iqp.iqesize = 2; break;
+	case 128:
+		iqp.iqesize = 3; break;
+	}
+
+	/* Queue size is given to FW in units of entries */
+	iqp.iqsize		= csio_q_size(hw, iq_idx) /
+						csio_q_wr_sz(hw, iq_idx);
+	iqp.iqaddr		= csio_q_pstart(hw, iq_idx);
+
+	/* Describe the attached freelist, if there is one */
+	flq_idx = csio_q_iq_flq_idx(hw, iq_idx);
+	if (flq_idx != -1) {
+		struct csio_q *flq = hw->wrm.q_arr[flq_idx];
+
+		iqp.fl0paden	= 1;
+		iqp.fl0packen	= flq->un.fl.packen ? 1 : 0;
+		iqp.fl0fbmin	= X_FETCHBURSTMIN_64B;
+		iqp.fl0fbmax	= X_FETCHBURSTMAX_512B;
+		iqp.fl0size	= csio_q_size(hw, flq_idx) / CSIO_QCREDIT_SZ;
+		iqp.fl0addr	= csio_q_pstart(hw, flq_idx);
+	}
+
+	csio_mb_iq_alloc_write(hw, mbp, priv, CSIO_MB_DEFAULT_TMO, &iqp, cbfn);
+
+	if (csio_mb_issue(hw, mbp)) {
+		csio_err(hw, "Issue of IQ cmd failed!\n");
+		mempool_free(mbp, hw->mb_mempool);
+		return -EINVAL;
+	}
+
+	/* With a callback the response is not handled here */
+	if (cbfn != NULL)
+		return 0;
+
+	return csio_wr_iq_create_rsp(hw, mbp, iq_idx);
+}
+
+/*
+ * csio_wr_eq_cfg_rsp - Response handler for EQ creation.
+ * @hw: The HW module.
+ * @mbp: Mailbox.
+ * @eq_idx: Egress queue that got created.
+ *
+ * Handle FW_EQ_OFLD_CMD mailbox completion. On success the EQ ids assigned
+ * by FW are saved and the queue indices are reset. @mbp is returned to
+ * hw->mb_mempool in either case.
+ *
+ * Returns 0 on success, -EINVAL if FW reported a failure.
+ */
+static int
+csio_wr_eq_cfg_rsp(struct csio_hw *hw, struct csio_mb *mbp, int eq_idx)
+{
+	struct csio_eq_params eqp;
+	enum fw_retval retval;
+	int rv = 0;
+
+	memset(&eqp, 0, sizeof(eqp));
+
+	csio_mb_eq_ofld_alloc_write_rsp(hw, mbp, &retval, &eqp);
+
+	if (retval == FW_SUCCESS) {
+		csio_q_eqid(hw, eq_idx)	    = (uint16_t)eqp.eqid;
+		csio_q_physeqid(hw, eq_idx) = (uint16_t)eqp.physeqid;
+		csio_q_cidx(hw, eq_idx)	    = 0;
+		csio_q_pidx(hw, eq_idx)	    = 0;
+		csio_q_inc_idx(hw, eq_idx)  = 0;
+	} else {
+		csio_err(hw, "EQ OFLD cmd returned 0x%x!\n", retval);
+		rv = -EINVAL;
+	}
+
+	mempool_free(mbp, hw->mb_mempool);
+
+	return rv;
+}
+
+/*
+ * csio_wr_eq_create - Configure an Egress queue with FW.
+ * @hw: HW module.
+ * @priv: Private data.
+ * @eq_idx: Egress queue index in the WR module.
+ * @iq_idx: Associated ingress queue index.
+ * @portid: PCIE channel to be associated with this queue.
+ * @cbfn: Completion callback.
+ *
+ * This API configures a offload egress queue with FW by issuing a
+ * FW_EQ_OFLD_CMD  (with alloc + write ) mailbox.
+ *
+ * With a non-NULL @cbfn the function returns 0 as soon as the mailbox has
+ * been issued and does not free the mailbox itself. With a NULL @cbfn the
+ * response is processed inline by csio_wr_eq_cfg_rsp(), which frees the
+ * mailbox.
+ *
+ * Returns 0 on success, -ENOMEM if no mailbox could be allocated, -EINVAL
+ * if the mailbox could not be issued or FW reported a failure.
+ */
+int
+csio_wr_eq_create(struct csio_hw *hw, void *priv, int eq_idx,
+		  int iq_idx, uint8_t portid,
+		  void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct csio_mb  *mbp;
+	struct csio_eq_params eqp;
+
+	memset(&eqp, 0, sizeof(struct csio_eq_params));
+
+	mbp = mempool_alloc(hw->mb_mempool, GFP_ATOMIC);
+	if (!mbp) {
+		csio_err(hw, "EQ command out of memory!\n");
+		return -ENOMEM;
+	}
+
+	eqp.pfn			= hw->pfn;
+	eqp.vfn			= 0;
+	eqp.eqstart		= 1;
+	eqp.hostfcmode		= X_HOSTFCMODE_STATUS_PAGE;
+	/* FW-assigned id of the ingress queue paired with this EQ */
+	eqp.iqid		= csio_q_iqid(hw, iq_idx);
+	eqp.fbmin		= X_FETCHBURSTMIN_64B;
+	eqp.fbmax		= X_FETCHBURSTMAX_512B;
+	eqp.cidxfthresh		= 0;
+	eqp.pciechn		= portid;
+	/* Queue size is given to FW in units of CSIO_QCREDIT_SZ */
+	eqp.eqsize		= csio_q_size(hw, eq_idx) / CSIO_QCREDIT_SZ;
+	eqp.eqaddr		= csio_q_pstart(hw, eq_idx);
+
+	csio_mb_eq_ofld_alloc_write(hw, mbp, priv, CSIO_MB_DEFAULT_TMO,
+				    &eqp, cbfn);
+
+	if (csio_mb_issue(hw, mbp)) {
+		csio_err(hw, "Issue of EQ OFLD cmd failed!\n");
+		mempool_free(mbp, hw->mb_mempool);
+		return -EINVAL;
+	}
+
+	/* With a callback the response is not handled here */
+	if (cbfn != NULL)
+		return 0;
+
+	return csio_wr_eq_cfg_rsp(hw, mbp, eq_idx);
+}
+
+/*
+ * csio_wr_iq_destroy_rsp - Response handler for IQ removal.
+ * @hw: The HW module.
+ * @mbp: Mailbox.
+ * @iq_idx: Ingress queue that was freed.
+ *
+ * Handle FW_IQ_CMD (free) mailbox completion: read the FW return value
+ * out of @mbp and give the mailbox back to hw->mb_mempool.
+ *
+ * Returns 0 if FW reported success, -EINVAL otherwise.
+ */
+static int
+csio_wr_iq_destroy_rsp(struct csio_hw *hw, struct csio_mb *mbp, int iq_idx)
+{
+	/* Must be read before the mailbox is released */
+	enum fw_retval fw_status = csio_mb_fw_retval(mbp);
+
+	mempool_free(mbp, hw->mb_mempool);
+
+	if (fw_status != FW_SUCCESS)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * csio_wr_iq_destroy - Free an ingress queue.
+ * @hw: The HW module.
+ * @priv: Private data object.
+ * @iq_idx: Ingress queue index to destroy
+ * @cbfn: Completion callback.
+ *
+ * This API frees an ingress queue by issuing the FW_IQ_CMD
+ * with the free bit set. With a NULL @cbfn the response is handled inline
+ * by csio_wr_iq_destroy_rsp(); otherwise 0 is returned right after the
+ * mailbox has been issued.
+ *
+ * Returns 0 on success, -ENOMEM if no mailbox could be allocated, or the
+ * error from csio_mb_issue() / the response handler.
+ */
+static int
+csio_wr_iq_destroy(struct csio_hw *hw, void *priv, int iq_idx,
+		   void (*cbfn)(struct csio_hw *, struct csio_mb *))
+{
+	struct csio_iq_params iqp;
+	struct csio_mb *mbp;
+	int flq_idx;
+	int ret;
+
+	memset(&iqp, 0, sizeof(iqp));
+
+	mbp = mempool_alloc(hw->mb_mempool, GFP_ATOMIC);
+	if (mbp == NULL)
+		return -ENOMEM;
+
+	iqp.pfn  = hw->pfn;
+	iqp.vfn  = 0;
+	iqp.iqid = csio_q_iqid(hw, iq_idx);
+	iqp.type = FW_IQ_TYPE_FL_INT_CAP;
+
+	/* 0xFFFF is passed for a freelist slot that is not in use */
+	flq_idx = csio_q_iq_flq_idx(hw, iq_idx);
+	if (flq_idx == -1)
+		iqp.fl0id = 0xFFFF;
+	else
+		iqp.fl0id = csio_q_flid(hw, flq_idx);
+
+	iqp.fl1id = 0xFFFF;
+
+	csio_mb_iq_free(hw, mbp, priv, CSIO_MB_DEFAULT_TMO, &iqp, cbfn);
+
+	ret = csio_mb_issue(hw, mbp);
+	if (ret) {
+		mempool_free(mbp, hw->mb_mempool);
+		return ret;
+	}
+
+	/* Without a callback the response is handled right here */
+	if (cbfn == NULL)
+		return csio_wr_iq_destroy_rsp(hw, mbp, iq_idx);
+
+	return 0;
+}
+
+/*
+ * csio_wr_eq_destroy_rsp - Response handler for OFLD EQ removal.
+ * @hw: The HW module.
+ * @mbp: Mailbox.
+ * @eq_idx: Egress queue that was freed.
+ *
+ * Handle FW_EQ_OFLD_CMD (free) mailbox completion: read the FW return
+ * value out of @mbp and give the mailbox back to hw->mb_mempool.
+ *
+ * Returns 0 if FW reported success, -EINVAL otherwise.
+ */
+static int
+csio_wr_eq_destroy_rsp(struct csio_hw *hw, struct csio_mb *mbp, int eq_idx)
+{
+	/* Must be read before the mailbox is released */
+	enum fw_retval fw_status = csio_mb_fw_retval(mbp);
+
+	mempool_free(mbp, hw->mb_mempool);
+
+	if (fw_status != FW_SUCCESS)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * csio_wr_eq_destroy - Free an Egress queue.
+ * @hw: The HW module.
+ * @priv: Private data object.
+ * @eq_idx: Egress queue index to destroy
+ * @cbfn: Completion callback.
+ *
+ * This API frees an Egress queue by issuing the FW_EQ_OFLD_CMD
+ * with the free bit set. With a NULL @cbfn the response is handled inline
+ * by csio_wr_eq_destroy_rsp(); otherwise 0 is returned right after the
+ * mailbox has been issued.
+ *
+ * Returns 0 on success, -ENOMEM if no mailbox could be allocated, or the
+ * error from csio_mb_issue() / the response handler.
+ */
+static int
+csio_wr_eq_destroy(struct csio_hw *hw, void *priv, int eq_idx,
+		   void (*cbfn) (struct csio_hw *, struct csio_mb *))
+{
+	struct csio_eq_params eqp;
+	struct csio_mb *mbp;
+	int ret;
+
+	memset(&eqp, 0, sizeof(eqp));
+
+	mbp = mempool_alloc(hw->mb_mempool, GFP_ATOMIC);
+	if (mbp == NULL)
+		return -ENOMEM;
+
+	eqp.pfn  = hw->pfn;
+	eqp.vfn  = 0;
+	eqp.eqid = csio_q_eqid(hw, eq_idx);
+
+	csio_mb_eq_ofld_free(hw, mbp, priv, CSIO_MB_DEFAULT_TMO, &eqp, cbfn);
+
+	ret = csio_mb_issue(hw, mbp);
+	if (ret) {
+		mempool_free(mbp, hw->mb_mempool);
+		return ret;
+	}
+
+	/* Without a callback the response is handled right here */
+	if (cbfn == NULL)
+		return csio_wr_eq_destroy_rsp(hw, mbp, eq_idx);
+
+	return 0;
+}
+
+/*
+ * csio_wr_cleanup_eq_stpg - Cleanup Egress queue status page
+ * @hw: HW module
+ * @qidx: Egress queue index
+ *
+ * Zero the queue status page that sits at the wrap address (q->vwrap)
+ * of the given egress queue.
+ */
+static void
+csio_wr_cleanup_eq_stpg(struct csio_hw *hw, int qidx)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_q *eq = wrm->q_arr[qidx];
+
+	memset(eq->vwrap, 0, sizeof(struct csio_qstatus_page));
+}
+
+/*
+ * csio_wr_cleanup_iq_ftr - Cleanup Footer entries in IQ
+ * @hw: HW module
+ * @qidx: Ingress queue index
+ *
+ * Zero the footer at the tail of every entry of the given ingress
+ * queue and reset the driver's copy of the generation bit to 1.
+ */
+static void
+csio_wr_cleanup_iq_ftr(struct csio_hw *hw, int qidx)
+{
+	struct csio_q *iq = csio_hw_to_wrm(hw)->q_arr[qidx];
+	uint32_t n;
+
+	/* Footers are about to carry genbit 0, so expect 1 next */
+	iq->un.iq.genbit = 1;
+
+	for (n = 0; n < iq->credits; n++) {
+		/* Start of entry n; its footer occupies the entry's tail */
+		uintptr_t entry = (uintptr_t)iq->vstart + (n * iq->wr_sz);
+		struct csio_iqwr_footer *ftr;
+
+		ftr = (struct csio_iqwr_footer *)(entry +
+						  (iq->wr_sz - sizeof(*ftr)));
+		memset(ftr, 0, sizeof(*ftr));
+	}
+}
+
+/*
+ * csio_wr_destroy_queues - Tear down all queues known to the WR module.
+ * @hw: HW module.
+ * @cmd: When true, a destroy command is issued for every queue that
+ *	 still has a valid id. When false, only the driver's copies of
+ *	 the queue ids are reset to CSIO_MAX_QID.
+ *
+ * Walks q_arr[0 .. free_qidx). Once a destroy call returns -EBUSY or
+ * -ETIMEDOUT, @cmd is cleared so that no further commands are issued for
+ * the remaining queues. Clears CSIO_HWF_Q_FW_ALLOCED and always
+ * returns 0.
+ */
+int
+csio_wr_destroy_queues(struct csio_hw *hw, bool cmd)
+{
+	int i, flq_idx;
+	struct csio_q *q;
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	int rv;
+
+	for (i = 0; i < wrm->free_qidx; i++) {
+		q = wrm->q_arr[i];
+
+		switch (q->type) {
+		case CSIO_EGRESS:
+			if (csio_q_eqid(hw, i) != CSIO_MAX_QID) {
+				csio_wr_cleanup_eq_stpg(hw, i);
+				if (!cmd) {
+					/* No FW command: just forget the id */
+					csio_q_eqid(hw, i) = CSIO_MAX_QID;
+					continue;
+				}
+
+				rv = csio_wr_eq_destroy(hw, NULL, i, NULL);
+				/* Stop issuing commands after this failure */
+				if ((rv == -EBUSY) || (rv == -ETIMEDOUT))
+					cmd = false;
+
+				csio_q_eqid(hw, i) = CSIO_MAX_QID;
+			}
+			/*
+			 * NOTE(review): there is no break here, so an egress
+			 * queue falls through into the ingress handling
+			 * below. Confirm this is intended (the eq and iq ids
+			 * presumably share storage in q->un).
+			 */
+		case CSIO_INGRESS:
+			if (csio_q_iqid(hw, i) != CSIO_MAX_QID) {
+				csio_wr_cleanup_iq_ftr(hw, i);
+				if (!cmd) {
+					/* No FW command: forget IQ and FL ids */
+					csio_q_iqid(hw, i) = CSIO_MAX_QID;
+					flq_idx = csio_q_iq_flq_idx(hw, i);
+					if (flq_idx != -1)
+						csio_q_flid(hw, flq_idx) =
+								CSIO_MAX_QID;
+					continue;
+				}
+
+				rv = csio_wr_iq_destroy(hw, NULL, i, NULL);
+				/* Stop issuing commands after this failure */
+				if ((rv == -EBUSY) || (rv == -ETIMEDOUT))
+					cmd = false;
+
+				csio_q_iqid(hw, i) = CSIO_MAX_QID;
+				flq_idx = csio_q_iq_flq_idx(hw, i);
+				if (flq_idx != -1)
+					csio_q_flid(hw, flq_idx) = CSIO_MAX_QID;
+			}
+		default:
+			break;
+		}
+	}
+
+	hw->flags &= ~CSIO_HWF_Q_FW_ALLOCED;
+
+	return 0;
+}
+
+/*
+ * csio_wr_get - Get requested size of WR entry/entries from queue.
+ * @hw: HW module.
+ * @qidx: Index of queue.
+ * @size: Cumulative size of Work request(s).
+ * @wrp: Work request pair.
+ *
+ * If requested credits are available, return the start address of the
+ * work request in the work request pair. Set pidx accordingly and
+ * return.
+ *
+ * @size is rounded up to a multiple of CSIO_QCREDIT_SZ. The consumer
+ * index is refreshed from the queue status page on every call, and the
+ * number of credits handed out is remembered in q->inc_idx.
+ *
+ * Returns 0 on success, or -EBUSY when the queue cannot hold the request.
+ *
+ * NOTE about WR pair:
+ * ==================
+ * A WR can start towards the end of a queue, and then continue at the
+ * beginning, since the queue is considered to be circular. This will
+ * require a pair of address/size to be passed back to the caller -
+ * hence Work request pair format.
+ */
+int
+csio_wr_get(struct csio_hw *hw, int qidx, uint32_t size,
+	    struct csio_wr_pair *wrp)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_q *q = wrm->q_arr[qidx];
+	/* Address of the entry at the current producer index */
+	void *cwr = (void *)((uintptr_t)(q->vstart) +
+						(q->pidx * CSIO_QCREDIT_SZ));
+	struct csio_qstatus_page *stp = (struct csio_qstatus_page *)q->vwrap;
+	/* Refresh the cached consumer index from the status page */
+	uint16_t cidx = q->cidx = ntohs(stp->cidx);
+	uint16_t pidx = q->pidx;
+	uint32_t req_sz	= ALIGN(size, CSIO_QCREDIT_SZ);
+	int req_credits	= req_sz / CSIO_QCREDIT_SZ;
+	int credits;
+
+	/* NOTE(review): q was already dereferenced above these checks. */
+	CSIO_DB_ASSERT(q->owner != NULL);
+	CSIO_DB_ASSERT((qidx >= 0) && (qidx < wrm->free_qidx));
+	CSIO_DB_ASSERT(cidx <= q->credits);
+
+	/* Calculate credits */
+	if (pidx > cidx) {
+		credits = q->credits - (pidx - cidx) - 1;
+	} else if (cidx > pidx) {
+		credits = cidx - pidx - 1;
+	} else {
+		/* cidx == pidx, empty queue */
+		credits = q->credits;
+		CSIO_INC_STATS(q, n_qempty);
+	}
+
+	/*
+	 * Check if we have enough credits.
+	 * NOTE(review): this comment used to say "credits = 1 implies queue
+	 * is full", but the check below treats credits == 0 as full (one
+	 * slot is already held back in the non-empty cases above). Confirm
+	 * which of the two is intended.
+	 */
+	if (!credits || (req_credits > credits)) {
+		CSIO_INC_STATS(q, n_qfull);
+		return -EBUSY;
+	}
+
+	/*
+	 * If we are here, we have enough credits to satisfy the
+	 * request. Check if we are near the end of q, and if WR spills over.
+	 * If it does, use the first addr/size to cover the queue until
+	 * the end. Fit the remainder portion of the request at the top
+	 * of queue and return it in the second addr/len. Set pidx
+	 * accordingly.
+	 */
+	if (unlikely(((uintptr_t)cwr + req_sz) > (uintptr_t)(q->vwrap))) {
+		wrp->addr1 = cwr;
+		wrp->size1 = (uint32_t)((uintptr_t)q->vwrap - (uintptr_t)cwr);
+		wrp->addr2 = q->vstart;
+		wrp->size2 = req_sz - wrp->size1;
+		/* New pidx is the number of credits used at the queue start */
+		q->pidx	= (uint16_t)(ALIGN(wrp->size2, CSIO_QCREDIT_SZ) /
+							CSIO_QCREDIT_SZ);
+		CSIO_INC_STATS(q, n_qwrap);
+		CSIO_INC_STATS(q, n_eq_wr_split);
+	} else {
+		wrp->addr1 = cwr;
+		wrp->size1 = req_sz;
+		wrp->addr2 = NULL;
+		wrp->size2 = 0;
+		q->pidx	+= (uint16_t)req_credits;
+
+		/* We are the end of queue, roll back pidx to top of queue */
+		if (unlikely(q->pidx == q->credits)) {
+			q->pidx = 0;
+			CSIO_INC_STATS(q, n_qwrap);
+		}
+	}
+
+	/* Increment later written to the doorbell by csio_wr_issue() */
+	q->inc_idx = (uint16_t)req_credits;
+
+	CSIO_INC_STATS(q, n_tot_reqs);
+
+	return 0;
+}
+
+/*
+ * csio_wr_copy_to_wrp - Copies given data into WR.
+ * @data_buf - Data buffer
+ * @wrp - Work request pair.
+ * @wr_off - Work request offset.
+ * @data_len - Data length.
+ *
+ * Copies @data_len bytes from @data_buf into the Work Request described
+ * by @wrp, starting @wr_off bytes into the first segment (addr1). Bytes
+ * that do not fit into the first segment are written to the start of
+ * the second segment (addr2).
+ * Returns: none
+ */
+void
+csio_wr_copy_to_wrp(void *data_buf, struct csio_wr_pair *wrp,
+		   uint32_t wr_off, uint32_t data_len)
+{
+	/* Space left in buffer addr1 of the WRP behind wr_off */
+	uint32_t room = wrp->size1 - wr_off;
+	uint32_t first = (room >= data_len) ? data_len : room;
+	uint32_t rest = data_len - first;
+
+	memcpy((uint8_t *) wrp->addr1 + wr_off, data_buf, first);
+
+	if (!rest)
+		return;
+
+	/* Remainder continues from the beginning of the circular buffer */
+	CSIO_DB_ASSERT(rest <= wrp->size2);
+	CSIO_DB_ASSERT(wrp->addr2 != NULL);
+	memcpy(wrp->addr2, (uint8_t *) data_buf + first, rest);
+}
+
+/*
+ * csio_wr_issue - Notify chip of Work request.
+ * @hw: HW module.
+ * @qidx: Index of queue.
+ * @prio: 0: Low priority, 1: High priority
+ *
+ * Rings the SGE Doorbell for the passed in queue by writing the pending
+ * producer index increment (q->inc_idx, recorded by csio_wr_get()) into
+ * the register, then resets that increment to 0.
+ *
+ * Always returns 0.
+ */
+int
+csio_wr_issue(struct csio_hw *hw, int qidx, bool prio)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_q *q = wrm->q_arr[qidx];
+
+	CSIO_DB_ASSERT((qidx >= 0) && (qidx < wrm->free_qidx));
+
+	/* Presumably orders the WR contents ahead of the doorbell write */
+	wmb();
+	/* Ring SGE Doorbell writing q->inc_idx (the PIDX increment) into it */
+	csio_wr_reg32(hw, DBPRIO(prio) | QID(q->un.eq.physeqid) |
+		      PIDX(q->inc_idx), MYPF_REG(SGE_PF_KDOORBELL));
+	q->inc_idx = 0;
+
+	return 0;
+}
+
+/*
+ * csio_wr_avail_qcredits - Distance from cidx to pidx of a queue.
+ * @q: Queue to inspect.
+ *
+ * Returns the number of entries between the consumer index and the
+ * producer index, taking wrap-around into account; 0 when both indices
+ * are equal.
+ */
+static inline uint32_t
+csio_wr_avail_qcredits(struct csio_q *q)
+{
+	/* cidx == pidx, empty queue */
+	if (q->pidx == q->cidx)
+		return 0;
+
+	if (q->pidx > q->cidx)
+		return q->pidx - q->cidx;
+
+	/* pidx has wrapped around behind cidx */
+	return q->credits - (q->cidx - q->pidx);
+}
+
+/*
+ * csio_wr_inval_flq_buf - Invalidate a free list buffer entry.
+ * @hw: HW module.
+ * @flq: The freelist queue.
+ *
+ * Steps the freelist consumer index past the current entry without
+ * freeing the DMA memory behind it. The index rolls over to 0 when it
+ * reaches the number of credits of the queue.
+ *
+ */
+static inline void
+csio_wr_inval_flq_buf(struct csio_hw *hw, struct csio_q *flq)
+{
+	if (++flq->cidx != flq->credits)
+		return;
+
+	/* Reached the end of the freelist: wrap to the first entry */
+	flq->cidx = 0;
+	CSIO_INC_STATS(flq, n_qwrap);
+}
+
+/*
+ * csio_wr_process_fl - Process a freelist completion.
+ * @hw: HW module.
+ * @q: The ingress queue attached to the Freelist.
+ * @wr: The freelist completion WR in the ingress queue.
+ * @len_to_qid: The lower 32-bits of the first flit of the RSP footer
+ * @iq_handler: Caller's handler for this completion.
+ * @priv: Private pointer of caller
+ *
+ * Collects the freelist buffers that hold the payload described by
+ * @len_to_qid into a struct csio_fl_dma_buf, passes it to @iq_handler
+ * together with @wr, and then advances the freelist state (packed
+ * offset or consumer index).
+ *
+ * NOTE(review): nothing in this function bounds the number of buffers
+ * gathered against the capacity of flb.flbufs - confirm that the
+ * payload length can never need more entries than that array holds.
+ */
+static inline void
+csio_wr_process_fl(struct csio_hw *hw, struct csio_q *q,
+		   void *wr, uint32_t len_to_qid,
+		   void (*iq_handler)(struct csio_hw *, void *,
+				      uint32_t, struct csio_fl_dma_buf *,
+				      void *),
+		   void *priv)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_sge *sge = &wrm->sge;
+	struct csio_fl_dma_buf flb;
+	struct csio_dma_buf *buf, *fbuf;
+	uint32_t bufsz, len, lastlen = 0;
+	int n;
+	struct csio_q *flq = hw->wrm.q_arr[q->un.iq.flq_idx];
+
+	CSIO_DB_ASSERT(flq != NULL);
+
+	len = len_to_qid;
+
+	if (len & IQWRF_NEWBUF) {
+		/* Data starts in a fresh buffer: leave a partly used one */
+		if (flq->un.fl.offset > 0) {
+			csio_wr_inval_flq_buf(hw, flq);
+			flq->un.fl.offset = 0;
+		}
+		len = IQWRF_LEN_GET(len);
+	}
+
+	CSIO_DB_ASSERT(len != 0);
+
+	flb.totlen = len;
+
+	/* Consume all freelist buffers used for len bytes */
+	for (n = 0, fbuf = flb.flbufs; ; n++, fbuf++) {
+		buf = &flq->un.fl.bufs[flq->cidx];
+		bufsz = csio_wr_fl_bufsz(sge, buf);
+
+		fbuf->paddr	= buf->paddr;
+		fbuf->vaddr	= buf->vaddr;
+
+		flb.offset	= flq->un.fl.offset;
+		lastlen		= min(bufsz, len);
+		fbuf->len	= lastlen;
+
+		len -= lastlen;
+		if (!len)
+			break;
+		csio_wr_inval_flq_buf(hw, flq);
+	}
+
+	flb.defer_free = flq->un.fl.packen ? 0 : 1;
+
+	/* The handler sees the WR without its footer */
+	iq_handler(hw, wr, q->wr_sz - sizeof(struct csio_iqwr_footer),
+		   &flb, priv);
+
+	/*
+	 * With packing enabled the last buffer stays current and only the
+	 * offset moves on (rounded up to the FL alignment); otherwise the
+	 * last buffer is consumed as well.
+	 */
+	if (flq->un.fl.packen)
+		flq->un.fl.offset += ALIGN(lastlen, sge->csio_fl_align);
+	else
+		csio_wr_inval_flq_buf(hw, flq);
+
+}
+
+/*
+ * csio_is_new_iqwr - Is this a new Ingress queue entry ?
+ * @q: Ingress queue.
+ * @ftr: Ingress queue WR SGE footer.
+ *
+ * The entry is new if our generation bit matches the corresponding
+ * bit in the footer of the current WR.
+ *
+ * Returns true when the generation bits match, false otherwise.
+ */
+static inline bool
+csio_is_new_iqwr(struct csio_q *q, struct csio_iqwr_footer *ftr)
+{
+	return (q->un.iq.genbit == (ftr->u.type_gen >> IQWRF_GEN_SHIFT));
+}
+
+/*
+ * csio_wr_process_iq - Process elements in Ingress queue.
+ * @hw:  HW pointer
+ * @q: Ingress queue to process
+ * @iq_handler: Handler for this queue
+ * @priv: Caller's private pointer
+ *
+ * This routine walks through every entry of the ingress queue, calling
+ * the provided iq_handler with the entry, until the generation bit
+ * flips. Afterwards the attached freelist (if any) is replenished when
+ * it runs low, and the SGE is told how many entries were consumed.
+ *
+ * Returns 0 if at least one entry was processed, or -EINVAL if no new
+ * entry was found (stray completion). The SGE register is written in
+ * both cases.
+ */
+int
+csio_wr_process_iq(struct csio_hw *hw, struct csio_q *q,
+		   void (*iq_handler)(struct csio_hw *, void *,
+				      uint32_t, struct csio_fl_dma_buf *,
+				      void *),
+		   void *priv)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	void *wr = (void *)((uintptr_t)q->vstart + (q->cidx * q->wr_sz));
+	struct csio_iqwr_footer *ftr;
+	uint32_t wr_type, fw_qid, qid;
+	struct csio_q *q_completed;
+	struct csio_q *flq = csio_iq_has_fl(q) ?
+					wrm->q_arr[q->un.iq.flq_idx] : NULL;
+	int rv = 0;
+
+	/* Get the footer */
+	ftr = (struct csio_iqwr_footer *)((uintptr_t)wr +
+					  (q->wr_sz - sizeof(*ftr)));
+
+	/*
+	 * When q wrapped around last time, driver should have inverted
+	 * its copy of the genbit (q->un.iq.genbit) as well.
+	 */
+	while (csio_is_new_iqwr(q, ftr)) {
+
+		CSIO_DB_ASSERT(((uintptr_t)wr + q->wr_sz) <=
+						(uintptr_t)q->vwrap);
+		/* Presumably keeps the entry reads behind the genbit check */
+		rmb();
+		wr_type = IQWRF_TYPE_GET(ftr->u.type_gen);
+
+		switch (wr_type) {
+		case X_RSPD_TYPE_CPL:
+			/* Subtract footer from WR len */
+			iq_handler(hw, wr, q->wr_sz - sizeof(*ftr), NULL, priv);
+			break;
+		case X_RSPD_TYPE_FLBUF:
+			csio_wr_process_fl(hw, q, wr,
+					   ntohl(ftr->pldbuflen_qid),
+					   iq_handler, priv);
+			break;
+		case X_RSPD_TYPE_INTR:
+			/*
+			 * NOTE(review): qid is derived from the footer and
+			 * used as an index into intr_map without a range
+			 * check here - confirm it is always within bounds.
+			 */
+			fw_qid = ntohl(ftr->pldbuflen_qid);
+			qid = fw_qid - wrm->fw_iq_start;
+			q_completed = hw->wrm.intr_map[qid];
+
+			if (unlikely(qid ==
+					csio_q_physiqid(hw, hw->intr_iq_idx))) {
+				/*
+				 * We are already in the Forward Interrupt
+				 * Interrupt Queue Service! Do-not service
+				 * again!
+				 *
+				 */
+			} else {
+				CSIO_DB_ASSERT(q_completed);
+				CSIO_DB_ASSERT(
+					q_completed->un.iq.iq_intx_handler);
+
+				/* Call the queue handler. */
+				q_completed->un.iq.iq_intx_handler(hw, NULL,
+						0, NULL, (void *)q_completed);
+			}
+			break;
+		default:
+			csio_warn(hw, "Unknown resp type 0x%x received\n",
+				 wr_type);
+			CSIO_INC_STATS(q, n_rsp_unknown);
+			break;
+		}
+
+		/*
+		 * Ingress *always* has fixed size WR entries. Therefore,
+		 * there should always be complete WRs towards the end of
+		 * queue.
+		 */
+		if (((uintptr_t)wr + q->wr_sz) == (uintptr_t)q->vwrap) {
+
+			/* Roll over to start of queue */
+			q->cidx = 0;
+			wr	= q->vstart;
+
+			/* Toggle genbit */
+			q->un.iq.genbit ^= 0x1;
+
+			CSIO_INC_STATS(q, n_qwrap);
+		} else {
+			q->cidx++;
+			wr	= (void *)((uintptr_t)(q->vstart) +
+					   (q->cidx * q->wr_sz));
+		}
+
+		/* Footer of the next entry to examine */
+		ftr = (struct csio_iqwr_footer *)((uintptr_t)wr +
+						  (q->wr_sz - sizeof(*ftr)));
+		q->inc_idx++;
+
+	} /* while (csio_is_new_iqwr(q, ftr)) */
+
+	/*
+	 * We need to re-arm SGE interrupts in case we got a stray interrupt,
+	 * especially in msix mode. With INTx, this may be a common occurrence.
+	 */
+	if (unlikely(!q->inc_idx)) {
+		CSIO_INC_STATS(q, n_stray_comp);
+		rv = -EINVAL;
+		goto restart;
+	}
+
+	/* Replenish free list buffers if pending falls below low water mark */
+	if (flq) {
+		uint32_t avail  = csio_wr_avail_qcredits(flq);
+		if (avail <= 16) {
+			/* Make sure in FLQ, at least 1 credit (8 FL buffers)
+			 * remains unpopulated otherwise HW thinks
+			 * FLQ is empty.
+			 */
+			csio_wr_update_fl(hw, flq, (flq->credits - 8) - avail);
+			csio_wr_ring_fldb(hw, flq);
+		}
+	}
+
+restart:
+	/* Now inform SGE about our incremental index value */
+	csio_wr_reg32(hw, CIDXINC(q->inc_idx)		|
+			  INGRESSQID(q->un.iq.physiqid)	|
+			  TIMERREG(csio_sge_timer_reg),
+			  MYPF_REG(SGE_PF_GTS));
+	q->stats.n_tot_rsps += q->inc_idx;
+
+	q->inc_idx = 0;
+
+	return rv;
+}
+
+/*
+ * csio_wr_process_iq_idx - Process an Ingress queue given by index.
+ * @hw: HW pointer
+ * @qidx: Index of the ingress queue in the WR module's queue array
+ * @iq_handler: Handler for this queue
+ * @priv: Caller's private pointer
+ *
+ * Looks up the queue for @qidx and forwards to csio_wr_process_iq(),
+ * returning its result.
+ */
+int
+csio_wr_process_iq_idx(struct csio_hw *hw, int qidx,
+		   void (*iq_handler)(struct csio_hw *, void *,
+				      uint32_t, struct csio_fl_dma_buf *,
+				      void *),
+		   void *priv)
+{
+	return csio_wr_process_iq(hw, csio_hw_to_wrm(hw)->q_arr[qidx],
+				  iq_handler, priv);
+}
+
+/*
+ * csio_closest_timer - Index of the SGE timer value nearest to @time.
+ * @s: SGE state holding the timer_val table.
+ * @time: Requested value, in the units of timer_val.
+ *
+ * Returns the index of the first table entry with the smallest absolute
+ * distance to @time.
+ */
+static int
+csio_closest_timer(struct csio_sge *s, int time)
+{
+	int idx, best = 0, best_dist = INT_MAX;
+
+	for (idx = 0; idx < ARRAY_SIZE(s->timer_val); idx++) {
+		int dist = time - s->timer_val[idx];
+
+		if (dist < 0)
+			dist = -dist;
+
+		/* Ties keep the earlier entry */
+		if (dist >= best_dist)
+			continue;
+
+		best_dist = dist;
+		best = idx;
+	}
+	return best;
+}
+
+/*
+ * csio_closest_thresh - Index of the SGE counter value nearest to @cnt.
+ * @s: SGE state holding the counter_val table.
+ * @cnt: Requested count.
+ *
+ * Returns the index of the first table entry with the smallest absolute
+ * distance to @cnt.
+ */
+static int
+csio_closest_thresh(struct csio_sge *s, int cnt)
+{
+	int idx, best = 0, best_dist = INT_MAX;
+
+	for (idx = 0; idx < ARRAY_SIZE(s->counter_val); idx++) {
+		int dist = cnt - s->counter_val[idx];
+
+		if (dist < 0)
+			dist = -dist;
+
+		/* Ties keep the earlier entry */
+		if (dist >= best_dist)
+			continue;
+
+		best_dist = dist;
+		best = idx;
+	}
+	return best;
+}
+
+/*
+ * csio_wr_fixup_host_params - Program host dependent SGE parameters.
+ * @hw: HW module.
+ *
+ * Writes the registers whose values are derived from the host's page
+ * size (PAGE_SHIFT/PAGE_SIZE) and cache line size (L1_CACHE_BYTES), and
+ * records the resulting free list alignment in sge->csio_fl_align.
+ */
+static void
+csio_wr_fixup_host_params(struct csio_hw *hw)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_sge *sge = &wrm->sge;
+	uint32_t clsz = L1_CACHE_BYTES;
+	uint32_t s_hps = PAGE_SHIFT - 10;
+	uint32_t ingpad = 0;
+	uint32_t stat_len = clsz > 64 ? 128 : 64;
+
+	/* Same host page size setting for all eight PFs */
+	csio_wr_reg32(hw, HOSTPAGESIZEPF0(s_hps) | HOSTPAGESIZEPF1(s_hps) |
+		      HOSTPAGESIZEPF2(s_hps) | HOSTPAGESIZEPF3(s_hps) |
+		      HOSTPAGESIZEPF4(s_hps) | HOSTPAGESIZEPF5(s_hps) |
+		      HOSTPAGESIZEPF6(s_hps) | HOSTPAGESIZEPF7(s_hps),
+		      SGE_HOST_PAGE_SIZE);
+
+	/* Align to the cache line, but never to less than 32 bytes */
+	sge->csio_fl_align = clsz < 32 ? 32 : clsz;
+	/* Register encoding is log2(alignment) - 5, i.e. 0 for 32 bytes */
+	ingpad = ilog2(sge->csio_fl_align) - 5;
+
+	csio_set_reg_field(hw, SGE_CONTROL, INGPADBOUNDARY_MASK |
+					    EGRSTATUSPAGESIZE(1),
+			   INGPADBOUNDARY(ingpad) |
+			   EGRSTATUSPAGESIZE(stat_len != 64));
+
+	/* FL BUFFER SIZE#0 is Page size i,e already aligned to cache line */
+	csio_wr_reg32(hw, PAGE_SIZE, SGE_FL_BUFFER_SIZE0);
+	/* Round the current sizes #2 and #3 up to the FL alignment */
+	csio_wr_reg32(hw,
+		      (csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE2) +
+		      sge->csio_fl_align - 1) & ~(sge->csio_fl_align - 1),
+		      SGE_FL_BUFFER_SIZE2);
+	csio_wr_reg32(hw,
+		      (csio_rd_reg32(hw, SGE_FL_BUFFER_SIZE3) +
+		      sge->csio_fl_align - 1) & ~(sge->csio_fl_align - 1),
+		      SGE_FL_BUFFER_SIZE3);
+
+	csio_wr_reg32(hw, HPZ0(PAGE_SHIFT - 12), ULP_RX_TDDP_PSZ);
+
+	/* default value of rx_dma_offset of the NIC driver */
+	csio_set_reg_field(hw, SGE_CONTROL, PKTSHIFT_MASK,
+			   PKTSHIFT(CSIO_SGE_RX_DMA_OFFSET));
+}
+
+/*
+ * csio_init_intr_coalesce_parms - Choose the SGE coalescing selectors.
+ * @hw: HW module.
+ *
+ * Sets csio_sge_thresh_reg and csio_sge_timer_reg from the
+ * csio_intr_coalesce_cnt and csio_intr_coalesce_time settings, using
+ * the counter/timer tables stored in the SGE state.
+ */
+static void
+csio_init_intr_coalesce_parms(struct csio_hw *hw)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_sge *sge = &wrm->sge;
+
+	csio_sge_thresh_reg = csio_closest_thresh(sge, csio_intr_coalesce_cnt);
+	/*
+	 * NOTE(review): the closest-threshold index computed above is
+	 * replaced by 0 whenever a coalesce count is configured, so it only
+	 * survives when csio_intr_coalesce_cnt is 0. Confirm that this
+	 * condition is not inverted.
+	 */
+	if (csio_intr_coalesce_cnt) {
+		csio_sge_thresh_reg = 0;
+		csio_sge_timer_reg = X_TIMERREG_RESTART_COUNTER;
+		return;
+	}
+
+	csio_sge_timer_reg = csio_closest_timer(sge, csio_intr_coalesce_time);
+}
+
+/*
+ * csio_wr_get_sge - Get SGE register values.
+ * @hw: HW module.
+ *
+ * Used by non-master functions and by master-functions relying on config file.
+ *
+ * Reads SGE_CONTROL, the free list buffer sizes, the timer values and
+ * the ingress thresholds back from the hardware into the driver's SGE
+ * state, then derives the interrupt coalescing selectors from them.
+ */
+static void
+csio_wr_get_sge(struct csio_hw *hw)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_sge *sge = &wrm->sge;
+	uint32_t ingpad;
+	int i;
+	u32 timer_value_0_and_1, timer_value_2_and_3, timer_value_4_and_5;
+	u32 ingress_rx_threshold;
+
+	sge->sge_control = csio_rd_reg32(hw, SGE_CONTROL);
+
+	ingpad = INGPADBOUNDARY_GET(sge->sge_control);
+
+	/*
+	 * Translate the pad boundary encoding into a byte alignment. There
+	 * is no default case: an encoding not listed here leaves
+	 * csio_fl_align untouched.
+	 */
+	switch (ingpad) {
+	case X_INGPCIEBOUNDARY_32B:
+		sge->csio_fl_align = 32; break;
+	case X_INGPCIEBOUNDARY_64B:
+		sge->csio_fl_align = 64; break;
+	case X_INGPCIEBOUNDARY_128B:
+		sge->csio_fl_align = 128; break;
+	case X_INGPCIEBOUNDARY_256B:
+		sge->csio_fl_align = 256; break;
+	case X_INGPCIEBOUNDARY_512B:
+		sge->csio_fl_align = 512; break;
+	case X_INGPCIEBOUNDARY_1024B:
+		sge->csio_fl_align = 1024; break;
+	case X_INGPCIEBOUNDARY_2048B:
+		sge->csio_fl_align = 2048; break;
+	case X_INGPCIEBOUNDARY_4096B:
+		sge->csio_fl_align = 4096; break;
+	}
+
+	for (i = 0; i < CSIO_SGE_FL_SIZE_REGS; i++)
+		csio_get_flbuf_size(hw, sge, i);
+
+	timer_value_0_and_1 = csio_rd_reg32(hw, SGE_TIMER_VALUE_0_AND_1);
+	timer_value_2_and_3 = csio_rd_reg32(hw, SGE_TIMER_VALUE_2_AND_3);
+	timer_value_4_and_5 = csio_rd_reg32(hw, SGE_TIMER_VALUE_4_AND_5);
+
+	/* Each register packs two timers; convert core ticks to usecs */
+	sge->timer_val[0] = (uint16_t)csio_core_ticks_to_us(hw,
+					TIMERVALUE0_GET(timer_value_0_and_1));
+	sge->timer_val[1] = (uint16_t)csio_core_ticks_to_us(hw,
+					TIMERVALUE1_GET(timer_value_0_and_1));
+	sge->timer_val[2] = (uint16_t)csio_core_ticks_to_us(hw,
+					TIMERVALUE2_GET(timer_value_2_and_3));
+	sge->timer_val[3] = (uint16_t)csio_core_ticks_to_us(hw,
+					TIMERVALUE3_GET(timer_value_2_and_3));
+	sge->timer_val[4] = (uint16_t)csio_core_ticks_to_us(hw,
+					TIMERVALUE4_GET(timer_value_4_and_5));
+	sge->timer_val[5] = (uint16_t)csio_core_ticks_to_us(hw,
+					TIMERVALUE5_GET(timer_value_4_and_5));
+
+	ingress_rx_threshold = csio_rd_reg32(hw, SGE_INGRESS_RX_THRESHOLD);
+	sge->counter_val[0] = THRESHOLD_0_GET(ingress_rx_threshold);
+	sge->counter_val[1] = THRESHOLD_1_GET(ingress_rx_threshold);
+	sge->counter_val[2] = THRESHOLD_2_GET(ingress_rx_threshold);
+	sge->counter_val[3] = THRESHOLD_3_GET(ingress_rx_threshold);
+
+	/* Needs the timer/counter tables filled in above */
+	csio_init_intr_coalesce_parms(hw);
+}
+
+/*
+ * csio_wr_set_sge - Initialize SGE registers
+ * @hw: HW module.
+ *
+ * Used by Master function to initialize SGE registers in the absence
+ * of a config file.
+ *
+ * Programs the SGE mode, doorbell FIFO handling, free list buffer
+ * sizes, ingress thresholds and timer values from the driver's
+ * CSIO_SGE_* constants, mirrors them in the driver's SGE state and
+ * finally derives the interrupt coalescing selectors.
+ */
+static void
+csio_wr_set_sge(struct csio_hw *hw)
+{
+	struct csio_wrm *wrm = csio_hw_to_wrm(hw);
+	struct csio_sge *sge = &wrm->sge;
+	int i;
+
+	/*
+	 * Set up our basic SGE mode to deliver CPL messages to our Ingress
+	 * Queue and Packet Data to the Free List.
+	 */
+	csio_set_reg_field(hw, SGE_CONTROL, RXPKTCPLMODE, RXPKTCPLMODE);
+
+	sge->sge_control = csio_rd_reg32(hw, SGE_CONTROL);
+
+	/* sge->csio_fl_align is set up by csio_wr_fixup_host_params(). */
+
+	/*
+	 * Set up to drop DOORBELL writes when the DOORBELL FIFO overflows
+	 * and generate an interrupt when this occurs so we can recover.
+	 */
+	csio_set_reg_field(hw, SGE_DBFIFO_STATUS,
+			   HP_INT_THRESH(HP_INT_THRESH_MASK) |
+			   LP_INT_THRESH(LP_INT_THRESH_MASK),
+			   HP_INT_THRESH(CSIO_SGE_DBFIFO_INT_THRESH) |
+			   LP_INT_THRESH(CSIO_SGE_DBFIFO_INT_THRESH));
+	csio_set_reg_field(hw, SGE_DOORBELL_CONTROL, ENABLE_DROP,
+			   ENABLE_DROP);
+
+	/* SGE_FL_BUFFER_SIZE0 is set up by csio_wr_fixup_host_params(). */
+
+	CSIO_SET_FLBUF_SIZE(hw, 1, CSIO_SGE_FLBUF_SIZE1);
+	CSIO_SET_FLBUF_SIZE(hw, 2, CSIO_SGE_FLBUF_SIZE2);
+	CSIO_SET_FLBUF_SIZE(hw, 3, CSIO_SGE_FLBUF_SIZE3);
+	CSIO_SET_FLBUF_SIZE(hw, 4, CSIO_SGE_FLBUF_SIZE4);
+	CSIO_SET_FLBUF_SIZE(hw, 5, CSIO_SGE_FLBUF_SIZE5);
+	CSIO_SET_FLBUF_SIZE(hw, 6, CSIO_SGE_FLBUF_SIZE6);
+	CSIO_SET_FLBUF_SIZE(hw, 7, CSIO_SGE_FLBUF_SIZE7);
+	CSIO_SET_FLBUF_SIZE(hw, 8, CSIO_SGE_FLBUF_SIZE8);
+
+	/* Pull the buffer sizes back into the driver's SGE state */
+	for (i = 0; i < CSIO_SGE_FL_SIZE_REGS; i++)
+		csio_get_flbuf_size(hw, sge, i);
+
+	/* Initialize interrupt coalescing attributes */
+	sge->timer_val[0] = CSIO_SGE_TIMER_VAL_0;
+	sge->timer_val[1] = CSIO_SGE_TIMER_VAL_1;
+	sge->timer_val[2] = CSIO_SGE_TIMER_VAL_2;
+	sge->timer_val[3] = CSIO_SGE_TIMER_VAL_3;
+	sge->timer_val[4] = CSIO_SGE_TIMER_VAL_4;
+	sge->timer_val[5] = CSIO_SGE_TIMER_VAL_5;
+
+	sge->counter_val[0] = CSIO_SGE_INT_CNT_VAL_0;
+	sge->counter_val[1] = CSIO_SGE_INT_CNT_VAL_1;
+	sge->counter_val[2] = CSIO_SGE_INT_CNT_VAL_2;
+	sge->counter_val[3] = CSIO_SGE_INT_CNT_VAL_3;
+
+	csio_wr_reg32(hw, THRESHOLD_0(sge->counter_val[0]) |
+		      THRESHOLD_1(sge->counter_val[1]) |
+		      THRESHOLD_2(sge->counter_val[2]) |
+		      THRESHOLD_3(sge->counter_val[3]),
+		      SGE_INGRESS_RX_THRESHOLD);
+
+	/* Timer values are kept in usecs and written as core ticks */
+	csio_wr_reg32(hw,
+		   TIMERVALUE0(csio_us_to_core_ticks(hw, sge->timer_val[0])) |
+		   TIMERVALUE1(csio_us_to_core_ticks(hw, sge->timer_val[1])),
+		   SGE_TIMER_VALUE_0_AND_1);
+
+	csio_wr_reg32(hw,
+		   TIMERVALUE2(csio_us_to_core_ticks(hw, sge->timer_val[2])) |
+		   TIMERVALUE3(csio_us_to_core_ticks(hw, sge->timer_val[3])),
+		   SGE_TIMER_VALUE_2_AND_3);
+
+	csio_wr_reg32(hw,
+		   TIMERVALUE4(csio_us_to_core_ticks(hw, sge->timer_val[4])) |
+		   TIMERVALUE5(csio_us_to_core_ticks(hw, sge->timer_val[5])),
+		   SGE_TIMER_VALUE_4_AND_5);
+
+	csio_init_intr_coalesce_parms(hw);
+}
+
+/*
+ * csio_wr_sge_init - Bring the driver's SGE state in line with the HW.
+ * @hw: HW module.
+ *
+ * Mastership of the card and the decision whether a config file is
+ * used must already be settled when this is called, i.e.
+ * CSIO_HWF_MASTER and CSIO_HWF_USING_SOFT_PARAMS are expected to be
+ * set/unset.
+ */
+void
+csio_wr_sge_init(struct csio_hw *hw)
+{
+	if (csio_is_hw_master(hw)) {
+		/* The master always fixes up the host specific registers */
+		csio_wr_fixup_host_params(hw);
+
+		/*
+		 * With a config file the remaining SGE configuration is
+		 * only read back; without one the master has to
+		 * initialize the SGE entirely.
+		 */
+		if (hw->flags & CSIO_HWF_USING_SOFT_PARAMS)
+			csio_wr_get_sge(hw);
+		else
+			csio_wr_set_sge(hw);
+
+		return;
+	}
+
+	/*
+	 * Not the master: we are only allowed to read and work off of
+	 * the already initialized SGE values.
+	 */
+	csio_wr_get_sge(hw);
+}
+
+/*
+ * csio_wrm_init - Initialize Work request module.
+ * @wrm: WR module
+ * @hw: HW pointer
+ *
+ * Allocates memory for an array of queue pointers starting at q_arr,
+ * plus one zeroed struct csio_q per array slot, and resets free_qidx
+ * to 0.
+ *
+ * Returns 0 on success, -EINVAL if wrm->num_q is not set, or -ENOMEM if
+ * an allocation fails. On -ENOMEM everything allocated here has been
+ * freed again and wrm->q_arr is NULL.
+ */
+int
+csio_wrm_init(struct csio_wrm *wrm, struct csio_hw *hw)
+{
+	int i;
+
+	if (!wrm->num_q) {
+		csio_err(hw, "Num queues is not set\n");
+		return -EINVAL;
+	}
+
+	/* kcalloc() checks the count * size multiplication for overflow */
+	wrm->q_arr = kcalloc(wrm->num_q, sizeof(*wrm->q_arr), GFP_KERNEL);
+	if (!wrm->q_arr)
+		goto err;
+
+	for (i = 0; i < wrm->num_q; i++) {
+		wrm->q_arr[i] = kzalloc(sizeof(*wrm->q_arr[i]), GFP_KERNEL);
+		if (!wrm->q_arr[i]) {
+			/* Unwind the queues allocated so far */
+			while (--i >= 0)
+				kfree(wrm->q_arr[i]);
+			goto err_free_arr;
+		}
+	}
+	wrm->free_qidx	= 0;
+
+	return 0;
+
+err_free_arr:
+	kfree(wrm->q_arr);
+	/* Do not leave a dangling pointer behind for later teardown */
+	wrm->q_arr = NULL;
+err:
+	return -ENOMEM;
+}
+
+/*
+ * csio_wrm_exit - Uninitialize Work request module.
+ * @wrm: WR module
+ * @hw: HW module
+ *
+ * Uninitialize WR module. Free q_arr and pointers in it.
+ * We have the additional job of freeing the DMA memory associated
+ * with the queues: for every queue index below free_qidx the queue
+ * memory itself is released and, for freelists, each populated buffer
+ * along with the buffer array.
+ */
+void
+csio_wrm_exit(struct csio_wrm *wrm, struct csio_hw *hw)
+{
+	int i;
+	uint32_t j;
+	struct csio_q *q;
+	struct csio_dma_buf *buf;
+
+	for (i = 0; i < wrm->num_q; i++) {
+		q = wrm->q_arr[i];
+
+		if (wrm->free_qidx && (i < wrm->free_qidx)) {
+			/*
+			 * A freelist without a buffer array only skips the
+			 * per-buffer cleanup. Its queue memory and the
+			 * queue object below are still released; bailing
+			 * out of the iteration here would leak both.
+			 */
+			if (q->type == CSIO_FREELIST && q->un.fl.bufs) {
+				for (j = 0; j < q->credits; j++) {
+					buf = &q->un.fl.bufs[j];
+					/* Slot was never populated */
+					if (!buf->vaddr)
+						continue;
+					pci_free_consistent(hw->pdev, buf->len,
+							    buf->vaddr,
+							    buf->paddr);
+				}
+				kfree(q->un.fl.bufs);
+			}
+			pci_free_consistent(hw->pdev, q->size,
+					    q->vstart, q->pstart);
+		}
+		kfree(q);
+	}
+
+	hw->flags &= ~CSIO_HWF_Q_MEM_ALLOCED;
+
+	kfree(wrm->q_arr);
+}
-- 
1.7.1


^ permalink raw reply related

* [V4 PATCH 3/8] csiostor: Chelsio FCoE offload driver submission (headers part 2).
From: Naresh Kumar Inna @ 2012-09-12 17:18 UTC (permalink / raw)
  To: JBottomley, linux-scsi, dm, leedom; +Cc: netdev, naresh, chethan
In-Reply-To: <1347470328-32490-1-git-send-email-naresh@chelsio.com>

This patch contains the second set of header files for the csiostor driver.

Signed-off-by: Naresh Kumar Inna <naresh@chelsio.com>
---
 drivers/scsi/csiostor/csio_lnode.h |  255 ++++++++++++++++++
 drivers/scsi/csiostor/csio_mb.h    |  278 +++++++++++++++++++
 drivers/scsi/csiostor/csio_rnode.h |  141 ++++++++++
 drivers/scsi/csiostor/csio_scsi.h  |  342 ++++++++++++++++++++++++
 drivers/scsi/csiostor/csio_wr.h    |  512 ++++++++++++++++++++++++++++++++++++
 5 files changed, 1528 insertions(+), 0 deletions(-)
 create mode 100644 drivers/scsi/csiostor/csio_lnode.h
 create mode 100644 drivers/scsi/csiostor/csio_mb.h
 create mode 100644 drivers/scsi/csiostor/csio_rnode.h
 create mode 100644 drivers/scsi/csiostor/csio_scsi.h
 create mode 100644 drivers/scsi/csiostor/csio_wr.h

diff --git a/drivers/scsi/csiostor/csio_lnode.h b/drivers/scsi/csiostor/csio_lnode.h
new file mode 100644
index 0000000..8d84988
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_lnode.h
@@ -0,0 +1,255 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CSIO_LNODE_H__
+#define __CSIO_LNODE_H__
+
+#include <linux/kref.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+#include <scsi/fc/fc_els.h>
+
+
+#include "csio_defs.h"
+#include "csio_hw.h"
+
+/* Lnode limits (presumably NPIV lnodes and remote nodes - confirm scope) */
+#define CSIO_FCOE_MAX_NPIV	128
+#define CSIO_FCOE_MAX_RNODES	2048
+
+/* FDMI port attribute unknown speed */
+#define CSIO_HBA_PORTSPEED_UNKNOWN	0x8000
+
+/* Settings defined outside of this header */
+extern int csio_fcoe_rnodes;
+extern int csio_fdmi_enable;
+
+/*
+ * State machine events. CSIO_LNE_MAX_EVENT is not an event itself; it
+ * sizes per-event arrays such as csio_lnode_stats.n_evt_sm.
+ */
+enum csio_ln_ev {
+	CSIO_LNE_NONE = (uint32_t)0,
+	CSIO_LNE_LINKUP,
+	CSIO_LNE_FAB_INIT_DONE,
+	CSIO_LNE_LINK_DOWN,
+	CSIO_LNE_DOWN_LINK,
+	CSIO_LNE_LOGO,
+	CSIO_LNE_CLOSE,
+	CSIO_LNE_MAX_EVENT,
+};
+
+
+/*
+ * FCF information entry. Entries can be chained through @list
+ * (presumably on csio_lnode.fcf_lsthead - confirm) and are reference
+ * counted through @kref.
+ */
+struct csio_fcf_info {
+	struct list_head	list;
+	uint8_t			priority;
+	uint8_t			mac[6];
+	uint8_t			name_id[8];
+	uint8_t			fabric[8];
+	uint16_t		vf_id;
+	uint8_t			vlan_id;
+	uint16_t		max_fcoe_size;
+	uint8_t			fc_map[3];
+	uint32_t		fka_adv;
+	uint32_t		fcfi;
+	/* Single-bit flags */
+	uint8_t			get_next:1;
+	uint8_t			link_aff:1;
+	uint8_t			fpma:1;
+	uint8_t			spma:1;
+	uint8_t			login:1;
+	uint8_t			portid;
+	uint8_t			spma_mac[6];
+	struct kref		kref;
+};
+
+/* Defines for lnode flags (presumably the csio_lnode.flags bitmask) */
+#define	CSIO_LNF_FIPSUPP		0x00000001	/* Fip Supported */
+#define	CSIO_LNF_NPIVSUPP		0x00000002	/* NPIV supported */
+#define CSIO_LNF_LINK_ENABLE		0x00000004	/* Link enabled */
+#define	CSIO_LNF_FDMI_ENABLE		0x00000008	/* FDMI support */
+
+/* Transport events, the event argument of csio_lnode_async_event() */
+enum csio_ln_fc_evt {
+	CSIO_LN_FC_LINKUP = 1,
+	CSIO_LN_FC_LINKDOWN,
+	CSIO_LN_FC_RSCN,
+	CSIO_LN_FC_ATTRIB_UPDATE,
+};
+
+/*
+ * Lnode stats. All members are plain 32-bit counters; the two arrays
+ * hold one counter per FW event and per state machine event
+ * respectively.
+ */
+struct csio_lnode_stats {
+	uint32_t	n_link_up;	/* Link up */
+	uint32_t	n_link_down;	/* Link down */
+	uint32_t	n_err;		/* error */
+	uint32_t	n_err_nomem;	/* memory not available */
+	uint32_t	n_inval_parm;   /* Invalid parameters */
+	uint32_t	n_evt_unexp;	/* unexpected event */
+	uint32_t	n_evt_drop;	/* dropped event */
+	uint32_t	n_rnode_match;  /* matched rnode */
+	uint32_t	n_dev_loss_tmo; /* Device loss timeout */
+	uint32_t	n_fdmi_err;	/* fdmi err */
+	uint32_t	n_evt_fw[RSCN_DEV_LOST];	/* fw events */
+	/* Indexed by enum csio_ln_ev; the elements themselves are counts */
+	uint32_t	n_evt_sm[CSIO_LNE_MAX_EVENT];	/* State m/c events */
+	uint32_t	n_rnode_alloc;	/* rnode allocated */
+	uint32_t	n_rnode_free;	/* rnode freed */
+	uint32_t	n_rnode_nomem;	/* rnode alloc failure */
+	uint32_t        n_input_requests; /* Input Requests */
+	uint32_t        n_output_requests; /* Output Requests */
+	uint32_t        n_control_requests; /* Control Requests */
+	uint32_t        n_input_bytes; /* Input Bytes */
+	uint32_t        n_output_bytes; /* Output Bytes */
+	uint32_t	rsvd1;
+};
+
+/* Common Lnode params (embedded in struct csio_lnode as 'params') */
+struct csio_lnode_params {
+	uint32_t	ra_tov;
+	uint32_t	fcfi;
+	uint32_t	log_level;	/* Module level for debugging */
+};
+
+/*
+ * Service parameters of an lnode (embedded in struct csio_lnode as
+ * 'ln_sparm'). The WWPN/WWNN stored here are what the csio_ln_wwpn()
+ * and csio_ln_wwnn() accessors return.
+ */
+struct csio_service_parms {
+	struct fc_els_csp	csp;		/* Common service parms */
+	uint8_t			wwpn[8];	/* WWPN */
+	uint8_t			wwnn[8];	/* WWNN */
+	struct fc_els_cssp	clsp[4];	/* Class service params */
+	uint8_t			vvl[16];	/* Vendor version level */
+};
+
+/*
+ * Lnode. Carries the state machine, a back pointer to the owning HW
+ * module, the FCoE and firmware identifiers, the list of child lnodes
+ * together with the parent pointer, the remote node list, and the FC
+ * transport data and statistics of the node.
+ */
+struct csio_lnode {
+	struct csio_sm		sm;		/* State machine + sibling
+						 * lnode list.
+						 */
+	struct csio_hw		*hwp;		/* Pointer to the HW module */
+	uint8_t			portid;		/* Port ID */
+	uint8_t			rsvd1;
+	uint16_t		rsvd2;
+	uint32_t		dev_num;	/* Device number */
+	uint32_t		flags;		/* Flags */
+	struct list_head	fcf_lsthead;	/* FCF entries */
+	struct csio_fcf_info	*fcfinfo;	/* FCF in use */
+	struct csio_ioreq	*mgmt_req;	/* MGMT request */
+
+	/* FCoE identifiers */
+	uint8_t			mac[6];
+	uint32_t		nport_id;
+	struct csio_service_parms ln_sparm;	/* Service parms */
+
+	/* Firmware identifiers */
+	uint32_t		fcf_flowid;	/*fcf flowid */
+	uint32_t		vnp_flowid;	/* see csio_ln_flowid() */
+	uint16_t		ssn_cnt;	/* Registered Session */
+	uint8_t			cur_evt;	/* Current event */
+	uint8_t			prev_evt;	/* Previous event */
+
+	/* Children */
+	struct list_head	cln_head;	/* Head of the children lnode
+						 * list.
+						 */
+	uint32_t		num_vports;	/* Total NPIV/children LNodes*/
+	struct csio_lnode	*pln;		/* Parent lnode of child
+						 * lnodes.
+						 */
+	struct list_head	cmpl_q;		/* Pending I/Os on this lnode */
+
+	/* Remote node information */
+	struct list_head	rnhead;		/* Head of rnode list */
+	uint32_t		num_reg_rnodes;	/* Number of rnodes registered
+						 * with the host.
+						 */
+	uint32_t		n_scsi_tgts;	/* Number of scsi targets
+						 * found
+						 */
+	uint32_t		last_scan_ntgts;/* Number of scsi targets
+						 * found per last scan.
+						 */
+	uint32_t		tgt_scan_tick;	/* timer started after
+						 * new tgt found
+						 */
+	/* FC transport data */
+	struct fc_vport		*fc_vport;
+	struct fc_host_statistics fch_stats;
+
+	struct csio_lnode_stats stats;		/* Common lnode stats */
+	struct csio_lnode_params params;	/* Common lnode params */
+};
+
+/* Lnode field accessors */
+#define	csio_lnode_to_hw(ln)	((ln)->hwp)
+#define csio_root_lnode(ln)	(csio_lnode_to_hw((ln))->rln)
+#define csio_parent_lnode(ln)	((ln)->pln)
+#define	csio_ln_flowid(ln)	((ln)->vnp_flowid)
+#define csio_ln_wwpn(ln)	((ln)->ln_sparm.wwpn)
+#define csio_ln_wwnn(ln)	((ln)->ln_sparm.wwnn)
+
+/*
+ * Lnode kind predicates, each evaluating to 1 or 0. A physical lnode
+ * has no parent, an NPIV lnode has one.
+ */
+#define csio_is_root_ln(ln)	((ln) == csio_root_lnode((ln)))
+#define csio_is_phys_ln(ln)	((ln)->pln == NULL)
+#define csio_is_npiv_ln(ln)	((ln)->pln != NULL)
+
+
+/*
+ * Lnode scoped logging helpers. Each one logs against the lnode's HW
+ * module and prefixes the message with the lnode's device id as
+ * "hi:lo " in hex.
+ *
+ * The macro argument is parenthesized before being dereferenced so that
+ * any pointer expression can be passed as _ln.
+ *
+ * NOTE(review): the expansions end in ';'. This is kept so existing
+ * call sites keep compiling unchanged, but it makes these macros unsafe
+ * as the unbraced body of an if/else.
+ */
+#define csio_ln_dbg(_ln, _fmt, ...)	\
+	csio_dbg((_ln)->hwp, "%x:%x "_fmt, CSIO_DEVID_HI(_ln), \
+		 CSIO_DEVID_LO(_ln), ##__VA_ARGS__);
+
+#define csio_ln_err(_ln, _fmt, ...)	\
+	csio_err((_ln)->hwp, "%x:%x "_fmt, CSIO_DEVID_HI(_ln), \
+		 CSIO_DEVID_LO(_ln), ##__VA_ARGS__);
+
+#define csio_ln_warn(_ln, _fmt, ...)	\
+	csio_warn((_ln)->hwp, "%x:%x "_fmt, CSIO_DEVID_HI(_ln), \
+		 CSIO_DEVID_LO(_ln), ##__VA_ARGS__);
+
+/* HW->Lnode notifications, the argument of csio_notify_lnodes() */
+enum csio_ln_notify {
+	CSIO_LN_NOTIFY_HWREADY = 1,
+	CSIO_LN_NOTIFY_HWSTOP,
+	CSIO_LN_NOTIFY_HWREMOVE,
+	CSIO_LN_NOTIFY_HWRESET,
+};
+
+/*
+ * Lnode entry points made available to users of this header. Only the
+ * declarations live here; the definitions are not part of this header.
+ */
+void csio_fcoe_fwevt_handler(struct csio_hw *,  __u8 cpl_op, __be64 *);
+int csio_is_lnode_ready(struct csio_lnode *);
+void csio_lnode_state_to_str(struct csio_lnode *ln, int8_t *str);
+struct csio_lnode *csio_lnode_lookup_by_wwpn(struct csio_hw *, uint8_t *);
+int csio_get_phy_port_stats(struct csio_hw *, uint8_t ,
+				      struct fw_fcoe_port_stats *);
+int csio_scan_done(struct csio_lnode *, unsigned long, unsigned long,
+		   unsigned long, unsigned long);
+void csio_notify_lnodes(struct csio_hw *, enum csio_ln_notify);
+void csio_disable_lnodes(struct csio_hw *, uint8_t, bool);
+void csio_lnode_async_event(struct csio_lnode *, enum csio_ln_fc_evt);
+int csio_ln_fdmi_start(struct csio_lnode *, void *);
+int csio_lnode_start(struct csio_lnode *);
+void csio_lnode_stop(struct csio_lnode *);
+void csio_lnode_close(struct csio_lnode *);
+int csio_lnode_init(struct csio_lnode *, struct csio_hw *,
+			      struct csio_lnode *);
+void csio_lnode_exit(struct csio_lnode *);
+
+#endif /* ifndef __CSIO_LNODE_H__ */
diff --git a/drivers/scsi/csiostor/csio_mb.h b/drivers/scsi/csiostor/csio_mb.h
new file mode 100644
index 0000000..1788ea5
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_mb.h
@@ -0,0 +1,278 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CSIO_MB_H__
+#define __CSIO_MB_H__
+
+#include <linux/timer.h>
+#include <linux/completion.h>
+
+#include "t4fw_api.h"
+#include "t4fw_api_stor.h"
+#include "csio_defs.h"
+
+#define CSIO_STATS_OFFSET (2)
+#define CSIO_NUM_STATS_PER_MB (6)
+
+struct fw_fcoe_port_cmd_params {
+	uint8_t		portid;
+	uint8_t		idx;
+	uint8_t		nstats;
+};
+
+#define CSIO_DUMP_MB(__hw, __num, __mb)					\
+	csio_dbg(__hw, "\t%llx %llx %llx %llx %llx %llx %llx %llx\n",	\
+		(unsigned long long)csio_rd_reg64(__hw, (__mb)),	\
+		(unsigned long long)csio_rd_reg64(__hw, (__mb) + 8),	\
+		(unsigned long long)csio_rd_reg64(__hw, (__mb) + 16),	\
+		(unsigned long long)csio_rd_reg64(__hw, (__mb) + 24),	\
+		(unsigned long long)csio_rd_reg64(__hw, (__mb) + 32),	\
+		(unsigned long long)csio_rd_reg64(__hw, (__mb) + 40),	\
+		(unsigned long long)csio_rd_reg64(__hw, (__mb) + 48),	\
+		(unsigned long long)csio_rd_reg64(__hw, (__mb) + 56))
+
+#define CSIO_MB_MAX_REGS	8
+#define CSIO_MAX_MB_SIZE	64
+#define CSIO_MB_POLL_FREQ	5		/*  5 ms */
+#define CSIO_MB_DEFAULT_TMO	FW_CMD_MAX_TIMEOUT
+
+/* Device master in HELLO command */
+enum csio_dev_master { CSIO_MASTER_CANT, CSIO_MASTER_MAY, CSIO_MASTER_MUST };
+
+enum csio_mb_owner { CSIO_MBOWNER_NONE, CSIO_MBOWNER_FW, CSIO_MBOWNER_PL };
+
+enum csio_dev_state {
+	CSIO_DEV_STATE_UNINIT,
+	CSIO_DEV_STATE_INIT,
+	CSIO_DEV_STATE_ERR
+};
+
+#define FW_PARAM_DEV(param) \
+	(FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
+	 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
+
+#define FW_PARAM_PFVF(param) \
+	(FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
+	 FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param)|  \
+	 FW_PARAMS_PARAM_Y(0) | \
+	 FW_PARAMS_PARAM_Z(0))
+
+enum {
+	PAUSE_RX      = 1 << 0,
+	PAUSE_TX      = 1 << 1,
+	PAUSE_AUTONEG = 1 << 2
+};
+
+#define CSIO_INIT_MBP(__mbp, __cp,  __tmo, __priv, __fn, __clear)	\
+do {									\
+	if (__clear)							\
+		memset((__cp), 0,					\
+			    CSIO_MB_MAX_REGS * sizeof(__be64));		\
+	INIT_LIST_HEAD(&(__mbp)->list);					\
+	(__mbp)->tmo		= (__tmo);				\
+	(__mbp)->priv		= (void *)(__priv);			\
+	(__mbp)->mb_cbfn	= (__fn);				\
+	(__mbp)->mb_size	= sizeof(*(__cp));			\
+} while (0)
+
+struct csio_mbm_stats {
+	uint32_t	n_req;		/* number of mbox req */
+	uint32_t	n_rsp;		/* number of mbox rsp */
+	uint32_t	n_activeq;	/* number of mbox req active Q */
+	uint32_t	n_cbfnq;	/* number of mbox req cbfn Q */
+	uint32_t	n_tmo;		/* number of mbox timeout */
+	uint32_t	n_cancel;	/* number of mbox cancel */
+	uint32_t	n_err;		/* number of mbox error */
+};
+
+/* Driver version of Mailbox */
+struct csio_mb {
+	struct list_head	list;			/* for req/resp */
+							/* queue in driver */
+	__be64			mb[CSIO_MB_MAX_REGS];	/* MB in HW format */
+	int			mb_size;		/* Size of this
+							 * mailbox.
+							 */
+	uint32_t		tmo;			/* Timeout */
+	struct completion	cmplobj;		/* MB Completion
+							 * object
+							 */
+	void			(*mb_cbfn) (struct csio_hw *, struct csio_mb *);
+							/* Callback fn */
+	void			*priv;			/* Owner private ptr */
+};
+
+struct csio_mbm {
+	uint32_t		a_mbox;			/* Async mbox num */
+	uint32_t		intr_idx;		/* Interrupt index */
+	struct timer_list	timer;			/* Mbox timer */
+	struct list_head	req_q;			/* Mbox request queue */
+	struct list_head	cbfn_q;			/* Mbox completion q */
+	struct csio_mb		*mcurrent;		/* Current mailbox */
+	uint32_t		req_q_cnt;		/* Outstanding mbox
+							 * cmds
+							 */
+	struct csio_mbm_stats	stats;			/* Statistics */
+};
+
+#define csio_set_mb_intr_idx(_m, _i)	((_m)->intr_idx = (_i))
+#define csio_get_mb_intr_idx(_m)	((_m)->intr_idx)
+
+struct csio_iq_params;
+struct csio_eq_params;
+
+enum fw_retval csio_mb_fw_retval(struct csio_mb *);
+
+/* MB helpers */
+void csio_mb_hello(struct csio_hw *, struct csio_mb *, uint32_t,
+		   uint32_t, uint32_t, enum csio_dev_master,
+		   void (*)(struct csio_hw *, struct csio_mb *));
+
+void csio_mb_process_hello_rsp(struct csio_hw *, struct csio_mb *,
+			       enum fw_retval *, enum csio_dev_state *,
+			       uint8_t *);
+
+void csio_mb_bye(struct csio_hw *, struct csio_mb *, uint32_t,
+		 void (*)(struct csio_hw *, struct csio_mb *));
+
+void csio_mb_reset(struct csio_hw *, struct csio_mb *, uint32_t, int, int,
+		   void (*)(struct csio_hw *, struct csio_mb *));
+
+void csio_mb_params(struct csio_hw *, struct csio_mb *, uint32_t, unsigned int,
+		    unsigned int, unsigned int, const u32 *, u32 *, bool,
+		    void (*)(struct csio_hw *, struct csio_mb *));
+
+void csio_mb_process_read_params_rsp(struct csio_hw *, struct csio_mb *,
+				enum fw_retval *, unsigned int , u32 *);
+
+void csio_mb_ldst(struct csio_hw *hw, struct csio_mb *mbp, uint32_t tmo,
+		  int reg);
+
+void csio_mb_caps_config(struct csio_hw *, struct csio_mb *, uint32_t,
+			    bool, bool, bool, bool,
+			    void (*)(struct csio_hw *, struct csio_mb *));
+
+void csio_rss_glb_config(struct csio_hw *, struct csio_mb *,
+			 uint32_t, uint8_t, unsigned int,
+			 void (*)(struct csio_hw *, struct csio_mb *));
+
+void csio_mb_pfvf(struct csio_hw *, struct csio_mb *, uint32_t,
+		  unsigned int, unsigned int, unsigned int,
+		  unsigned int, unsigned int, unsigned int,
+		  unsigned int, unsigned int, unsigned int,
+		  unsigned int, unsigned int, unsigned int,
+		  unsigned int, void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_mb_port(struct csio_hw *, struct csio_mb *, uint32_t,
+		  uint8_t, bool, uint32_t, uint16_t,
+		  void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_mb_process_read_port_rsp(struct csio_hw *, struct csio_mb *,
+				   enum fw_retval *, uint16_t *);
+
+void csio_mb_initialize(struct csio_hw *, struct csio_mb *, uint32_t,
+			void (*)(struct csio_hw *, struct csio_mb *));
+
+void csio_mb_iq_alloc_write(struct csio_hw *, struct csio_mb *, void *,
+			uint32_t, struct csio_iq_params *,
+			void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_mb_iq_alloc_write_rsp(struct csio_hw *, struct csio_mb *,
+				enum fw_retval *, struct csio_iq_params *);
+
+void csio_mb_iq_free(struct csio_hw *, struct csio_mb *, void *,
+		     uint32_t, struct csio_iq_params *,
+		     void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_mb_eq_ofld_alloc_write(struct csio_hw *, struct csio_mb *, void *,
+				 uint32_t, struct csio_eq_params *,
+				 void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_mb_eq_ofld_alloc_write_rsp(struct csio_hw *, struct csio_mb *,
+				     enum fw_retval *, struct csio_eq_params *);
+
+void csio_mb_eq_ofld_free(struct csio_hw *, struct csio_mb *, void *,
+			  uint32_t , struct csio_eq_params *,
+			  void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_fcoe_read_res_info_init_mb(struct csio_hw *, struct csio_mb *,
+			uint32_t,
+			void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_write_fcoe_link_cond_init_mb(struct csio_lnode *, struct csio_mb *,
+			uint32_t, uint8_t, uint32_t, uint8_t, bool, uint32_t,
+			void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_fcoe_vnp_alloc_init_mb(struct csio_lnode *, struct csio_mb *,
+			uint32_t, uint32_t , uint32_t , uint16_t,
+			uint8_t [8], uint8_t [8],
+			void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_fcoe_vnp_read_init_mb(struct csio_lnode *, struct csio_mb *,
+			uint32_t, uint32_t , uint32_t ,
+			void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_fcoe_vnp_free_init_mb(struct csio_lnode *, struct csio_mb *,
+			uint32_t , uint32_t, uint32_t ,
+			void (*) (struct csio_hw *, struct csio_mb *));
+
+void csio_fcoe_read_fcf_init_mb(struct csio_lnode *, struct csio_mb *,
+			uint32_t, uint32_t, uint32_t,
+			void (*cbfn) (struct csio_hw *, struct csio_mb *));
+
+void csio_fcoe_read_portparams_init_mb(struct csio_hw *hw,
+			struct csio_mb *mbp, uint32_t mb_tmo,
+			struct fw_fcoe_port_cmd_params *portparams,
+			void (*cbfn)(struct csio_hw *, struct csio_mb *));
+
+void csio_mb_process_portparams_rsp(struct csio_hw *hw, struct csio_mb *mbp,
+				enum fw_retval *retval,
+				struct fw_fcoe_port_cmd_params *portparams,
+				struct fw_fcoe_port_stats *portstats);
+
+/* MB module functions */
+int csio_mbm_init(struct csio_mbm *, struct csio_hw *,
+			    void (*)(uintptr_t));
+void csio_mbm_exit(struct csio_mbm *);
+void csio_mb_intr_enable(struct csio_hw *);
+void csio_mb_intr_disable(struct csio_hw *);
+
+int csio_mb_issue(struct csio_hw *, struct csio_mb *);
+void csio_mb_completions(struct csio_hw *, struct list_head *);
+int csio_mb_fwevt_handler(struct csio_hw *, __be64 *);
+int csio_mb_isr_handler(struct csio_hw *);
+struct csio_mb *csio_mb_tmo_handler(struct csio_hw *);
+void csio_mb_cancel_all(struct csio_hw *, struct list_head *);
+
+#endif /* ifndef __CSIO_MB_H__ */
diff --git a/drivers/scsi/csiostor/csio_rnode.h b/drivers/scsi/csiostor/csio_rnode.h
new file mode 100644
index 0000000..a3b434c
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_rnode.h
@@ -0,0 +1,141 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CSIO_RNODE_H__
+#define __CSIO_RNODE_H__
+
+#include "csio_defs.h"
+
+/* State machine events */
+enum csio_rn_ev {
+	CSIO_RNFE_NONE = (uint32_t)0,			/* None */
+	CSIO_RNFE_LOGGED_IN,				/* [N/F]Port login
+							 * complete.
+							 */
+	CSIO_RNFE_PRLI_DONE,				/* PRLI completed */
+	CSIO_RNFE_PLOGI_RECV,				/* Received PLOGI */
+	CSIO_RNFE_PRLI_RECV,				/* Received PRLI */
+	CSIO_RNFE_LOGO_RECV,				/* Received LOGO */
+	CSIO_RNFE_PRLO_RECV,				/* Received PRLO */
+	CSIO_RNFE_DOWN,					/* Rnode is down */
+	CSIO_RNFE_CLOSE,				/* Close rnode */
+	CSIO_RNFE_NAME_MISSING,				/* Rnode name missing
+							 * in name server.
+							 */
+	CSIO_RNFE_MAX_EVENT,
+};
+
+/* rnode stats */
+struct csio_rnode_stats {
+	uint32_t	n_err;		/* error */
+	uint32_t	n_err_inval;	/* invalid parameter */
+	uint32_t	n_err_nomem;	/* error nomem */
+	uint32_t	n_evt_unexp;	/* unexpected event */
+	uint32_t	n_evt_drop;	/* dropped event */
+	uint32_t	n_evt_fw[RSCN_DEV_LOST];	/* fw events */
+	enum csio_rn_ev	n_evt_sm[CSIO_RNFE_MAX_EVENT];	/* State m/c events */
+	uint32_t	n_lun_rst;	/* Number of resets
+					 * of LUNs under this
+					 * target
+					 */
+	uint32_t	n_lun_rst_fail;	/* Number of LUN reset
+					 * failures.
+					 */
+	uint32_t	n_tgt_rst;	/* Number of target resets */
+	uint32_t	n_tgt_rst_fail;	/* Number of target reset
+					 * failures.
+					 */
+};
+
+/* Defines for rnode role */
+#define	CSIO_RNFR_INITIATOR	0x1
+#define	CSIO_RNFR_TARGET	0x2
+#define CSIO_RNFR_FABRIC	0x4
+#define	CSIO_RNFR_NS		0x8
+#define CSIO_RNFR_NPORT		0x10
+
+struct csio_rnode {
+	struct csio_sm		sm;			/* State machine -
+							 * should be the
+							 * 1st member
+							 */
+	struct csio_lnode	*lnp;			/* Pointer to owning
+							 * Lnode */
+	uint32_t		flowid;			/* Firmware ID */
+	struct list_head	host_cmpl_q;		/* SCSI IOs pending
+							 * to be completed
+							 * to Mid-layer.
+							 */
+	/* FC identifiers for remote node */
+	uint32_t		nport_id;
+	uint16_t		fcp_flags;		/* FCP Flags */
+	uint8_t			cur_evt;		/* Current event */
+	uint8_t			prev_evt;		/* Previous event */
+	uint32_t		role;			/* Fabric/Target/
+							 * Initiator/NS
+							 */
+	struct fcoe_rdev_entry		*rdev_entry;	/* Rdev entry */
+	struct csio_service_parms	rn_sparm;
+
+	/* FC transport attributes */
+	struct fc_rport		*rport;		/* FC transport rport */
+	uint32_t		supp_classes;	/* Supported FC classes */
+	uint32_t		maxframe_size;	/* Max Frame size */
+	uint32_t		scsi_id;	/* Transport given SCSI id */
+
+	struct csio_rnode_stats	stats;		/* Common rnode stats */
+};
+
+#define csio_rn_flowid(rn)			((rn)->flowid)
+#define csio_rn_wwpn(rn)			((rn)->rn_sparm.wwpn)
+#define csio_rn_wwnn(rn)			((rn)->rn_sparm.wwnn)
+#define csio_rnode_to_lnode(rn)			((rn)->lnp)
+
+int csio_is_rnode_ready(struct csio_rnode *rn);
+void csio_rnode_state_to_str(struct csio_rnode *rn, int8_t *str);
+
+struct csio_rnode *csio_rnode_lookup_portid(struct csio_lnode *, uint32_t);
+struct csio_rnode *csio_confirm_rnode(struct csio_lnode *,
+					  uint32_t, struct fcoe_rdev_entry *);
+
+void csio_rnode_fwevt_handler(struct csio_rnode *rn, uint8_t fwevt);
+
+void csio_put_rnode(struct csio_lnode *ln, struct csio_rnode *rn);
+
+void csio_reg_rnode(struct csio_rnode *);
+void csio_unreg_rnode(struct csio_rnode *);
+
+void csio_rnode_devloss_handler(struct csio_rnode *);
+
+#endif /* ifndef __CSIO_RNODE_H__ */
diff --git a/drivers/scsi/csiostor/csio_scsi.h b/drivers/scsi/csiostor/csio_scsi.h
new file mode 100644
index 0000000..2257c3d
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_scsi.h
@@ -0,0 +1,342 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CSIO_SCSI_H__
+#define __CSIO_SCSI_H__
+
+#include <linux/spinlock_types.h>
+#include <linux/completion.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_eh.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/fc/fc_fcp.h>
+
+#include "csio_defs.h"
+#include "csio_wr.h"
+
+extern struct scsi_host_template csio_fcoe_shost_template;
+extern struct scsi_host_template csio_fcoe_shost_vport_template;
+
+extern int csio_scsi_eqsize;
+extern int csio_scsi_iqlen;
+extern int csio_scsi_ioreqs;
+extern uint32_t csio_max_scan_tmo;
+extern uint32_t csio_delta_scan_tmo;
+extern int csio_lun_qdepth;
+
+/*
+ **************************** NOTE *******************************
+ * How do we calculate MAX FCoE SCSI SGEs? Here is the math:
+ * Max Egress WR size = 512 bytes
+ * One SCSI egress WR has the following fixed no of bytes:
+ *      48 (sizeof(struct fw_scsi_write[read]_wr)) - FW WR
+ *    + 32 (sizeof(struct fc_fcp_cmnd)) - Immediate FCP_CMD
+ *    ------
+ *      80
+ *    ------
+ * That leaves us with 512 - 80 = 432 bytes for data SGE. Using
+ * struct ulptx_sgl header for the SGE consumes:
+ *	- 4 bytes for cmnd_sge.
+ *	- 12 bytes for the first SGL.
+ * That leaves us with 416 bytes for the remaining SGE pairs. Which
+ * is 416 / 24 (sizeof(struct ulptx_sge_pair)) = 17 SGE pairs,
+ * or 34 SGEs. Adding the first SGE fetches us 35 SGEs.
+ */
+#define CSIO_SCSI_MAX_SGE		35
+#define CSIO_SCSI_ABRT_TMO_MS		60000
+#define CSIO_SCSI_LUNRST_TMO_MS		60000
+#define CSIO_SCSI_TM_POLL_MS		2000	/* should be less than
+						 * all TM timeouts.
+						 */
+#define CSIO_SCSI_IQ_WRSZ		128
+#define CSIO_SCSI_IQSIZE		(csio_scsi_iqlen * CSIO_SCSI_IQ_WRSZ)
+
+#define	CSIO_MAX_SNS_LEN		128
+#define	CSIO_SCSI_RSP_LEN	(FCP_RESP_WITH_EXT + 4 + CSIO_MAX_SNS_LEN)
+
+/* Reference to scsi_cmnd */
+#define csio_scsi_cmnd(req)		((req)->scratch1)
+
+struct csio_scsi_stats {
+	uint64_t		n_tot_success;	/* Total number of good I/Os */
+	uint32_t		n_rn_nr_error;	/* No. of remote-node-not-
+						 * ready errors
+						 */
+	uint32_t		n_hw_nr_error;	/* No. of hw-module-not-
+						 * ready errors
+						 */
+	uint32_t		n_dmamap_error;	/* No. of DMA map errors */
+	uint32_t		n_unsupp_sge_error; /* No. of too-many-SGEs
+						     * errors.
+						     */
+	uint32_t		n_no_req_error;	/* No. of Out-of-ioreqs error */
+	uint32_t		n_busy_error;	/* No. of -EBUSY errors */
+	uint32_t		n_hosterror;	/* No. of FW_HOSTERROR I/O */
+	uint32_t		n_rsperror;	/* No. of response errors */
+	uint32_t		n_autosense;	/* No. of auto sense replies */
+	uint32_t		n_ovflerror;	/* No. of overflow errors */
+	uint32_t		n_unflerror;	/* No. of underflow errors */
+	uint32_t		n_rdev_nr_error;/* No. of rdev not
+						 * ready errors
+						 */
+	uint32_t		n_rdev_lost_error;/* No. of rdev lost errors */
+	uint32_t		n_rdev_logo_error;/* No. of rdev logo errors */
+	uint32_t		n_link_down_error;/* No. of link down errors */
+	uint32_t		n_no_xchg_error; /* No. no exchange error */
+	uint32_t		n_unknown_error;/* No. of unhandled errors */
+	uint32_t		n_aborted;	/* No. of aborted I/Os */
+	uint32_t		n_abrt_timedout; /* No. of abort timedouts */
+	uint32_t		n_abrt_fail;	/* No. of abort failures */
+	uint32_t		n_abrt_dups;	/* No. of duplicate aborts */
+	uint32_t		n_abrt_race_comp; /* No. of aborts that raced
+						   * with completions.
+						   */
+	uint32_t		n_abrt_busy_error;/* No. of abort failures
+						   * due to -EBUSY.
+						   */
+	uint32_t		n_closed;	/* No. of closed I/Os */
+	uint32_t		n_cls_busy_error; /* No. of close failures
+						   * due to -EBUSY.
+						   */
+	uint32_t		n_active;	/* No. of IOs in active_q */
+	uint32_t		n_tm_active;	/* No. of TMs in active_q */
+	uint32_t		n_wcbfn;	/* No. of I/Os in worker
+						 * cbfn q
+						 */
+	uint32_t		n_free_ioreq;	/* No. of freelist entries */
+	uint32_t		n_free_ddp;	/* No. of DDP freelist */
+	uint32_t		n_unaligned;	/* No. of Unaligned SGLs */
+	uint32_t		n_inval_cplop;	/* No. invalid CPL op's in IQ */
+	uint32_t		n_inval_scsiop;	/* No. invalid scsi op's in IQ*/
+};
+
+struct csio_scsim {
+	struct csio_hw		*hw;		/* Pointer to HW module */
+	uint8_t			max_sge;	/* Max SGE */
+	uint8_t			proto_cmd_len;	/* Proto specific SCSI
+						 * cmd length
+						 */
+	uint16_t		proto_rsp_len;	/* Proto specific SCSI
+						 * response length
+						 */
+	spinlock_t		freelist_lock;	/* Lock for ioreq freelist */
+	struct list_head	active_q;	/* Outstanding SCSI I/Os */
+	struct list_head	ioreq_freelist;	/* Free list of ioreq's */
+	struct list_head	ddp_freelist;	/* DDP descriptor freelist */
+	struct csio_scsi_stats	stats;		/* This module's statistics */
+};
+
+/* State machine defines */
+enum csio_scsi_ev {
+	CSIO_SCSIE_START_IO = 1,		/* Start a regular SCSI IO */
+	CSIO_SCSIE_START_TM,			/* Start a TM IO */
+	CSIO_SCSIE_COMPLETED,			/* IO Completed */
+	CSIO_SCSIE_ABORT,			/* Abort IO */
+	CSIO_SCSIE_ABORTED,			/* IO Aborted */
+	CSIO_SCSIE_CLOSE,			/* Close exchange */
+	CSIO_SCSIE_CLOSED,			/* Exchange closed */
+	CSIO_SCSIE_DRVCLEANUP,			/* Driver wants to manually
+						 * cleanup this I/O.
+						 */
+};
+
+enum csio_scsi_lev {
+	CSIO_LEV_ALL = 1,
+	CSIO_LEV_LNODE,
+	CSIO_LEV_RNODE,
+	CSIO_LEV_LUN,
+};
+
+struct csio_scsi_level_data {
+	enum csio_scsi_lev	level;
+	struct csio_rnode	*rnode;
+	struct csio_lnode	*lnode;
+	uint64_t		oslun;
+};
+
+static inline struct csio_ioreq *
+csio_get_scsi_ioreq(struct csio_scsim *scm)
+{
+	struct csio_sm *sm;
+
+	/* ioreq freelist is empty: hand back NULL. */
+	if (unlikely(list_empty(&scm->ioreq_freelist)))
+		return NULL;
+
+	sm = list_first_entry(&scm->ioreq_freelist, struct csio_sm, sm_list);
+	list_del_init(&sm->sm_list);
+	CSIO_DEC_STATS(scm, n_free_ioreq);
+	return (struct csio_ioreq *)sm;
+}
+
+static inline void
+csio_put_scsi_ioreq(struct csio_scsim *scm, struct csio_ioreq *ioreq)
+{
+	list_add_tail(&ioreq->sm.sm_list, &scm->ioreq_freelist);
+	CSIO_INC_STATS(scm, n_free_ioreq);
+}
+
+static inline void
+csio_put_scsi_ioreq_list(struct csio_scsim *scm, struct list_head *reqlist,
+			 int n)
+{
+	list_splice_init(reqlist, &scm->ioreq_freelist);
+	scm->stats.n_free_ioreq += n;
+}
+
+static inline struct csio_dma_buf *
+csio_get_scsi_ddp(struct csio_scsim *scm)
+{
+	struct csio_dma_buf *buf;
+
+	/* DDP freelist is empty: hand back NULL. */
+	if (unlikely(list_empty(&scm->ddp_freelist)))
+		return NULL;
+
+	buf = list_first_entry(&scm->ddp_freelist, struct csio_dma_buf, list);
+	list_del_init(&buf->list);
+	CSIO_DEC_STATS(scm, n_free_ddp);
+	return buf;
+}
+
+static inline void
+csio_put_scsi_ddp(struct csio_scsim *scm, struct csio_dma_buf *ddp)
+{
+	list_add_tail(&ddp->list, &scm->ddp_freelist);
+	CSIO_INC_STATS(scm, n_free_ddp);
+}
+
+static inline void
+csio_put_scsi_ddp_list(struct csio_scsim *scm, struct list_head *reqlist,
+			 int n)
+{
+	list_splice_tail_init(reqlist, &scm->ddp_freelist);
+	scm->stats.n_free_ddp += n;
+}
+
+static inline void
+csio_scsi_completed(struct csio_ioreq *ioreq, struct list_head *cbfn_q)
+{
+	csio_post_event(&ioreq->sm, CSIO_SCSIE_COMPLETED);
+	if (csio_list_deleted(&ioreq->sm.sm_list))
+		list_add_tail(&ioreq->sm.sm_list, cbfn_q);
+}
+
+static inline void
+csio_scsi_aborted(struct csio_ioreq *ioreq, struct list_head *cbfn_q)
+{
+	csio_post_event(&ioreq->sm, CSIO_SCSIE_ABORTED);
+	list_add_tail(&ioreq->sm.sm_list, cbfn_q);
+}
+
+static inline void
+csio_scsi_closed(struct csio_ioreq *ioreq, struct list_head *cbfn_q)
+{
+	csio_post_event(&ioreq->sm, CSIO_SCSIE_CLOSED);
+	list_add_tail(&ioreq->sm.sm_list, cbfn_q);
+}
+
+static inline void
+csio_scsi_drvcleanup(struct csio_ioreq *ioreq)
+{
+	csio_post_event(&ioreq->sm, CSIO_SCSIE_DRVCLEANUP);
+}
+
+/*
+ * csio_scsi_start_io - Kick starts the IO SM.
+ * @ioreq: I/O request whose state machine receives CSIO_SCSIE_START_IO.
+ *
+ * Returns ioreq->drv_status. Needs to be called with lock held.
+ */
+static inline int
+csio_scsi_start_io(struct csio_ioreq *ioreq)
+{
+	csio_post_event(&ioreq->sm, CSIO_SCSIE_START_IO);
+	return ioreq->drv_status;
+}
+
+/*
+ * csio_scsi_start_tm - Kicks off the Task management IO SM.
+ * @ioreq: I/O request whose state machine receives CSIO_SCSIE_START_TM.
+ *
+ * Returns ioreq->drv_status. Needs to be called with lock held.
+ */
+static inline int
+csio_scsi_start_tm(struct csio_ioreq *ioreq)
+{
+	csio_post_event(&ioreq->sm, CSIO_SCSIE_START_TM);
+	return ioreq->drv_status;
+}
+
+/*
+ * csio_scsi_abort - Abort an IO request
+ * @ioreq: I/O request whose state machine receives CSIO_SCSIE_ABORT.
+ *
+ * Returns ioreq->drv_status. Needs to be called with lock held.
+ */
+static inline int
+csio_scsi_abort(struct csio_ioreq *ioreq)
+{
+	csio_post_event(&ioreq->sm, CSIO_SCSIE_ABORT);
+	return ioreq->drv_status;
+}
+
+/*
+ * csio_scsi_close - Close an IO request
+ * @ioreq: I/O request whose state machine receives CSIO_SCSIE_CLOSE.
+ *
+ * Returns ioreq->drv_status. Needs to be called with lock held.
+ */
+static inline int
+csio_scsi_close(struct csio_ioreq *ioreq)
+{
+	csio_post_event(&ioreq->sm, CSIO_SCSIE_CLOSE);
+	return ioreq->drv_status;
+}
+
+void csio_scsi_cleanup_io_q(struct csio_scsim *, struct list_head *);
+int csio_scsim_cleanup_io(struct csio_scsim *, bool abort);
+int csio_scsim_cleanup_io_lnode(struct csio_scsim *,
+					  struct csio_lnode *);
+struct csio_ioreq *csio_scsi_cmpl_handler(struct csio_hw *, void *, uint32_t,
+					  struct csio_fl_dma_buf *,
+					  void *, uint8_t **);
+int csio_scsi_qconfig(struct csio_hw *);
+int csio_scsim_init(struct csio_scsim *, struct csio_hw *);
+void csio_scsim_exit(struct csio_scsim *);
+
+#endif /* __CSIO_SCSI_H__ */
diff --git a/drivers/scsi/csiostor/csio_wr.h b/drivers/scsi/csiostor/csio_wr.h
new file mode 100644
index 0000000..8d30e7a
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_wr.h
@@ -0,0 +1,512 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CSIO_WR_H__
+#define __CSIO_WR_H__
+
+#include <linux/cache.h>
+
+#include "csio_defs.h"
+#include "t4fw_api.h"
+#include "t4fw_api_stor.h"
+
+/*
+ * SGE register field values.
+ */
+#define X_INGPCIEBOUNDARY_32B		0
+#define X_INGPCIEBOUNDARY_64B		1
+#define X_INGPCIEBOUNDARY_128B		2
+#define X_INGPCIEBOUNDARY_256B		3
+#define X_INGPCIEBOUNDARY_512B		4
+#define X_INGPCIEBOUNDARY_1024B		5
+#define X_INGPCIEBOUNDARY_2048B		6
+#define X_INGPCIEBOUNDARY_4096B		7
+
+/* GTS register */
+#define X_TIMERREG_COUNTER0		0
+#define X_TIMERREG_COUNTER1		1
+#define X_TIMERREG_COUNTER2		2
+#define X_TIMERREG_COUNTER3		3
+#define X_TIMERREG_COUNTER4		4
+#define X_TIMERREG_COUNTER5		5
+#define X_TIMERREG_RESTART_COUNTER	6
+#define X_TIMERREG_UPDATE_CIDX		7
+
+/*
+ * Egress Context field values
+ */
+#define X_FETCHBURSTMIN_16B		0
+#define X_FETCHBURSTMIN_32B		1
+#define X_FETCHBURSTMIN_64B		2
+#define X_FETCHBURSTMIN_128B		3
+
+#define X_FETCHBURSTMAX_64B		0
+#define X_FETCHBURSTMAX_128B		1
+#define X_FETCHBURSTMAX_256B		2
+#define X_FETCHBURSTMAX_512B		3
+
+#define X_HOSTFCMODE_NONE		0
+#define X_HOSTFCMODE_INGRESS_QUEUE	1
+#define X_HOSTFCMODE_STATUS_PAGE	2
+#define X_HOSTFCMODE_BOTH		3
+
+/*
+ * Ingress Context field values
+ */
+#define X_UPDATESCHEDULING_TIMER	0
+#define X_UPDATESCHEDULING_COUNTER_OPTTIMER	1
+
+#define X_UPDATEDELIVERY_NONE		0
+#define X_UPDATEDELIVERY_INTERRUPT	1
+#define X_UPDATEDELIVERY_STATUS_PAGE	2
+#define X_UPDATEDELIVERY_BOTH		3
+
+#define X_INTERRUPTDESTINATION_PCIE	0
+#define X_INTERRUPTDESTINATION_IQ	1
+
+#define X_RSPD_TYPE_FLBUF		0
+#define X_RSPD_TYPE_CPL			1
+#define X_RSPD_TYPE_INTR		2
+
+/* WR status is at the same position as retval in a CMD header */
+#define csio_wr_status(_wr)		\
+		(FW_CMD_RETVAL_GET(ntohl(((struct fw_cmd_hdr *)(_wr))->lo)))
+
+struct csio_hw;
+
+extern int csio_intr_coalesce_cnt;
+extern int csio_intr_coalesce_time;
+
+/* Ingress queue params */
+struct csio_iq_params {
+
+	uint8_t		iq_start:1;
+	uint8_t		iq_stop:1;
+	uint8_t		pfn:3;
+
+	uint8_t		vfn;
+
+	uint16_t	physiqid;
+	uint16_t	iqid;
+
+	uint16_t	fl0id;
+	uint16_t	fl1id;
+
+	uint8_t		viid;
+
+	uint8_t		type;
+	uint8_t		iqasynch;
+	uint8_t		reserved4;
+
+	uint8_t		iqandst;
+	uint8_t		iqanus;
+	uint8_t		iqanud;
+
+	uint16_t	iqandstindex;
+
+	uint8_t		iqdroprss;
+	uint8_t		iqpciech;
+	uint8_t		iqdcaen;
+
+	uint8_t		iqdcacpu;
+	uint8_t		iqintcntthresh;
+	uint8_t		iqo;
+
+	uint8_t		iqcprio;
+	uint8_t		iqesize;
+
+	uint16_t	iqsize;
+
+	uint64_t	iqaddr;
+
+	uint8_t		iqflintiqhsen;
+	uint8_t		reserved5;
+	uint8_t		iqflintcongen;
+	uint8_t		iqflintcngchmap;
+
+	uint32_t	reserved6;
+
+	uint8_t		fl0hostfcmode;
+	uint8_t		fl0cprio;
+	uint8_t		fl0paden;
+	uint8_t		fl0packen;
+	uint8_t		fl0congen;
+	uint8_t		fl0dcaen;
+
+	uint8_t		fl0dcacpu;
+	uint8_t		fl0fbmin;
+
+	uint8_t		fl0fbmax;
+	uint8_t		fl0cidxfthresho;
+	uint8_t		fl0cidxfthresh;
+
+	uint16_t	fl0size;
+
+	uint64_t	fl0addr;
+
+	uint64_t	reserved7;
+
+	uint8_t		fl1hostfcmode;
+	uint8_t		fl1cprio;
+	uint8_t		fl1paden;
+	uint8_t		fl1packen;
+	uint8_t		fl1congen;
+	uint8_t		fl1dcaen;
+
+	uint8_t		fl1dcacpu;
+	uint8_t		fl1fbmin;
+
+	uint8_t		fl1fbmax;
+	uint8_t		fl1cidxfthresho;
+	uint8_t		fl1cidxfthresh;
+
+	uint16_t	fl1size;
+
+	uint64_t	fl1addr;
+};
+
+/* Egress queue params */
+struct csio_eq_params {
+
+	uint8_t		pfn;
+	uint8_t		vfn;
+
+	uint8_t		eqstart:1;
+	uint8_t		eqstop:1;
+
+	uint16_t        physeqid;
+	uint32_t	eqid;
+
+	uint8_t		hostfcmode:2;
+	uint8_t		cprio:1;
+	uint8_t		pciechn:3;
+
+	uint16_t	iqid;
+
+	uint8_t		dcaen:1;
+	uint8_t		dcacpu:5;
+
+	uint8_t		fbmin:3;
+	uint8_t		fbmax:3;
+
+	uint8_t		cidxfthresho:1;
+	uint8_t		cidxfthresh:3;
+
+	uint16_t	eqsize;
+
+	uint64_t	eqaddr;
+};
+
+struct csio_dma_buf {
+	struct list_head	list;		/* List linkage */
+	void			*vaddr;		/* Virtual address */
+	dma_addr_t		paddr;		/* Physical address */
+	uint32_t		len;		/* Buffer size */
+};
+
+/* Generic I/O request structure */
+struct csio_ioreq {
+	struct csio_sm		sm;		/* State machine + list node;
+						 * must be the first member
+						 */
+	int			iq_idx;		/* Ingress queue index */
+	int			eq_idx;		/* Egress queue index */
+	uint32_t		nsge;		/* Number of SG elements */
+	uint32_t		tmo;		/* Driver timeout */
+	uint32_t		datadir;	/* Data direction */
+	struct csio_dma_buf	dma_buf;	/* Req/resp DMA buffers */
+	uint16_t		wr_status;	/* WR completion status */
+	int16_t			drv_status;	/* Driver internal status */
+	struct csio_lnode	*lnode;		/* Owner lnode */
+	struct csio_rnode	*rnode;		/* Src/destination rnode */
+	void (*io_cbfn) (struct csio_hw *, struct csio_ioreq *);
+						/* Completion callback */
+	void			*scratch1;	/* Scratch area 1
+						 * (opaque pointer) */
+	void			*scratch2;	/* Scratch area 2. */
+	struct list_head	gen_list;	/* Any list associated with
+						 * this ioreq.
+						 */
+	uint64_t		fw_handle;	/* Unique handle passed
+						 * to FW
+						 */
+	uint8_t			dcopy;		/* Data copy required */
+	uint8_t			reserved1;	/* Reserved */
+	uint16_t		reserved2;	/* Reserved */
+	struct completion	cmplobj;	/* ioreq completion object */
+} ____cacheline_aligned_in_smp;
+
+/*
+ * Egress status page for egress cidx updates
+ */
+struct csio_qstatus_page {
+	__be32 qid;		/* queue id, big-endian */
+	__be16 cidx;		/* consumer index, big-endian */
+	__be16 pidx;		/* producer index, big-endian */
+};
+
+
+enum {
+	CSIO_MAX_FLBUF_PER_IQWR = 4,		/* len of flbufs[] */
+	CSIO_QCREDIT_SZ  = 64,			/* pidx/cidx increments
+						 * in bytes
+						 */
+	CSIO_MAX_QID = 0xFFFF,
+	CSIO_MAX_IQ = 128,			/* len of intr_map[] */
+
+	CSIO_SGE_NTIMERS = 6,			/* len of timer_val[] */
+	CSIO_SGE_NCOUNTERS = 4,			/* len of counter_val[] */
+	CSIO_SGE_FL_SIZE_REGS = 16,		/* len of sge_fl_buf_size[] */
+};
+
+/* Defines for type */
+enum {
+	CSIO_EGRESS	= 1,	/* Egress queue */
+	CSIO_INGRESS	= 2,	/* Ingress queue */
+	CSIO_FREELIST	= 3,	/* Free list */
+};
+
+/*
+ * Structure for footer (last 2 flits) of Ingress Queue Entry.
+ */
+struct csio_iqwr_footer {
+	__be32			hdrbuflen_pidx;
+	__be32			pldbuflen_qid;
+	union {
+		u8		type_gen;	/* overlays 1st byte of last_flit */
+		__be64		last_flit;
+	} u;
+};
+
+#define IQWRF_NEWBUF		(1U << 31)	/* 1U: avoid int overflow */
+#define IQWRF_LEN_GET(x)	(((x) >> 0) & 0x7fffffffU)	/* bits 30:0 */
+#define IQWRF_GEN_SHIFT		7	/* bit position, not a mask */
+#define IQWRF_TYPE_GET(x)	(((x) >> 4) & 0x3U)	/* bits 5:4 */
+
+
+/*
+ * WR pair:
+ * ========
+ * A WR can start towards the end of a queue, and then continue at the
+ * beginning, since the queue is considered to be circular. This will
+ * require a pair of address/len to be passed back to the caller -
+ * hence the Work request pair structure.
+ */
+struct csio_wr_pair {
+	void			*addr1;		/* Start of the WR */
+	uint32_t		size1;		/* Bytes at addr1 */
+	void			*addr2;		/* Continuation after wrap */
+	uint32_t		size2;		/* Bytes at addr2 */
+};
+
+/*
+ * The following structure is used by ingress processing to return the
+ * free list buffers to consumers.
+ */
+struct csio_fl_dma_buf {
+	struct csio_dma_buf	flbufs[CSIO_MAX_FLBUF_PER_IQWR];
+						/* Freelist DMA buffers */
+	int			offset;		/* Offset within the
+						 * first FL buf.
+						 */
+	uint32_t		totlen;		/* Total length */
+	uint8_t			defer_free;	/* Free of buffer can
+						 * be deferred
+						 */
+};
+
+/* Data-types */
+typedef void (*iq_handler_t)(struct csio_hw *, void *, uint32_t,
+			     struct csio_fl_dma_buf *, void *);
+
+struct csio_iq {
+	uint16_t		iqid;		/* Queue ID */
+	uint16_t		physiqid;	/* Physical Queue ID */
+	uint16_t		genbit;		/* Generation bit,
+						 * initially set to 1
+						 */
+	int			flq_idx;	/* Freelist queue index */
+	iq_handler_t		iq_intx_handler; /* IQ INTx handler routine */
+};
+
+struct csio_eq {
+	uint16_t		eqid;		/* Qid */
+	uint16_t		physeqid;	/* Physical Queue ID */
+	uint8_t			wrap[512];	/* Temp area for queue wrap */
+};
+
+struct csio_fl {
+	uint16_t		flid;		/* Qid */
+	uint16_t		packen;		/* Packing enabled? */
+	int			offset;		/* Offset within FL buf */
+	int			sreg;		/* Size register */
+	struct csio_dma_buf	*bufs;		/* Free list buffer ptr array
+						 * indexed using flq->cidx/pidx
+						 */
+};
+
+struct csio_qstats {
+	uint32_t	n_tot_reqs;		/* Total no. of Requests */
+	uint32_t	n_tot_rsps;		/* Total no. of responses */
+	uint32_t	n_qwrap;		/* Queue wraps */
+	uint32_t	n_eq_wr_split;		/* Number of split EQ WRs */
+	uint32_t	n_qentry;		/* Queue entry */
+	uint32_t	n_qempty;		/* Queue empty */
+	uint32_t	n_qfull;		/* Queue fulls */
+	uint32_t	n_rsp_unknown;		/* Unknown response type */
+	uint32_t	n_stray_comp;		/* Stray completion intr */
+	uint32_t	n_flq_refill;		/* Number of FL refills */
+};
+
+/* Queue metadata */
+struct csio_q {
+	uint16_t		type;		/* Type: Ingress/Egress/FL */
+	uint16_t		pidx;		/* producer index */
+	uint16_t		cidx;		/* consumer index */
+	uint16_t		inc_idx;	/* Incremental index */
+	uint32_t		wr_sz;		/* Size of all WRs in this q
+						 * if fixed
+						 */
+	void			*vstart;	/* Base virtual address
+						 * of queue
+						 */
+	void			*vwrap;		/* Virtual end address to
+						 * wrap around at
+						 */
+	uint32_t		credits;	/* Size of queue in credits */
+	void			*owner;		/* Owner */
+	union {					/* Queue contexts */
+		struct csio_iq	iq;
+		struct csio_eq	eq;
+		struct csio_fl	fl;
+	} un;
+
+	dma_addr_t		pstart;		/* Base physical address of
+						 * queue
+						 */
+	uint32_t		portid;		/* PCIE Channel */
+	uint32_t		size;		/* Size of queue in bytes */
+	struct csio_qstats	stats;		/* Statistics */
+} ____cacheline_aligned_in_smp;
+
+struct csio_sge {
+	uint32_t	csio_fl_align;		/* Calculated and cached
+						 * for fast path
+						 */
+	uint32_t	sge_control;		/* padding, boundaries,
+						 * lengths, etc.
+						 */
+	uint32_t	sge_host_page_size;	/* Host page size */
+	uint32_t	sge_fl_buf_size[CSIO_SGE_FL_SIZE_REGS];
+						/* free list buffer sizes */
+	uint16_t	timer_val[CSIO_SGE_NTIMERS];	/* timer values */
+	uint8_t		counter_val[CSIO_SGE_NCOUNTERS]; /* counter values */
+};
+
+/* Work request module */
+struct csio_wrm {
+	int			num_q;		/* Number of queues */
+	struct csio_q		**q_arr;	/* Array of queue pointers
+						 * allocated dynamically
+						 * based on configured values
+						 */
+	uint32_t		fw_iq_start;	/* First IQ id for this fn */
+	uint32_t		fw_eq_start;	/* First EQ id for this fn */
+	struct csio_q		*intr_map[CSIO_MAX_IQ];
+						/* Relative IQ-id to IQ map */
+	int			free_qidx;	/* queue idx of free queue */
+	struct csio_sge		sge;		/* SGE params */
+};
+
+#define csio_get_q(__hw, __idx)		((__hw)->wrm.q_arr[(__idx)])
+#define	csio_q_type(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->type)
+#define	csio_q_pidx(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->pidx)
+#define	csio_q_cidx(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->cidx)
+#define	csio_q_inc_idx(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->inc_idx)
+#define	csio_q_vstart(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->vstart)
+#define	csio_q_pstart(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->pstart)
+#define	csio_q_size(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->size)
+#define	csio_q_credits(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->credits)
+#define	csio_q_portid(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->portid)
+#define	csio_q_wr_sz(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->wr_sz)
+#define	csio_q_iqid(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->un.iq.iqid)
+#define csio_q_physiqid(__hw, __idx)					\
+				((__hw)->wrm.q_arr[(__idx)]->un.iq.physiqid)
+#define csio_q_iq_flq_idx(__hw, __idx)					\
+				((__hw)->wrm.q_arr[(__idx)]->un.iq.flq_idx)
+#define	csio_q_eqid(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->un.eq.eqid)
+#define	csio_q_flid(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->un.fl.flid)
+
+#define csio_q_physeqid(__hw, __idx)					\
+				((__hw)->wrm.q_arr[(__idx)]->un.eq.physeqid)
+#define csio_iq_has_fl(__iq)		((__iq)->un.iq.flq_idx != -1)
+/* __iq_idx indexes wrm.q_arr[]; __rel_iq_id indexes wrm.intr_map[] */
+#define csio_q_iq_to_flid(__hw, __iq_idx)				\
+	csio_q_flid((__hw), (__hw)->wrm.q_arr[(__iq_idx)]->un.iq.flq_idx)
+#define csio_q_set_intr_map(__hw, __iq_idx, __rel_iq_id)		\
+	((__hw)->wrm.intr_map[(__rel_iq_id)] = csio_get_q((__hw), (__iq_idx)))
+#define	csio_q_eq_wrap(__hw, __idx)	((__hw)->wrm.q_arr[(__idx)]->un.eq.wrap)
+
+struct csio_mb;
+
+int csio_wr_alloc_q(struct csio_hw *, uint32_t, uint32_t,
+		    uint16_t, void *, uint32_t, int, iq_handler_t);
+int csio_wr_iq_create(struct csio_hw *, void *, int,
+				uint32_t, uint8_t, bool,
+				void (*)(struct csio_hw *, struct csio_mb *));
+int csio_wr_eq_create(struct csio_hw *, void *, int, int, uint8_t,
+				void (*)(struct csio_hw *, struct csio_mb *));
+int csio_wr_destroy_queues(struct csio_hw *, bool cmd);
+
+
+int csio_wr_get(struct csio_hw *, int, uint32_t,
+			  struct csio_wr_pair *);
+void csio_wr_copy_to_wrp(void *, struct csio_wr_pair *, uint32_t, uint32_t);
+int csio_wr_issue(struct csio_hw *, int, bool);
+int csio_wr_process_iq(struct csio_hw *, struct csio_q *,
+				 void (*)(struct csio_hw *, void *,
+					  uint32_t, struct csio_fl_dma_buf *,
+					  void *),
+				 void *);
+int csio_wr_process_iq_idx(struct csio_hw *, int,
+				 void (*)(struct csio_hw *, void *,
+					  uint32_t, struct csio_fl_dma_buf *,
+					  void *),
+				 void *);
+
+void csio_wr_sge_init(struct csio_hw *);
+int csio_wrm_init(struct csio_wrm *, struct csio_hw *);
+void csio_wrm_exit(struct csio_wrm *, struct csio_hw *);
+
+#endif /* ifndef __CSIO_WR_H__ */
-- 
1.7.1


^ permalink raw reply related

* [V4 PATCH 2/8] csiostor: Chelsio FCoE offload driver submission (headers part 1).
From: Naresh Kumar Inna @ 2012-09-12 17:18 UTC (permalink / raw)
  To: JBottomley, linux-scsi, dm, leedom; +Cc: netdev, naresh, chethan
In-Reply-To: <1347470328-32490-1-git-send-email-naresh@chelsio.com>

This patch contains the first set of the header files for csiostor driver.

Signed-off-by: Naresh Kumar Inna <naresh@chelsio.com>
---
V2:
- Removed csio_fcoe_proto.h, using defines from include/scsi/fc instead.
- Removed driver-specific return values, using errno values instead.
- Retained CSIO_INC_STATS, since it is useful in multiple places and
  the name of the structure has been standardized to make use of this macro.
- Removed csio_deq_from_head(), replaced it inline with calls from list.h.
- Removed csio_deq_from_tail().
- Replaced state machine macros with static functions.
- Capitalizing macros with CPP keys.

V3:
- Replaced CSIO_ROUNDUP with DIV_ROUND_UP.
- Use BUG_ON in CSIO_ASSERT macro.

 drivers/scsi/csiostor/csio_defs.h     |  108 ++++++
 drivers/scsi/csiostor/csio_hw.h       |  666 +++++++++++++++++++++++++++++++++
 drivers/scsi/csiostor/csio_init.h     |  158 ++++++++
 drivers/scsi/csiostor/t4fw_api_stor.h |  578 ++++++++++++++++++++++++++++
 4 files changed, 1510 insertions(+), 0 deletions(-)
 create mode 100644 drivers/scsi/csiostor/csio_defs.h
 create mode 100644 drivers/scsi/csiostor/csio_hw.h
 create mode 100644 drivers/scsi/csiostor/csio_init.h
 create mode 100644 drivers/scsi/csiostor/t4fw_api_stor.h

diff --git a/drivers/scsi/csiostor/csio_defs.h b/drivers/scsi/csiostor/csio_defs.h
new file mode 100644
index 0000000..80be354
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_defs.h
@@ -0,0 +1,108 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CSIO_DEFS_H__
+#define __CSIO_DEFS_H__
+
+#include <linux/kernel.h>
+#include <linux/stddef.h>
+#include <linux/timer.h>
+#include <linux/list.h>
+#include <linux/bug.h>
+#include <linux/pci.h>
+#include <linux/jiffies.h>
+
+#define CSIO_INVALID_IDX		0xFFFFFFFF
+#define CSIO_INC_STATS(elem, val)	((elem)->stats.val++)
+#define CSIO_DEC_STATS(elem, val)	((elem)->stats.val--)
+#define CSIO_VALID_WWN(__n)		((*(__n) >> 4) == 0x5) /* high nibble */
+#define CSIO_DID_MASK			0xFFFFFF
+#define CSIO_WORD_TO_BYTE		4
+
+static inline int
+csio_list_deleted(struct list_head *list)
+{
+	return list == list->next && list == list->prev; /* self-linked node */
+}
+
+#define csio_list_next(elem)	(((struct list_head *)(elem))->next)
+#define csio_list_prev(elem)	(((struct list_head *)(elem))->prev)
+
+/* State machine: a state is a handler function that is called with events */
+typedef void (*csio_sm_state_t)(void *, uint32_t);
+
+struct csio_sm {
+	struct list_head	sm_list;	/* List linkage */
+	csio_sm_state_t		sm_state;	/* Current state handler */
+};
+
+static inline void
+csio_set_state(void *smp, void *state)
+{	/* smp is used as a struct csio_sm *; state as a csio_sm_state_t */
+	((struct csio_sm *)smp)->sm_state = (csio_sm_state_t)state;
+}
+
+static inline void
+csio_init_state(struct csio_sm *smp, void *state)
+{	/* only forwards to csio_set_state(); no other initialisation */
+	csio_set_state(smp, state);
+}
+
+static inline void
+csio_post_event(void *smp, uint32_t evt)
+{	/* deliver evt by directly calling the current state handler */
+	((struct csio_sm *)smp)->sm_state(smp, evt);
+}
+
+static inline csio_sm_state_t
+csio_get_state(void *smp)
+{	/* returns the state handler stored in the csio_sm at smp */
+	return ((struct csio_sm *)smp)->sm_state;
+}
+
+static inline bool
+csio_match_state(void *smp, void *state)
+{	/* true when the current state handler is the function 'state' */
+	return (csio_get_state(smp) == (csio_sm_state_t)state);
+}
+
+#define	CSIO_ASSERT(cond)		BUG_ON(!(cond))
+
+#ifdef __CSIO_DEBUG__
+#define CSIO_DB_ASSERT(__c)		CSIO_ASSERT((__c))
+#else	/* !__CSIO_DEBUG__: expands to nothing, __c is not evaluated */
+#define CSIO_DB_ASSERT(__c)
+#endif	/* __CSIO_DEBUG__ */
+
+#endif /* ifndef __CSIO_DEFS_H__ */
diff --git a/drivers/scsi/csiostor/csio_hw.h b/drivers/scsi/csiostor/csio_hw.h
new file mode 100644
index 0000000..a58fdc9
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_hw.h
@@ -0,0 +1,666 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CSIO_HW_H__
+#define __CSIO_HW_H__
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/workqueue.h>
+#include <linux/compiler.h>
+#include <linux/cdev.h>
+#include <linux/list.h>
+#include <linux/mempool.h>
+#include <linux/io.h>
+#include <linux/spinlock_types.h>
+#include <scsi/scsi_transport_fc.h>
+
+#include "csio_wr.h"
+#include "csio_mb.h"
+#include "csio_scsi.h"
+#include "csio_defs.h"
+#include "t4_regs.h"
+#include "t4_msg.h"
+
+/*
+ * An error value used by host. Should not clash with FW defined return values.
+ */
+#define	FW_HOSTERROR			255
+
+#define CSIO_FW_FNAME		"cxgb4/t4fw.bin"
+#define CSIO_CF_FNAME		"cxgb4/t4-config.txt"
+
+#define FW_VERSION_MAJOR	1
+#define FW_VERSION_MINOR	2
+#define FW_VERSION_MICRO	8
+
+#define CSIO_HW_NAME		"Chelsio FCoE Adapter"
+#define CSIO_MAX_PFN		8
+#define CSIO_MAX_PPORTS		4
+
+#define CSIO_MAX_LUN		0xFFFF
+#define CSIO_MAX_QUEUE		2048
+#define CSIO_MAX_CMD_PER_LUN	32
+#define CSIO_MAX_DDP_BUF_SIZE	(1024 * 1024)
+#define CSIO_MAX_SECTOR_SIZE	128
+
+/* Interrupts */
+#define CSIO_EXTRA_MSI_IQS	2	/* Extra iqs for INTX/MSI mode
+					 * (Forward intr iq + fw iq) */
+#define CSIO_EXTRA_VECS		2	/* non-data + FW evt */
+#define CSIO_MAX_SCSI_CPU	128
+#define CSIO_MAX_SCSI_QSETS	(CSIO_MAX_SCSI_CPU * CSIO_MAX_PPORTS)
+#define CSIO_MAX_MSIX_VECS	(CSIO_MAX_SCSI_QSETS + CSIO_EXTRA_VECS)
+
+/* Queues */
+enum {
+	CSIO_INTR_WRSIZE = 128,
+	CSIO_INTR_IQSIZE = ((CSIO_MAX_MSIX_VECS + 1) * CSIO_INTR_WRSIZE),
+	CSIO_FWEVT_WRSIZE = 128,
+	CSIO_FWEVT_IQLEN = 128,
+	CSIO_FWEVT_FLBUFS = 64,
+	CSIO_FWEVT_IQSIZE = (CSIO_FWEVT_WRSIZE * CSIO_FWEVT_IQLEN),
+	CSIO_HW_NIQ = 1,
+	CSIO_HW_NFLQ = 1,
+	CSIO_HW_NEQ = 1,
+	CSIO_HW_NINTXQ = 1,
+};
+
+struct csio_msix_entries {
+	unsigned short	vector;		/* Vector assigned by pci_enable_msix */
+	void		*dev_id;	/* Priv object associated w/ this msix*/
+	char		desc[24];	/* Description of this vector */
+};
+
+struct csio_scsi_qset {
+	int		iq_idx;		/* Ingress index */
+	int		eq_idx;		/* Egress index */
+	uint32_t	intr_idx;	/* MSIX Vector index */
+};
+
+struct csio_scsi_cpu_info {
+	int16_t	max_cpus;
+};
+
+extern int csio_dbg_level;
+extern int csio_force_master;
+extern unsigned int csio_port_mask;
+extern int csio_msi;
+
+#define CSIO_VENDOR_ID				0x1425
+#define CSIO_ASIC_DEVID_PROTO_MASK		0xFF00
+#define CSIO_ASIC_DEVID_TYPE_MASK		0x00FF
+#define CSIO_FPGA				0xA000
+#define CSIO_T4_FCOE_ASIC			0x4600
+
+#define CSIO_GLBL_INTR_MASK		(CIM | MPS | PL | PCIE | MC | EDC0 | \
+					 EDC1 | LE | TP | MA | PM_TX | PM_RX | \
+					 ULP_RX | CPL_SWITCH | SGE | \
+					 ULP_TX | SF)
+
+/*
+ * Hard parameters used to initialize the card in the absence of a
+ * configuration file.
+ */
+enum {
+	/* General */
+	CSIO_SGE_DBFIFO_INT_THRESH	= 10,
+
+	CSIO_SGE_RX_DMA_OFFSET		= 2,
+
+	CSIO_SGE_FLBUF_SIZE1		= 65536,
+	CSIO_SGE_FLBUF_SIZE2		= 1536,
+	CSIO_SGE_FLBUF_SIZE3		= 9024,
+	CSIO_SGE_FLBUF_SIZE4		= 9216,
+	CSIO_SGE_FLBUF_SIZE5		= 2048,
+	CSIO_SGE_FLBUF_SIZE6		= 128,
+	CSIO_SGE_FLBUF_SIZE7		= 8192,
+	CSIO_SGE_FLBUF_SIZE8		= 16384,
+
+	CSIO_SGE_TIMER_VAL_0		= 5,
+	CSIO_SGE_TIMER_VAL_1		= 10,
+	CSIO_SGE_TIMER_VAL_2		= 20,
+	CSIO_SGE_TIMER_VAL_3		= 50,
+	CSIO_SGE_TIMER_VAL_4		= 100,
+	CSIO_SGE_TIMER_VAL_5		= 200,
+
+	CSIO_SGE_INT_CNT_VAL_0		= 1,
+	CSIO_SGE_INT_CNT_VAL_1		= 4,
+	CSIO_SGE_INT_CNT_VAL_2		= 8,
+	CSIO_SGE_INT_CNT_VAL_3		= 16,
+
+	/* Storage specific - used by FW_PFVF_CMD */
+	CSIO_WX_CAPS			= FW_CMD_CAP_PF, /* w/x all */
+	CSIO_R_CAPS			= FW_CMD_CAP_PF, /* r all */
+	CSIO_NVI			= 4,
+	CSIO_NIQ_FLINT			= 34,
+	CSIO_NETH_CTRL			= 32,
+	CSIO_NEQ			= 66,
+	CSIO_NEXACTF			= 32,
+	CSIO_CMASK			= FW_PFVF_CMD_CMASK_MASK,
+	CSIO_PMASK			= FW_PFVF_CMD_PMASK_MASK,
+};
+
+/* Slowpath events */
+enum csio_evt {
+	CSIO_EVT_FW  = 0,	/* FW event */
+	CSIO_EVT_MBX,		/* MBX event */
+	CSIO_EVT_SCN,		/* State change notification */
+	CSIO_EVT_DEV_LOSS,	/* Device loss event */
+	CSIO_EVT_MAX,		/* Max supported event */
+};
+
+#define CSIO_EVT_MSG_SIZE	512
+#define CSIO_EVTQ_SIZE		512
+
+/* Event msg  */
+struct csio_evt_msg {
+	struct list_head	list;	/* evt queue*/
+	enum csio_evt		type;
+	uint8_t			data[CSIO_EVT_MSG_SIZE];
+};
+
+enum {
+	EEPROMVSIZE    = 32768, /* Serial EEPROM virtual address space size */
+	SERNUM_LEN     = 16,    /* Serial # length */
+	EC_LEN         = 16,    /* E/C length */
+	ID_LEN         = 16,    /* ID length */
+	TRACE_LEN      = 112,   /* length of trace data and mask */
+};
+
+enum {
+	SF_PAGE_SIZE = 256,           /* serial flash page size */
+	SF_SEC_SIZE = 64 * 1024,      /* serial flash sector size */
+	SF_SIZE = SF_SEC_SIZE * 16,   /* serial flash size */
+};
+
+enum { MEM_EDC0, MEM_EDC1, MEM_MC };
+
+enum {
+	MEMWIN0_APERTURE = 2048,
+	MEMWIN0_BASE     = 0x1b800,
+	MEMWIN1_APERTURE = 32768,
+	MEMWIN1_BASE     = 0x28000,
+	MEMWIN2_APERTURE = 65536,
+	MEMWIN2_BASE     = 0x30000,
+};
+
+/* serial flash and firmware constants */
+enum {
+	SF_ATTEMPTS = 10,             /* max retries for SF operations */
+
+	/* flash command opcodes */
+	SF_PROG_PAGE    = 2,          /* program page */
+	SF_WR_DISABLE   = 4,          /* disable writes */
+	SF_RD_STATUS    = 5,          /* read status register */
+	SF_WR_ENABLE    = 6,          /* enable writes */
+	SF_RD_DATA_FAST = 0xb,        /* read flash */
+	SF_RD_ID	= 0x9f,	      /* read ID */
+	SF_ERASE_SECTOR = 0xd8,       /* erase sector */
+
+	FW_START_SEC = 8,             /* first flash sector for FW */
+	FW_END_SEC = 15,              /* last flash sector for FW */
+	FW_IMG_START = FW_START_SEC * SF_SEC_SIZE,
+	FW_MAX_SIZE = (FW_END_SEC - FW_START_SEC + 1) * SF_SEC_SIZE,
+
+	FLASH_CFG_MAX_SIZE    = 0x10000 , /* max size of the flash config file*/
+	FLASH_CFG_OFFSET      = 0x1f0000,
+	FLASH_CFG_START_SEC   = FLASH_CFG_OFFSET / SF_SEC_SIZE,
+	FPGA_FLASH_CFG_OFFSET = 0xf0000 , /* if FPGA mode, then cfg file is
+					   * at 1MB - 64KB */
+	FPGA_FLASH_CFG_START_SEC  = FPGA_FLASH_CFG_OFFSET / SF_SEC_SIZE,
+};
+
+/*
+ * Flash layout.
+ */
+#define FLASH_START(start)	((start) * SF_SEC_SIZE)
+#define FLASH_MAX_SIZE(nsecs)	((nsecs) * SF_SEC_SIZE)
+
+enum {
+	/*
+	 * Location of firmware image in FLASH.
+	 */
+	FLASH_FW_START_SEC = 8,
+	FLASH_FW_NSECS = 8,
+	FLASH_FW_START = FLASH_START(FLASH_FW_START_SEC),
+	FLASH_FW_MAX_SIZE = FLASH_MAX_SIZE(FLASH_FW_NSECS),
+
+};
+
+#undef FLASH_START
+#undef FLASH_MAX_SIZE
+
+/* Management module */
+enum {
+	CSIO_MGMT_EQ_WRSIZE = 512,
+	CSIO_MGMT_IQ_WRSIZE = 128,
+	CSIO_MGMT_EQLEN = 64,
+	CSIO_MGMT_IQLEN = 64,
+};
+
+#define CSIO_MGMT_EQSIZE	(CSIO_MGMT_EQLEN * CSIO_MGMT_EQ_WRSIZE)
+#define CSIO_MGMT_IQSIZE	(CSIO_MGMT_IQLEN * CSIO_MGMT_IQ_WRSIZE)
+
+/* mgmt module stats */
+struct csio_mgmtm_stats {
+	uint32_t	n_abort_req;		/* Total abort request */
+	uint32_t	n_abort_rsp;		/* Total abort response */
+	uint32_t	n_close_req;		/* Total close request */
+	uint32_t	n_close_rsp;		/* Total close response */
+	uint32_t	n_err;			/* Total Errors */
+	uint32_t	n_drop;			/* Total request dropped */
+	uint32_t	n_active;		/* Count of active_q */
+	uint32_t	n_cbfn;			/* Count of cbfn_q */
+};
+
+/* MGMT module */
+struct csio_mgmtm {
+	struct	csio_hw		*hw;		/* Pointer to HW module */
+	int			eq_idx;		/* Egress queue index */
+	int			iq_idx;		/* Ingress queue index */
+	int			msi_vec;	/* MSI vector */
+	struct list_head	active_q;	/* Outstanding ELS/CT */
+	struct list_head	abort_q;	/* Outstanding abort req */
+	struct list_head	cbfn_q;		/* Completion queue */
+	struct list_head	mgmt_req_freelist; /* Free pool of reqs */
+						/* ELS/CT request freelist */
+	struct timer_list	mgmt_timer;	/* MGMT timer */
+	struct csio_mgmtm_stats stats;		/* ELS/CT stats */
+};
+
+struct csio_adap_desc {
+	char model_no[16];
+	char description[32];
+};
+
+struct pci_params {
+	uint16_t   vendor_id;
+	uint16_t   device_id;
+	uint32_t   vpd_cap_addr;
+	uint16_t   speed;
+	uint8_t    width;
+};
+
+/* User configurable hw parameters */
+struct csio_hw_params {
+	uint32_t		sf_size;		/* serial flash
+							 * size in bytes
+							 */
+	uint32_t		sf_nsec;		/* # of flash sectors */
+	struct pci_params	pci;
+	uint32_t		log_level;		/* Module-level for
+							 * debug log.
+							 */
+};
+
+struct csio_vpd {
+	uint32_t cclk;
+	uint8_t ec[EC_LEN + 1];
+	uint8_t sn[SERNUM_LEN + 1];
+	uint8_t id[ID_LEN + 1];
+};
+
+struct csio_pport {
+	uint16_t	pcap;
+	uint8_t		portid;
+	uint8_t		link_status;
+	uint16_t	link_speed;
+	uint8_t		mac[6];
+	uint8_t		mod_type;
+	uint8_t		rsvd1;
+	uint8_t		rsvd2;
+	uint8_t		rsvd3;
+};
+
+/* fcoe resource information */
+struct csio_fcoe_res_info {
+	uint16_t	e_d_tov;
+	uint16_t	r_a_tov_seq;
+	uint16_t	r_a_tov_els;
+	uint16_t	r_r_tov;
+	uint32_t	max_xchgs;
+	uint32_t	max_ssns;
+	uint32_t	used_xchgs;
+	uint32_t	used_ssns;
+	uint32_t	max_fcfs;
+	uint32_t	max_vnps;
+	uint32_t	used_fcfs;
+	uint32_t	used_vnps;
+};
+
+/* HW State machine Events */
+enum csio_hw_ev {
+	CSIO_HWE_CFG = (uint32_t)1, /* Starts off the State machine */
+	CSIO_HWE_INIT,	         /* Config done, start Init      */
+	CSIO_HWE_INIT_DONE,      /* Init Mailboxes sent, HW ready */
+	CSIO_HWE_FATAL,		 /* Fatal error during initialization */
+	CSIO_HWE_PCIERR_DETECTED,/* PCI error recovery detected */
+	CSIO_HWE_PCIERR_SLOT_RESET, /* Slot reset after PCI recovery */
+	CSIO_HWE_PCIERR_RESUME,  /* Resume after PCI error recovery */
+	CSIO_HWE_QUIESCED,	 /* HBA quiesced */
+	CSIO_HWE_HBA_RESET,      /* HBA reset requested */
+	CSIO_HWE_HBA_RESET_DONE, /* HBA reset completed */
+	CSIO_HWE_FW_DLOAD,       /* FW download requested */
+	CSIO_HWE_PCI_REMOVE,     /* PCI de-instantiation */
+	CSIO_HWE_SUSPEND,        /* HW suspend for Online(hot) replacement */
+	CSIO_HWE_RESUME,         /* HW resume for Online(hot) replacement */
+	CSIO_HWE_MAX,		 /* Max HW event */
+};
+
+/* hw stats */
+struct csio_hw_stats {
+	uint32_t	n_evt_activeq;	/* Number of events in active Q */
+	uint32_t	n_evt_freeq;	/* Number of events in free Q */
+	uint32_t	n_evt_drop;	/* Number of events dropped */
+	uint32_t	n_evt_unexp;	/* Number of unexpected events */
+	uint32_t	n_pcich_offline;/* Number of pci channel offline */
+	uint32_t	n_lnlkup_miss;  /* Number of lnode lookup miss */
+	uint32_t	n_cpl_fw6_msg;	/* Number of cpl fw6 message*/
+	uint32_t	n_cpl_fw6_pld;	/* Number of cpl fw6 payload*/
+	uint32_t	n_cpl_unexp;	/* Number of unexpected cpl */
+	uint32_t	n_mbint_unexp;	/* Number of unexpected mbox */
+					/* interrupt */
+	uint32_t	n_plint_unexp;	/* Number of unexpected PL */
+					/* interrupt */
+	uint32_t	n_plint_cnt;	/* Number of PL interrupt */
+	uint32_t	n_int_stray;	/* Number of stray interrupt */
+	uint32_t	n_err;		/* Number of hw errors */
+	uint32_t	n_err_fatal;	/* Number of fatal errors */
+	uint32_t	n_err_nomem;	/* Number of memory alloc failure */
+	uint32_t	n_err_io;	/* Number of IO failure */
+	uint32_t	n_evt_sm[CSIO_HWE_MAX];	/* SM event counts, by event */
+	uint64_t	n_reset_start;  /* Start time after the reset */
+	uint32_t	rsvd1;
+};
+
+/* Defines for hw->flags */
+#define CSIO_HWF_MASTER			0x00000001	/* This is the Master
+							 * function for the
+							 * card.
+							 */
+#define	CSIO_HWF_HW_INTR_ENABLED	0x00000002	/* Is the HW interrupt
+							 * enable bit set?
+							 */
+#define	CSIO_HWF_FWEVT_PENDING		0x00000004	/* FW events pending */
+#define	CSIO_HWF_Q_MEM_ALLOCED		0x00000008	/* Queues have been
+							 * allocated memory.
+							 */
+#define	CSIO_HWF_Q_FW_ALLOCED		0x00000010	/* Queues have been
+							 * allocated in FW.
+							 */
+#define CSIO_HWF_VPD_VALID		0x00000020	/* Valid VPD copied */
+#define CSIO_HWF_DEVID_CACHED		0X00000040	/* PCI vendor & device
+							 * id cached */
+#define	CSIO_HWF_FWEVT_STOP		0x00000080	/* Stop processing
+							 * FW events
+							 */
+#define CSIO_HWF_USING_SOFT_PARAMS	0x00000100      /* Using FW config
+							 * params
+							 */
+#define	CSIO_HWF_HOST_INTR_ENABLED	0x00000200	/* Are host interrupts
+							 * enabled?
+							 */
+
+#define csio_is_hw_intr_enabled(__hw)	\
+				((__hw)->flags & CSIO_HWF_HW_INTR_ENABLED)
+#define csio_is_host_intr_enabled(__hw)	\
+				((__hw)->flags & CSIO_HWF_HOST_INTR_ENABLED)
+#define csio_is_hw_master(__hw)		((__hw)->flags & CSIO_HWF_MASTER)
+#define csio_is_valid_vpd(__hw)		((__hw)->flags & CSIO_HWF_VPD_VALID)
+#define csio_is_dev_id_cached(__hw)	((__hw)->flags & CSIO_HWF_DEVID_CACHED)
+#define csio_valid_vpd_copied(__hw)	((__hw)->flags |= CSIO_HWF_VPD_VALID)
+#define csio_dev_id_cached(__hw)	((__hw)->flags |= CSIO_HWF_DEVID_CACHED)
+
+/* Defines for intr_mode */
+enum csio_intr_mode {
+	CSIO_IM_NONE = 0,
+	CSIO_IM_INTX = 1,
+	CSIO_IM_MSI  = 2,
+	CSIO_IM_MSIX = 3,
+};
+
+/* Master HW structure: One per function */
+struct csio_hw {
+	struct csio_sm		sm;			/* State machine: should
+							 * be the 1st member.
+							 */
+	spinlock_t		lock;			/* Lock for hw */
+
+	struct csio_scsim	scsim;			/* SCSI module*/
+	struct csio_wrm		wrm;			/* Work request module*/
+	struct pci_dev		*pdev;			/* PCI device */
+
+	void __iomem		*regstart;		/* Virtual address of
+							 * register map
+							 */
+	/* SCSI queue sets */
+	uint32_t		num_sqsets;		/* Number of SCSI
+							 * queue sets */
+	uint32_t		num_scsi_msix_cpus;	/* Number of CPUs that
+							 * will be used
+							 * for ingress
+							 * processing.
+							 */
+
+	struct csio_scsi_qset	sqset[CSIO_MAX_PPORTS][CSIO_MAX_SCSI_CPU];
+	struct csio_scsi_cpu_info scsi_cpu_info[CSIO_MAX_PPORTS];
+
+	uint32_t		evtflag;		/* Event flag  */
+	uint32_t		flags;			/* HW flags */
+
+	struct csio_mgmtm	mgmtm;			/* management module */
+	struct csio_mbm		mbm;			/* Mailbox module */
+
+	/* Lnodes */
+	uint32_t		num_lns;		/* Number of lnodes */
+	struct csio_lnode	*rln;			/* Root lnode */
+	struct list_head	sln_head;		/* Sibling node list
+							 * head
+							 */
+	int			intr_iq_idx;		/* Forward interrupt
+							 * queue.
+							 */
+	int			fwevt_iq_idx;		/* FW evt queue */
+	struct work_struct	evtq_work;		/* Worker thread for
+							 * HW events.
+							 */
+	struct list_head	evt_free_q;		/* freelist of evt
+							 * elements
+							 */
+	struct list_head	evt_active_q;		/* active evt queue*/
+
+	/* board related info */
+	char			name[32];
+	char			hw_ver[16];
+	char			model_desc[32];
+	char			drv_version[32];
+	char			fwrev_str[32];
+	uint32_t		optrom_ver;
+	uint32_t		fwrev;
+	uint32_t		tp_vers;
+	char			chip_ver;
+	uint32_t		cfg_finiver;
+	uint32_t		cfg_finicsum;
+	uint32_t		cfg_cfcsum;
+	uint8_t			cfg_csum_status;
+	uint8_t			cfg_store;
+	enum csio_dev_state	fw_state;
+	struct csio_vpd		vpd;
+
+	uint8_t			pfn;			/* Physical Function
+							 * number
+							 */
+	uint32_t		port_vec;		/* Port vector */
+	uint8_t			num_pports;		/* Number of physical
+							 * ports.
+							 */
+	uint8_t			rst_retries;		/* Reset retries */
+	uint8_t			cur_evt;		/* current s/m evt */
+	uint8_t			prev_evt;		/* Previous s/m evt */
+	uint32_t		dev_num;		/* device number */
+	struct csio_pport	pport[CSIO_MAX_PPORTS];	/* Ports (XGMACs) */
+	struct csio_hw_params	params;			/* Hw parameters */
+
+	struct pci_pool		*scsi_pci_pool;		/* PCI pool for SCSI */
+	mempool_t		*mb_mempool;		/* Mailbox memory pool*/
+	mempool_t		*rnode_mempool;		/* rnode memory pool */
+
+	/* Interrupt */
+	enum csio_intr_mode	intr_mode;		/* INTx, MSI, MSIX */
+	uint32_t		fwevt_intr_idx;		/* FW evt MSIX/interrupt
+							 * index
+							 */
+	uint32_t		nondata_intr_idx;	/* nondata MSIX/intr
+							 * idx
+							 */
+
+	uint8_t			cfg_neq;		/* FW configured no of
+							 * egress queues
+							 */
+	uint8_t			cfg_niq;		/* FW configured no of
+							 * iq queues.
+							 */
+
+	struct csio_fcoe_res_info  fres_info;		/* Fcoe resource info */
+
+	/* MSIX vectors */
+	struct csio_msix_entries msix_entries[CSIO_MAX_MSIX_VECS];
+
+	struct dentry		*debugfs_root;		/* Debug FS */
+	struct csio_hw_stats	stats;			/* Hw statistics */
+};
+
+/* Register access macros */
+#define csio_reg(_b, _r)		((_b) + (_r))
+
+#define	csio_rd_reg8(_h, _r)		readb(csio_reg((_h)->regstart, (_r)))
+#define	csio_rd_reg16(_h, _r)		readw(csio_reg((_h)->regstart, (_r)))
+#define	csio_rd_reg32(_h, _r)		readl(csio_reg((_h)->regstart, (_r)))
+#define	csio_rd_reg64(_h, _r)		readq(csio_reg((_h)->regstart, (_r)))
+
+#define	csio_wr_reg8(_h, _v, _r)	writeb((_v), \
+						csio_reg((_h)->regstart, (_r)))
+#define	csio_wr_reg16(_h, _v, _r)	writew((_v), \
+						csio_reg((_h)->regstart, (_r)))
+#define	csio_wr_reg32(_h, _v, _r)	writel((_v), \
+						csio_reg((_h)->regstart, (_r)))
+#define	csio_wr_reg64(_h, _v, _r)	writeq((_v), \
+						csio_reg((_h)->regstart, (_r)))
+
+void csio_set_reg_field(struct csio_hw *, uint32_t, uint32_t, uint32_t);
+
+/* Core clocks <==> uSecs */
+static inline uint32_t
+csio_core_ticks_to_us(struct csio_hw *hw, uint32_t ticks)
+{	/* NOTE(review): 32-bit ticks * 1000 may wrap; vpd.cclk must be != 0 */
+	/* add Core Clock / 2 to round ticks to nearest uS */
+	return (ticks * 1000 + hw->vpd.cclk/2) / hw->vpd.cclk;
+}
+
+static inline uint32_t
+csio_us_to_core_ticks(struct csio_hw *hw, uint32_t us)
+{	/* Rounds down. NOTE(review): 32-bit us * vpd.cclk may wrap */
+	return (us * hw->vpd.cclk) / 1000;
+}
+
+/* Easy access macros */
+#define csio_hw_to_wrm(hw)		((struct csio_wrm *)(&(hw)->wrm))
+#define csio_hw_to_mbm(hw)		((struct csio_mbm *)(&(hw)->mbm))
+#define csio_hw_to_scsim(hw)		((struct csio_scsim *)(&(hw)->scsim))
+#define csio_hw_to_mgmtm(hw)		((struct csio_mgmtm *)(&(hw)->mgmtm))
+
+#define CSIO_PCI_BUS(hw)		((hw)->pdev->bus->number)
+#define CSIO_PCI_DEV(hw)		(PCI_SLOT((hw)->pdev->devfn))
+#define CSIO_PCI_FUNC(hw)		(PCI_FUNC((hw)->pdev->devfn))
+
+#define csio_set_fwevt_intr_idx(_h, _i)		((_h)->fwevt_intr_idx = (_i))
+#define csio_get_fwevt_intr_idx(_h)		((_h)->fwevt_intr_idx)
+#define csio_set_nondata_intr_idx(_h, _i)	((_h)->nondata_intr_idx = (_i))
+#define csio_get_nondata_intr_idx(_h)		((_h)->nondata_intr_idx)
+
+/* Printing/logging */
+#define CSIO_DEVID(__dev)		((__dev)->dev_num)
+#define CSIO_DEVID_LO(__dev)		(CSIO_DEVID((__dev)) & 0xFFFF)
+#define CSIO_DEVID_HI(__dev)		((CSIO_DEVID((__dev)) >> 16) & 0xFFFF)
+
+#define csio_info(__hw, __fmt, ...)					\
+			dev_info(&(__hw)->pdev->dev, __fmt, ##__VA_ARGS__)
+
+#define csio_fatal(__hw, __fmt, ...)					\
+			dev_crit(&(__hw)->pdev->dev, __fmt, ##__VA_ARGS__)
+
+#define csio_err(__hw, __fmt, ...)					\
+			dev_err(&(__hw)->pdev->dev, __fmt, ##__VA_ARGS__)
+
+#define csio_warn(__hw, __fmt, ...)					\
+			dev_warn(&(__hw)->pdev->dev, __fmt, ##__VA_ARGS__)
+
+#ifdef __CSIO_DEBUG__
+#define csio_dbg(__hw, __fmt, ...)					\
+			csio_info((__hw), __fmt, ##__VA_ARGS__)
+#else	/* no-op statement; the arguments are not evaluated */
+#define csio_dbg(__hw, __fmt, ...)	do { } while (0)
+#endif	/* __CSIO_DEBUG__ */
+
+int csio_mgmt_req_lookup(struct csio_mgmtm *, struct csio_ioreq *);
+void csio_hw_intr_disable(struct csio_hw *);
+int csio_hw_slow_intr_handler(struct csio_hw *hw);
+int csio_hw_start(struct csio_hw *);
+int csio_hw_stop(struct csio_hw *);
+int csio_hw_reset(struct csio_hw *);
+int csio_is_hw_ready(struct csio_hw *);
+int csio_is_hw_removing(struct csio_hw *);
+
+int csio_fwevtq_handler(struct csio_hw *);
+void csio_evtq_worker(struct work_struct *);
+int csio_enqueue_evt(struct csio_hw *hw, enum csio_evt type,
+				void *evt_msg, uint16_t len);
+void csio_evtq_flush(struct csio_hw *hw);
+
+int csio_request_irqs(struct csio_hw *);
+void csio_intr_enable(struct csio_hw *);
+void csio_intr_disable(struct csio_hw *, bool);
+
+struct csio_lnode *csio_lnode_alloc(struct csio_hw *);
+int csio_config_queues(struct csio_hw *);
+
+int csio_hw_mc_read(struct csio_hw *, uint32_t,
+			      uint32_t *, uint64_t *);
+int csio_hw_edc_read(struct csio_hw *, int, uint32_t, uint32_t *,
+			       uint64_t *);
+int csio_hw_init(struct csio_hw *);
+void csio_hw_exit(struct csio_hw *);
+#endif /* ifndef __CSIO_HW_H__ */
diff --git a/drivers/scsi/csiostor/csio_init.h b/drivers/scsi/csiostor/csio_init.h
new file mode 100644
index 0000000..0838fd7
--- /dev/null
+++ b/drivers/scsi/csiostor/csio_init.h
@@ -0,0 +1,158 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2008-2012 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __CSIO_INIT_H__
+#define __CSIO_INIT_H__
+
+#include <linux/pci.h>
+#include <linux/if_ether.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_device.h>
+#include <scsi/scsi_host.h>
+#include <scsi/scsi_transport_fc.h>
+
+#include "csio_scsi.h"
+#include "csio_lnode.h"
+#include "csio_rnode.h"
+#include "csio_hw.h"
+
+#define CSIO_DRV_AUTHOR			"Chelsio Communications"
+#define CSIO_DRV_LICENSE		"Dual BSD/GPL"
+#define CSIO_DRV_DESC			"Chelsio FCoE driver"
+#define CSIO_DRV_VERSION		"1.0.0"
+
+#define CSIO_DEVICE(devid, idx)					\
+{ PCI_VENDOR_ID_CHELSIO, (devid), PCI_ANY_ID, PCI_ANY_ID, 0, 0, (idx) }
+
+#define CSIO_IS_T4_FPGA(_dev)		(((_dev) == CSIO_DEVID_PE10K) ||\
+					 ((_dev) == CSIO_DEVID_PE10K_PF1))
+
+/* FCoE device IDs */
+#define CSIO_DEVID_PE10K		0xA000
+#define CSIO_DEVID_PE10K_PF1		0xA001
+#define CSIO_DEVID_T440DBG_FCOE		0x4600
+#define CSIO_DEVID_T420CR_FCOE		0x4601
+#define CSIO_DEVID_T422CR_FCOE		0x4602
+#define CSIO_DEVID_T440CR_FCOE		0x4603
+#define CSIO_DEVID_T420BCH_FCOE		0x4604
+#define CSIO_DEVID_T440BCH_FCOE		0x4605
+#define CSIO_DEVID_T440CH_FCOE		0x4606
+#define CSIO_DEVID_T420SO_FCOE		0x4607
+#define CSIO_DEVID_T420CX_FCOE		0x4608
+#define CSIO_DEVID_T420BT_FCOE		0x4609
+#define CSIO_DEVID_T404BT_FCOE		0x460A
+#define CSIO_DEVID_B420_FCOE		0x460B
+#define CSIO_DEVID_B404_FCOE		0x460C
+#define CSIO_DEVID_T480CR_FCOE		0x460D
+#define CSIO_DEVID_T440LPCR_FCOE	0x460E
+
+extern struct fc_function_template csio_fc_transport_funcs;
+extern struct fc_function_template csio_fc_transport_vport_funcs;
+
+void csio_fchost_attr_init(struct csio_lnode *);
+
+/* INTx handlers */
+void csio_scsi_intx_handler(struct csio_hw *, void *, uint32_t,
+			       struct csio_fl_dma_buf *, void *);
+
+void csio_fwevt_intx_handler(struct csio_hw *, void *, uint32_t,
+				struct csio_fl_dma_buf *, void *);
+
+/* Common os lnode APIs */
+void csio_lnodes_block_request(struct csio_hw *);
+void csio_lnodes_unblock_request(struct csio_hw *);
+void csio_lnodes_block_by_port(struct csio_hw *, uint8_t);
+void csio_lnodes_unblock_by_port(struct csio_hw *, uint8_t);
+
+struct csio_lnode *csio_shost_init(struct csio_hw *, struct device *, bool,
+					struct csio_lnode *);
+void csio_shost_exit(struct csio_lnode *);
+void csio_lnodes_exit(struct csio_hw *, bool);
+
+/* Treats ln as the address of hostdata[0]; returns the enclosing Scsi_Host. */
+static inline struct Scsi_Host *csio_ln_to_shost(struct csio_lnode *ln)
+{
+	return container_of((void *)ln, struct Scsi_Host, hostdata[0]);
+}
+
+/* SCSI -- get/put ioreqs with scsim->freelist_lock taken (irqsave) */
+static inline struct csio_ioreq *
+csio_get_scsi_ioreq_lock(struct csio_hw *hw, struct csio_scsim *scsim)
+{
+	unsigned long irqflags;
+	struct csio_ioreq *req;
+
+	spin_lock_irqsave(&scsim->freelist_lock, irqflags);
+	req = csio_get_scsi_ioreq(scsim);
+	spin_unlock_irqrestore(&scsim->freelist_lock, irqflags);
+
+	return req;
+}
+
+/* Locked put: csio_put_scsi_ioreq() under scsim->freelist_lock (irqsave). */
+static inline void csio_put_scsi_ioreq_lock(struct csio_hw *hw,
+		struct csio_scsim *scsim, struct csio_ioreq *ioreq)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&scsim->freelist_lock, irqflags);
+	csio_put_scsi_ioreq(scsim, ioreq);
+	spin_unlock_irqrestore(&scsim->freelist_lock, irqflags);
+}
+
+/* Called in interrupt context: forwards reqlist/n to
+ * csio_put_scsi_ioreq_list() with scsim->freelist_lock held (irqsave). */
+static inline void csio_put_scsi_ioreq_list_lock(struct csio_hw *hw,
+		struct csio_scsim *scsim, struct list_head *reqlist, int n)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&scsim->freelist_lock, irqflags);
+	csio_put_scsi_ioreq_list(scsim, reqlist, n);
+	spin_unlock_irqrestore(&scsim->freelist_lock, irqflags);
+}
+
+/* Called in interrupt context: forwards reqlist/n to
+ * csio_put_scsi_ddp_list() under hw->lock (not scsim->freelist_lock). */
+static inline void csio_put_scsi_ddp_list_lock(struct csio_hw *hw,
+		struct csio_scsim *scsim, struct list_head *reqlist, int n)
+{
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&hw->lock, irqflags);
+	csio_put_scsi_ddp_list(scsim, reqlist, n);
+	spin_unlock_irqrestore(&hw->lock, irqflags);
+}
+
+#endif /* ifndef __CSIO_INIT_H__ */
diff --git a/drivers/scsi/csiostor/t4fw_api_stor.h b/drivers/scsi/csiostor/t4fw_api_stor.h
new file mode 100644
index 0000000..b96903a
--- /dev/null
+++ b/drivers/scsi/csiostor/t4fw_api_stor.h
@@ -0,0 +1,578 @@
+/*
+ * This file is part of the Chelsio FCoE driver for Linux.
+ *
+ * Copyright (c) 2009-2010 Chelsio Communications, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _T4FW_API_STOR_H_
+#define _T4FW_API_STOR_H_
+
+
+/******************************************************************************
+ *   R E T U R N   V A L U E S
+ ********************************/
+
+enum fw_retval {
+	FW_SUCCESS		= 0,	/* completed successfully */
+	FW_EPERM		= 1,	/* operation not permitted */
+	FW_ENOENT		= 2,	/* no such file or directory */
+	FW_EIO			= 5,	/* input/output error; hw bad */
+	FW_ENOEXEC		= 8,	/* exec format error; inv microcode */
+	FW_EAGAIN		= 11,	/* try again */
+	FW_ENOMEM		= 12,	/* out of memory */
+	FW_EFAULT		= 14,	/* bad address; fw bad */
+	FW_EBUSY		= 16,	/* resource busy */
+	FW_EEXIST		= 17,	/* file exists */
+	FW_EINVAL		= 22,	/* invalid argument */
+	FW_ENOSPC		= 28,	/* no space left on device */
+	FW_ENOSYS		= 38,	/* functionality not implemented */
+	FW_EPROTO		= 71,	/* protocol error */
+	FW_EADDRINUSE		= 98,	/* address already in use */
+	FW_EADDRNOTAVAIL	= 99,	/* cannot assign requested address */
+	FW_ENETDOWN		= 100,	/* network is down */
+	FW_ENETUNREACH		= 101,	/* network is unreachable */
+	FW_ENOBUFS		= 105,	/* no buffer space available */
+	FW_ETIMEDOUT		= 110,	/* timeout */
+	FW_EINPROGRESS		= 115,	/* fw internal */
+	FW_SCSI_ABORT_REQUESTED	= 128,	/* */
+	FW_SCSI_ABORT_TIMEDOUT	= 129,	/* */
+	FW_SCSI_ABORTED		= 130,	/* */
+	FW_SCSI_CLOSE_REQUESTED	= 131,	/* */
+	FW_ERR_LINK_DOWN	= 132,	/* */
+	FW_RDEV_NOT_READY	= 133,	/* */
+	FW_ERR_RDEV_LOST	= 134,	/* */
+	FW_ERR_RDEV_LOGO	= 135,	/* */
+	FW_FCOE_NO_XCHG		= 136,	/* */
+	FW_SCSI_RSP_ERR		= 137,	/* */
+	FW_ERR_RDEV_IMPL_LOGO	= 138,	/* */
+	FW_SCSI_UNDER_FLOW_ERR  = 139,	/* */
+	FW_SCSI_OVER_FLOW_ERR   = 140,	/* */
+	FW_SCSI_DDP_ERR		= 141,	/* DDP error */
+	FW_SCSI_TASK_ERR	= 142,	/* No SCSI tasks available */
+};
+
+enum fw_fcoe_link_sub_op {
+	FCOE_LINK_DOWN	= 0x0,
+	FCOE_LINK_UP	= 0x1,
+	FCOE_LINK_COND	= 0x2,
+};
+
+enum fw_fcoe_link_status {
+	FCOE_LINKDOWN	= 0x0,
+	FCOE_LINKUP	= 0x1,
+};
+
+enum fw_ofld_prot {
+	PROT_FCOE	= 0x1,
+	PROT_ISCSI	= 0x2,
+};
+
+enum rport_type_fcoe {
+	FLOGI_VFPORT	= 0x1,		/* 0xfffffe */
+	FDISC_VFPORT	= 0x2,		/* 0xfffffe */
+	NS_VNPORT	= 0x3,		/* 0xfffffc */
+	REG_FC4_VNPORT	= 0x4,		/* any FC4 type VN_PORT */
+	REG_VNPORT	= 0x5,		/* 0xfffxxx - non FC4 port in switch */
+	FDMI_VNPORT	= 0x6,		/* 0xfffffa */
+	FAB_CTLR_VNPORT	= 0x7,		/* 0xfffffd */
+};
+
+enum event_cause_fcoe {
+	PLOGI_ACC_RCVD		= 0x01,
+	PLOGI_RJT_RCVD		= 0x02,
+	PLOGI_RCVD		= 0x03,
+	PLOGO_RCVD		= 0x04,
+	PRLI_ACC_RCVD		= 0x05,
+	PRLI_RJT_RCVD		= 0x06,
+	PRLI_RCVD		= 0x07,
+	PRLO_RCVD		= 0x08,
+	NPORT_ID_CHGD		= 0x09,
+	FLOGO_RCVD		= 0x0a,
+	CLR_VIRT_LNK_RCVD	= 0x0b,
+	FLOGI_ACC_RCVD		= 0x0c,
+	FLOGI_RJT_RCVD		= 0x0d,
+	FDISC_ACC_RCVD		= 0x0e,
+	FDISC_RJT_RCVD		= 0x0f,
+	FLOGI_TMO_MAX_RETRY	= 0x10,
+	IMPL_LOGO_ADISC_ACC	= 0x11,
+	IMPL_LOGO_ADISC_RJT	= 0x12,
+	IMPL_LOGO_ADISC_CNFLT	= 0x13,
+	PRLI_TMO		= 0x14,
+	ADISC_TMO		= 0x15,
+	RSCN_DEV_LOST		= 0x16,
+	SCR_ACC_RCVD		= 0x17,
+	ADISC_RJT_RCVD		= 0x18,
+	LOGO_SNT		= 0x19,
+	PROTO_ERR_IMPL_LOGO	= 0x1a,
+};
+
+enum fcoe_cmn_type {
+	FCOE_ELS,
+	FCOE_CT,
+	FCOE_SCSI_CMD,
+	FCOE_UNSOL_ELS,
+};
+
+enum fw_wr_stor_opcodes {
+	FW_RDEV_WR                     = 0x38,
+	FW_FCOE_ELS_CT_WR              = 0x30,
+	FW_SCSI_WRITE_WR               = 0x31,
+	FW_SCSI_READ_WR                = 0x32,
+	FW_SCSI_CMD_WR                 = 0x33,
+	FW_SCSI_ABRT_CLS_WR            = 0x34,
+};
+
+struct fw_rdev_wr {
+	__be32 op_to_immdlen;
+	__be32 alloc_to_len16;
+	__be64 cookie;
+	u8     protocol;
+	u8     event_cause;
+	u8     cur_state;
+	u8     prev_state;
+	__be32 flags_to_assoc_flowid;
+	union rdev_entry {
+		struct fcoe_rdev_entry {
+			__be32 flowid;
+			u8     protocol;
+			u8     event_cause;
+			u8     flags;
+			u8     rjt_reason;
+			u8     cur_login_st;
+			u8     prev_login_st;
+			__be16 rcv_fr_sz;
+			u8     rd_xfer_rdy_to_rport_type;
+			u8     vft_to_qos;
+			u8     org_proc_assoc_to_acc_rsp_code;
+			u8     enh_disc_to_tgt;
+			u8     wwnn[8];
+			u8     wwpn[8];
+			__be16 iqid;
+			u8     fc_oui[3];
+			u8     r_id[3];
+		} fcoe_rdev;
+		struct iscsi_rdev_entry {
+			__be32 flowid;
+			u8     protocol;
+			u8     event_cause;
+			u8     flags;
+			u8     r3;
+			__be16 iscsi_opts;
+			__be16 tcp_opts;
+			__be16 ip_opts;
+			__be16 max_rcv_len;
+			__be16 max_snd_len;
+			__be16 first_brst_len;
+			__be16 max_brst_len;
+			__be16 r4;
+			__be16 def_time2wait;
+			__be16 def_time2ret;
+			__be16 nop_out_intrvl;
+			__be16 non_scsi_to;
+			__be16 isid;
+			__be16 tsid;
+			__be16 port;
+			__be16 tpgt;
+			u8     r5[6];
+			__be16 iqid;
+		} iscsi_rdev;
+	} u;
+};
+
+#define FW_RDEV_WR_FLOWID_GET(x)	(((x) >> 8) & 0xfffff)
+#define FW_RDEV_WR_ASSOC_FLOWID_GET(x)	(((x) >> 0) & 0xfffff)
+#define FW_RDEV_WR_RPORT_TYPE_GET(x)	(((x) >> 0) & 0x1f)
+#define FW_RDEV_WR_NPIV_GET(x)		(((x) >> 6) & 0x1)
+#define FW_RDEV_WR_CLASS_GET(x)		(((x) >> 4) & 0x3)
+#define FW_RDEV_WR_TASK_RETRY_ID_GET(x)	(((x) >> 5) & 0x1)
+#define FW_RDEV_WR_RETRY_GET(x)		(((x) >> 4) & 0x1)
+#define FW_RDEV_WR_CONF_CMPL_GET(x)	(((x) >> 3) & 0x1)
+#define FW_RDEV_WR_INI_GET(x)		(((x) >> 1) & 0x1)
+#define FW_RDEV_WR_TGT_GET(x)		(((x) >> 0) & 0x1)
+
+struct fw_fcoe_els_ct_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	u8     tmo_val;
+	u8     els_ct_type;
+	u8     ctl_pri;
+	u8     cp_en_class;
+	__be16 xfer_cnt;
+	u8     fl_to_sp;
+	u8     l_id[3];
+	u8     r5;
+	u8     r_id[3];
+	__be64 rsp_dmaaddr;
+	__be32 rsp_dmalen;
+	__be32 r6;
+};
+
+#define FW_FCOE_ELS_CT_WR_OPCODE(x)		((x) << 24)
+#define FW_FCOE_ELS_CT_WR_OPCODE_GET(x)		(((x) >> 24) & 0xff)
+#define FW_FCOE_ELS_CT_WR_IMMDLEN(x)		((x) << 0)
+#define FW_FCOE_ELS_CT_WR_IMMDLEN_GET(x)	(((x) >> 0) & 0xff)
+#define FW_FCOE_ELS_CT_WR_SP(x)			((x) << 0)
+
+struct fw_scsi_write_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	u8     tmo_val;
+	u8     use_xfer_cnt;
+	union fw_scsi_write_priv {
+		struct fcoe_write_priv {
+			u8   ctl_pri;
+			u8   cp_en_class;
+			u8   r3_lo[2];
+		} fcoe;
+		struct iscsi_write_priv {
+			u8   r3[4];
+		} iscsi;
+	} u;
+	__be32 xfer_cnt;
+	__be32 ini_xfer_cnt;
+	__be64 rsp_dmaaddr;
+	__be32 rsp_dmalen;
+	__be32 r4;
+};
+
+#define FW_SCSI_WRITE_WR_IMMDLEN(x)	((x) << 0)
+
+struct fw_scsi_read_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	u8     tmo_val;
+	u8     use_xfer_cnt;
+	union fw_scsi_read_priv {
+		struct fcoe_read_priv {
+			u8   ctl_pri;
+			u8   cp_en_class;
+			u8   r3_lo[2];
+		} fcoe;
+		struct iscsi_read_priv {
+			u8   r3[4];
+		} iscsi;
+	} u;
+	__be32 xfer_cnt;
+	__be32 ini_xfer_cnt;
+	__be64 rsp_dmaaddr;
+	__be32 rsp_dmalen;
+	__be32 r4;
+};
+
+#define FW_SCSI_READ_WR_IMMDLEN(x)	((x) << 0)
+
+struct fw_scsi_cmd_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	u8     tmo_val;
+	u8     r3;
+	union fw_scsi_cmd_priv {
+		struct fcoe_cmd_priv {
+			u8   ctl_pri;
+			u8   cp_en_class;
+			u8   r4_lo[2];
+		} fcoe;
+		struct iscsi_cmd_priv {
+			u8   r4[4];
+		} iscsi;
+	} u;
+	u8     r5[8];
+	__be64 rsp_dmaaddr;
+	__be32 rsp_dmalen;
+	__be32 r6;
+};
+
+#define FW_SCSI_CMD_WR_IMMDLEN(x)	((x) << 0)
+
+#define SCSI_ABORT 0
+#define SCSI_CLOSE 1
+
+struct fw_scsi_abrt_cls_wr {
+	__be32 op_immdlen;
+	__be32 flowid_len16;
+	__be64 cookie;
+	__be16 iqid;
+	u8     tmo_val;
+	u8     sub_opcode_to_chk_all_io;
+	u8     r3[4];
+	__be64 t_cookie;
+};
+
+#define FW_SCSI_ABRT_CLS_WR_SUB_OPCODE(x)	((x) << 2)
+#define FW_SCSI_ABRT_CLS_WR_SUB_OPCODE_GET(x)	(((x) >> 2) & 0x3f)
+#define FW_SCSI_ABRT_CLS_WR_CHK_ALL_IO(x)	((x) << 0)
+
+enum fw_cmd_stor_opcodes {
+	FW_FCOE_RES_INFO_CMD           = 0x31,
+	FW_FCOE_LINK_CMD               = 0x32,
+	FW_FCOE_VNP_CMD                = 0x33,
+	FW_FCOE_SPARAMS_CMD            = 0x35,
+	FW_FCOE_STATS_CMD              = 0x37,
+	FW_FCOE_FCF_CMD                = 0x38,
+};
+
+struct fw_fcoe_res_info_cmd {
+	__be32 op_to_read;
+	__be32 retval_len16;
+	__be16 e_d_tov;
+	__be16 r_a_tov_seq;
+	__be16 r_a_tov_els;
+	__be16 r_r_tov;
+	__be32 max_xchgs;
+	__be32 max_ssns;
+	__be32 used_xchgs;
+	__be32 used_ssns;
+	__be32 max_fcfs;
+	__be32 max_vnps;
+	__be32 used_fcfs;
+	__be32 used_vnps;
+};
+
+struct fw_fcoe_link_cmd {
+	__be32 op_to_portid;
+	__be32 retval_len16;
+	__be32 sub_opcode_fcfi;
+	u8     r3;
+	u8     lstatus;
+	__be16 flags;
+	u8     r4;
+	u8     set_vlan;
+	__be16 vlan_id;
+	__be32 vnpi_pkd;
+	__be16 r6;
+	u8     phy_mac[6];
+	u8     vnport_wwnn[8];
+	u8     vnport_wwpn[8];
+};
+
+#define FW_FCOE_LINK_CMD_PORTID(x)	((x) << 0)
+#define FW_FCOE_LINK_CMD_PORTID_GET(x)	(((x) >> 0) & 0xf)
+#define FW_FCOE_LINK_CMD_SUB_OPCODE(x)  ((x) << 24U)
+#define FW_FCOE_LINK_CMD_FCFI(x)	((x) << 0)
+#define FW_FCOE_LINK_CMD_FCFI_GET(x)	(((x) >> 0) & 0xffffff)
+#define FW_FCOE_LINK_CMD_VNPI_GET(x)	(((x) >> 0) & 0xfffff)
+
+struct fw_fcoe_vnp_cmd {
+	__be32 op_to_fcfi;
+	__be32 alloc_to_len16;
+	__be32 gen_wwn_to_vnpi;
+	__be32 vf_id;
+	__be16 iqid;
+	u8   vnport_mac[6];
+	u8   vnport_wwnn[8];
+	u8   vnport_wwpn[8];
+	u8   cmn_srv_parms[16];
+	u8   clsp_word_0_1[8];
+};
+
+#define FW_FCOE_VNP_CMD_FCFI(x)		((x) << 0)
+#define FW_FCOE_VNP_CMD_ALLOC		(1U << 31)
+#define FW_FCOE_VNP_CMD_FREE		(1U << 30)
+#define FW_FCOE_VNP_CMD_MODIFY		(1U << 29)
+#define FW_FCOE_VNP_CMD_GEN_WWN		(1U << 22)
+#define FW_FCOE_VNP_CMD_VFID_EN		(1U << 20)
+#define FW_FCOE_VNP_CMD_VNPI(x)		((x) << 0)
+#define FW_FCOE_VNP_CMD_VNPI_GET(x)	(((x) >> 0) & 0xfffff)
+
+struct fw_fcoe_sparams_cmd {
+	__be32 op_to_portid;
+	__be32 retval_len16;
+	u8     r3[7];
+	u8     cos;
+	u8     lport_wwnn[8];
+	u8     lport_wwpn[8];
+	u8     cmn_srv_parms[16];
+	u8     cls_srv_parms[16];
+};
+
+#define FW_FCOE_SPARAMS_CMD_PORTID(x)	((x) << 0)
+
+struct fw_fcoe_stats_cmd {
+	__be32 op_to_flowid;
+	__be32 free_to_len16;
+	union fw_fcoe_stats {
+		struct fw_fcoe_stats_ctl {
+			u8   nstats_port;
+			u8   port_valid_ix;
+			__be16 r6;
+			__be32 r7;
+			__be64 stat0;
+			__be64 stat1;
+			__be64 stat2;
+			__be64 stat3;
+			__be64 stat4;
+			__be64 stat5;
+		} ctl;
+		struct fw_fcoe_port_stats {
+			__be64 tx_bcast_bytes;
+			__be64 tx_bcast_frames;
+			__be64 tx_mcast_bytes;
+			__be64 tx_mcast_frames;
+			__be64 tx_ucast_bytes;
+			__be64 tx_ucast_frames;
+			__be64 tx_drop_frames;
+			__be64 tx_offload_bytes;
+			__be64 tx_offload_frames;
+			__be64 rx_bcast_bytes;
+			__be64 rx_bcast_frames;
+			__be64 rx_mcast_bytes;
+			__be64 rx_mcast_frames;
+			__be64 rx_ucast_bytes;
+			__be64 rx_ucast_frames;
+			__be64 rx_err_frames;
+		} port_stats;
+		struct fw_fcoe_fcf_stats {
+			__be32 fip_tx_bytes;
+			__be32 fip_tx_fr;
+			__be64 fcf_ka;
+			__be64 mcast_adv_rcvd;
+			__be16 ucast_adv_rcvd;
+			__be16 sol_sent;
+			__be16 vlan_req;
+			__be16 vlan_rpl;
+			__be16 clr_vlink;
+			__be16 link_down;
+			__be16 link_up;
+			__be16 logo;
+			__be16 flogi_req;
+			__be16 flogi_rpl;
+			__be16 fdisc_req;
+			__be16 fdisc_rpl;
+			__be16 fka_prd_chg;
+			__be16 fc_map_chg;
+			__be16 vfid_chg;
+			u8   no_fka_req;
+			u8   no_vnp;
+		} fcf_stats;
+		struct fw_fcoe_pcb_stats {
+			__be64 tx_bytes;
+			__be64 tx_frames;
+			__be64 rx_bytes;
+			__be64 rx_frames;
+			__be32 vnp_ka;
+			__be32 unsol_els_rcvd;
+			__be64 unsol_cmd_rcvd;
+			__be16 implicit_logo;
+			__be16 flogi_inv_sparm;
+			__be16 fdisc_inv_sparm;
+			__be16 flogi_rjt;
+			__be16 fdisc_rjt;
+			__be16 no_ssn;
+			__be16 mac_flt_fail;
+			__be16 inv_fr_rcvd;
+		} pcb_stats;
+		struct fw_fcoe_scb_stats {
+			__be64 tx_bytes;
+			__be64 tx_frames;
+			__be64 rx_bytes;
+			__be64 rx_frames;
+			__be32 host_abrt_req;
+			__be32 adap_auto_abrt;
+			__be32 adap_abrt_rsp;
+			__be32 host_ios_req;
+			__be16 ssn_offl_ios;
+			__be16 ssn_not_rdy_ios;
+			u8   rx_data_ddp_err;
+			u8   ddp_flt_set_err;
+			__be16 rx_data_fr_err;
+			u8   bad_st_abrt_req;
+			u8   no_io_abrt_req;
+			u8   abort_tmo;
+			u8   abort_tmo_2;
+			__be32 abort_req;
+			u8   no_ppod_res_tmo;
+			u8   bp_tmo;
+			u8   adap_auto_cls;
+			u8   no_io_cls_req;
+			__be32 host_cls_req;
+			__be64 unsol_cmd_rcvd;
+			__be32 plogi_req_rcvd;
+			__be32 prli_req_rcvd;
+			__be16 logo_req_rcvd;
+			__be16 prlo_req_rcvd;
+			__be16 plogi_rjt_rcvd;
+			__be16 prli_rjt_rcvd;
+			__be32 adisc_req_rcvd;
+			__be32 rscn_rcvd;
+			__be32 rrq_req_rcvd;
+			__be32 unsol_els_rcvd;
+			u8   adisc_rjt_rcvd;
+			u8   scr_rjt;
+			u8   ct_rjt;
+			u8   inval_bls_rcvd;
+			__be32 ba_rjt_rcvd;
+		} scb_stats;
+	} u;
+};
+
+#define FW_FCOE_STATS_CMD_FLOWID(x)	((x) << 0)
+#define FW_FCOE_STATS_CMD_FREE		(1U << 30)
+#define FW_FCOE_STATS_CMD_NSTATS(x)	((x) << 4)
+#define FW_FCOE_STATS_CMD_PORT(x)	((x) << 0)
+#define FW_FCOE_STATS_CMD_PORT_VALID	(1U << 7)
+#define FW_FCOE_STATS_CMD_IX(x)		((x) << 0)
+
+struct fw_fcoe_fcf_cmd {
+	__be32 op_to_fcfi;
+	__be32 retval_len16;
+	__be16 priority_pkd;
+	u8     mac[6];
+	u8     name_id[8];
+	u8     fabric[8];
+	__be16 vf_id;
+	__be16 max_fcoe_size;
+	u8     vlan_id;
+	u8     fc_map[3];
+	__be32 fka_adv;
+	__be32 r6;
+	u8     r7_hi;
+	u8     fpma_to_portid;
+	u8     spma_mac[6];
+	__be64 r8;
+};
+
+#define FW_FCOE_FCF_CMD_FCFI(x)		((x) << 0)
+#define FW_FCOE_FCF_CMD_FCFI_GET(x)	(((x) >> 0) & 0xfffff)
+#define FW_FCOE_FCF_CMD_PRIORITY_GET(x)	(((x) >> 0) & 0xff)
+#define FW_FCOE_FCF_CMD_FPMA_GET(x)	(((x) >> 6) & 0x1)
+#define FW_FCOE_FCF_CMD_SPMA_GET(x)	(((x) >> 5) & 0x1)
+#define FW_FCOE_FCF_CMD_LOGIN_GET(x)	(((x) >> 4) & 0x1)
+#define FW_FCOE_FCF_CMD_PORTID_GET(x)	(((x) >> 0) & 0xf)
+
+#endif /* _T4FW_API_STOR_H_ */
-- 
1.7.1


^ permalink raw reply related

* [V4 PATCH 0/8] csiostor: Chelsio FCoE offload driver submission
From: Naresh Kumar Inna @ 2012-09-12 17:18 UTC (permalink / raw)
  To: JBottomley, linux-scsi, dm, leedom; +Cc: netdev, naresh, chethan

This is the initial submission of the Chelsio FCoE offload driver (csiostor)
to the upstream kernel. This driver currently supports FCoE offload
functionality over Chelsio T4-based 10Gb Converged Network Adapters.

The following patches contain the driver sources for csiostor driver and
updates to firmware/hardware header files shared between csiostor,
cxgb4 (Chelsio T4-based NIC driver) and cxgb4vf (Chelsio T4-based Virtual
Function NIC driver). The csiostor driver is dependent on these
header updates. These patches have been generated against scsi 'misc' branch.

csiostor is a low level SCSI driver that interfaces with PCI, SCSI midlayer and
FC transport subsystems. This driver claims the FCoE PCIe function on
Chelsio Converged Network Adapters. It relies on firmware events for slow path
operations like discovery, thereby offloading session management. The driver
programs firmware via Work Request interfaces for fast path I/O offload
features.

In this version (V4), the patches have been re-arranged to make them bisectable.

Here is a brief description of the patches:
[V4 PATCH 1/8]: Updates to header files shared between cxgb4, cxgb4vf and
                csiostor.
[V4 PATCH 2/8]: Header files part 1.
[V4 PATCH 3/8]: Header files part 2.
[V4 PATCH 4/8]: Driver initialization and Work Request services.
[V4 PATCH 5/8]: FC transport interfaces and mailbox services.
[V4 PATCH 6/8]: Local and remote port state tracking functionality.
[V4 PATCH 7/8]: Interrupt handling and fast path I/O functionality.
[V4 PATCH 8/8]: Hardware interface, Makefile and Kconfig changes.

Naresh Kumar Inna (8):
  cxgb4/cxgb4vf: Chelsio FCoE offload driver submission (common header
    updates).
  csiostor: Chelsio FCoE offload driver submission (headers part 1).
  csiostor: Chelsio FCoE offload driver submission (headers part 2).
  csiostor: Chelsio FCoE offload driver submission (sources part 1).
  csiostor: Chelsio FCoE offload driver submission (sources part 2).
  csiostor: Chelsio FCoE offload driver submission (sources part 3).
  csiostor: Chelsio FCoE offload driver submission (sources part 4).
  csiostor: Chelsio FCoE offload driver submission (sources part 5).

 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |    2 +-
 drivers/net/ethernet/chelsio/cxgb4/sge.c        |   10 +-
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c      |   16 +-
 drivers/net/ethernet/chelsio/cxgb4/t4_msg.h     |    1 +
 drivers/net/ethernet/chelsio/cxgb4/t4_regs.h    |   69 +-
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h   |  104 +-
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c      |   11 +-
 drivers/scsi/Kconfig                            |    1 +
 drivers/scsi/Makefile                           |    1 +
 drivers/scsi/csiostor/Kconfig                   |   19 +
 drivers/scsi/csiostor/Makefile                  |   11 +
 drivers/scsi/csiostor/csio_attr.c               |  809 +++++
 drivers/scsi/csiostor/csio_defs.h               |  108 +
 drivers/scsi/csiostor/csio_hw.c                 | 4396 +++++++++++++++++++++++
 drivers/scsi/csiostor/csio_hw.h                 |  666 ++++
 drivers/scsi/csiostor/csio_init.c               | 1272 +++++++
 drivers/scsi/csiostor/csio_init.h               |  158 +
 drivers/scsi/csiostor/csio_isr.c                |  624 ++++
 drivers/scsi/csiostor/csio_lnode.c              | 2148 +++++++++++
 drivers/scsi/csiostor/csio_lnode.h              |  255 ++
 drivers/scsi/csiostor/csio_mb.c                 | 1769 +++++++++
 drivers/scsi/csiostor/csio_mb.h                 |  278 ++
 drivers/scsi/csiostor/csio_rnode.c              |  889 +++++
 drivers/scsi/csiostor/csio_rnode.h              |  141 +
 drivers/scsi/csiostor/csio_scsi.c               | 2560 +++++++++++++
 drivers/scsi/csiostor/csio_scsi.h               |  342 ++
 drivers/scsi/csiostor/csio_wr.c                 | 1632 +++++++++
 drivers/scsi/csiostor/csio_wr.h                 |  512 +++
 drivers/scsi/csiostor/t4fw_api_stor.h           |  578 +++
 29 files changed, 19345 insertions(+), 37 deletions(-)
 create mode 100644 drivers/scsi/csiostor/Kconfig
 create mode 100644 drivers/scsi/csiostor/Makefile
 create mode 100644 drivers/scsi/csiostor/csio_attr.c
 create mode 100644 drivers/scsi/csiostor/csio_defs.h
 create mode 100644 drivers/scsi/csiostor/csio_hw.c
 create mode 100644 drivers/scsi/csiostor/csio_hw.h
 create mode 100644 drivers/scsi/csiostor/csio_init.c
 create mode 100644 drivers/scsi/csiostor/csio_init.h
 create mode 100644 drivers/scsi/csiostor/csio_isr.c
 create mode 100644 drivers/scsi/csiostor/csio_lnode.c
 create mode 100644 drivers/scsi/csiostor/csio_lnode.h
 create mode 100644 drivers/scsi/csiostor/csio_mb.c
 create mode 100644 drivers/scsi/csiostor/csio_mb.h
 create mode 100644 drivers/scsi/csiostor/csio_rnode.c
 create mode 100644 drivers/scsi/csiostor/csio_rnode.h
 create mode 100644 drivers/scsi/csiostor/csio_scsi.c
 create mode 100644 drivers/scsi/csiostor/csio_scsi.h
 create mode 100644 drivers/scsi/csiostor/csio_wr.c
 create mode 100644 drivers/scsi/csiostor/csio_wr.h
 create mode 100644 drivers/scsi/csiostor/t4fw_api_stor.h


^ permalink raw reply

* Re: [V3 PATCH 9/9] cxgb4vf: Chelsio FCoE offload driver submission (header compatibility fixes).
From: Andi Kleen @ 2012-09-12 16:52 UTC (permalink / raw)
  To: Naresh Kumar Inna
  Cc: David Miller, JBottomley@parallels.com,
	linux-scsi@vger.kernel.org, Dimitrios Michailidis, Casey Leedom,
	netdev@vger.kernel.org, Chethan Seshadri
In-Reply-To: <50502B3A.6080803@chelsio.com>

Naresh Kumar Inna <naresh@chelsio.com> writes:
>
> OK, I think I should be able to arrange the patch set to fulfill that
> requirement. I was under the impression it was fine for new drivers to
> split patches in this fashion, since they go as a single commit, sorry
> about that.

What they normally do then is to add the Kconfig or Makefile entry
only in the end, so it cannot build before.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only

^ permalink raw reply

* Re: GRO aggregation
From: Rick Jones @ 2012-09-12 16:52 UTC (permalink / raw)
  To: Shlomo Pongartz; +Cc: Eric Dumazet, netdev@vger.kernel.org
In-Reply-To: <5050B9B2.5070107@mellanox.com>

On 09/12/2012 09:34 AM, Shlomo Pongartz wrote:
> On 9/12/2012 7:23 PM, Rick Jones wrote:
>> On 09/12/2012 07:41 AM, Shlomo Pongartz wrote:
>>> Hi Eric
>>>
>>> The TSO is just a mean to create a burst of frames on the wire so the
>>> NAPI will be able to pool as much as possible.
>>
>> Is it?  If I recall correctly, TSO was in place well before all
>> drivers were using NAPI.  And NAPI was being proposed independent of
>> TSO. TSO is there to save CPU cycles on the transmit side.  "On the
>> wire" what it sends is to be identical to what a host with greater CPU
>> performance could accomplish.
>>
>> rick jones
>>
> Hi Rick.
>
> What I say is that I use TSO on the machine that transmits so I'll have
> a burst of frames on the wire for the NAPI on the receiver machine.

Also, NAPI was in place before GRO.  IIRC, the napi code was simply a 
convenient/correct/natural place to have the GRO functionality.

rick jones

^ permalink raw reply

* Re: GRO aggregation
From: Shlomo Pongartz @ 2012-09-12 16:34 UTC (permalink / raw)
  To: Rick Jones; +Cc: Eric Dumazet, netdev@vger.kernel.org
In-Reply-To: <5050B6FF.5050002@hp.com>

On 9/12/2012 7:23 PM, Rick Jones wrote:
> On 09/12/2012 07:41 AM, Shlomo Pongartz wrote:
>> Hi Eric
>>
>> The TSO is just a mean to create a burst of frames on the wire so the
>> NAPI will be able to pool as much as possible.
>
> Is it?  If I recall correctly, TSO was in place well before all 
> drivers were using NAPI.  And NAPI was being proposed independent of 
> TSO. TSO is there to save CPU cycles on the transmit side.  "On the 
> wire" what it sends is to be identical to what a host with greater CPU 
> performance could accomplish.
>
> rick jones
>
Hi Rick.

What I say is that I use TSO on the machine that transmits so I'll have  
a burst of frames on the wire for the NAPI on the receiver machine.
The best thing for my purpose is  that the HW will do the segmentation. 
And unless I'm mistaken the Intel card is capable to do so.

Shlomo.

^ permalink raw reply

* Re: GRO aggregation
From: Rick Jones @ 2012-09-12 16:23 UTC (permalink / raw)
  To: Shlomo Pongartz; +Cc: Eric Dumazet, netdev@vger.kernel.org
In-Reply-To: <50509F30.30402@mellanox.com>

On 09/12/2012 07:41 AM, Shlomo Pongartz wrote:
> Hi Eric
>
> The TSO is just a mean to create a burst of frames on the wire so the
> NAPI will be able to pool as much as possible.

Is it?  If I recall correctly, TSO was in place well before all drivers 
were using NAPI.  And NAPI was being proposed independent of TSO. TSO is 
there to save CPU cycles on the transmit side.  "On the wire" what it 
sends is to be identical to what a host with greater CPU performance 
could accomplish.

rick jones

^ permalink raw reply

* Re: Failure to send fragmented IP packet in case of missing ARP entry
From: Andrei Dolnikov @ 2012-09-12 15:54 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev
In-Reply-To: <1347270798.1234.1370.camel@edumazet-glaptop>

Works for me.

Thank you!
Andrei.

On 09/10/2012 01:53 PM, Eric Dumazet wrote:
> On Mon, 2012-09-10 at 11:42 +0200, Eric Dumazet wrote:
>> On Mon, 2012-09-10 at 12:59 +0400, Andrei Dolnikov wrote:
>>> Hello all,
>>>
>>> The following issue is observed on most Linux distributions:
>>> Transmission of fragmented IP packets in case of missing ARP entry for
>>> destination IP fails.
>>> Actually ARP request is sent, and, once ARP response is received, only
>>> few queued fragments are transmitted. Remaining fragments are lost.
>>> It can be easily reproduced as follows:
>>>       # arp -d <dst IP>
>>>       # ping -s 65000 -c 1 <dst IP>
>>> Ping result is: "1 packets transmitted, 0 received, 100% packet loss,
>>> time 0ms".
>>>
>>> The latest kernel version I tried was 3.5.0-1 x86_64, but I also was
>>> able to reproduce it with 3.2.x, 3.0.x and 2.6.32.
>>> It doesn't depend on hardware: was able to reproduce with VMWare Player,
>>> Intel based laptop, Intel Atom and ARM based custom boards.
>>> As I'm not a networking standards expert I'm not sure if it's a real bug
>>> or acceptable behaviour, but decided to raise the issue here as I can't
>>> reproduce this anomaly with the Windows 7 PC.
>>>
>>> Thanks,
>>> Andrei.
>>> --
>> Its a bit better with linux-3.3, with commit
>> 8b5c171bb3dc0686b2647a84e990199c5faa9ef8
>> (neigh: new unresolved queue limits)
>>
>> +neigh/default/unres_qlen_bytes - INTEGER
>> +       The maximum number of bytes which may be used by packets
>> +       queued for each unresolved address by other network layers.
>> +       (added in linux 3.3)
>> +
>> +neigh/default/unres_qlen - INTEGER
>> +       The maximum number of packets which may be queued for each
>> +       unresolved address by other network layers.
>> +       (deprecated in linux 3.3) : use unres_qlen_bytes instead.
>>
>>
>> Problem is : unres_qlen_bytes default value is 65536, so its a bit too
>> small once you take into account truesize overhead
>>
>> I guess following patch would be needed :
>>
>> diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
>> index 4780045..3395bb6 100644
>> --- a/net/ipv4/arp.c
>> +++ b/net/ipv4/arp.c
>> @@ -171,7 +171,7 @@ struct neigh_table arp_tbl = {
>>   		.gc_staletime		= 60 * HZ,
>>   		.reachable_time		= 30 * HZ,
>>   		.delay_probe_time	= 5 * HZ,
>> -		.queue_len_bytes	= 64*1024,
>> +		.queue_len_bytes	= 64 * SKB_TRUESIZE(1024),
>>   		.ucast_probes		= 3,
>>   		.mcast_probes		= 3,
>>   		.anycast_delay		= 1 * HZ,
> In the mean time, you can also do
>
> echo 50 >/proc/sys/net/ipv4/neigh/eth0/unres_qlen
>
> (change eth0 by the name of your interface)
>
>
>

^ permalink raw reply

* Re: [PATCH V2] netfilter/iptables: Fix log-level processing
From: Pablo Neira Ayuso @ 2012-09-12 15:25 UTC (permalink / raw)
  To: joe
  Cc: netfilter-devel, netdev, Bart De Schuymer, Patrick McHardy,
	Stephen Hemminger
In-Reply-To: <1347437245.13103.697.camel@edumazet-glaptop>

I have applied this patch. Thanks Joe.

^ permalink raw reply

* Re: [PATCH net-next V3 1/2] IB/ipoib: Add rtnl_link_ops support
From: Eric Dumazet @ 2012-09-12 15:13 UTC (permalink / raw)
  To: Rami Rosen; +Cc: Or Gerlitz, Patrick McHardy, netdev, Shlomo Pongratz
In-Reply-To: <CAKoUAr=8zZA9EgvJEVrd0a-Uw=zzksHj+5P2Sp3Lb4vXgSJRYA@mail.gmail.com>

On Wed, 2012-09-12 at 17:53 +0300, Rami Rosen wrote:
> Hi,
> 
> From the dump of CPU #1, it seems indeed not related at all to "modprobe -r".
> 
> Could it be that there is some IB stack sysfs write activity?
> (regardless of the modprobe -r" you issued) ?  I see some candidates
> for it.
> 
> delete_child() is a method of the IB stack (ipoib/ipoib_main.c)
> 
> Maybe in order to help debug the problem, you might try to add in
> delete_child() method, print of the name of the attribute which is
> being deleted ?
> 
>   (struct device_attribute has a a member "struct attribute attr",
> which in turn has  "const char *name").


It might be related to module load/unload

udevd or some external daemon can access sysfs files while you unload
the module

^ permalink raw reply

* [PATCH 9/9] drivers/isdn/gigaset/common.c: Remove useless kfree
From: Peter Senna Tschudin @ 2012-09-12 15:06 UTC (permalink / raw)
  To: Hansjoerg Lipp
  Cc: kernel-janitors, Tilman Schmidt, Karsten Keil, gigaset307x-common,
	netdev, linux-kernel

From: Peter Senna Tschudin <peter.senna@gmail.com>

Remove useless kfree() and clean up code related to the removal.

The semantic patch that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

// <smpl>
@r exists@
position p1,p2;
expression x;
@@

if (x@p1 == NULL) { ... kfree@p2(x); ... return ...; }

@unchanged exists@
position r.p1,r.p2;
expression e <= r.x,x,e1;
iterator I;
statement S;
@@

if (x@p1 == NULL) { ... when != I(x,...) S
                        when != e = e1
                        when != e += e1
                        when != e -= e1
                        when != ++e
                        when != --e
                        when != e++
                        when != e--
                        when != &e
   kfree@p2(x); ... return ...; }

@ok depends on unchanged exists@
position any r.p1;
position r.p2;
expression x;
@@

... when != true x@p1 == NULL
kfree@p2(x);

@depends on !ok && unchanged@
position r.p2;
expression x;
@@

*kfree@p2(x);
// </smpl>

Signed-off-by: Peter Senna Tschudin <peter.senna@gmail.com>

---
 drivers/isdn/gigaset/common.c |    1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/isdn/gigaset/common.c b/drivers/isdn/gigaset/common.c
index aa41485..30a6b17 100644
--- a/drivers/isdn/gigaset/common.c
+++ b/drivers/isdn/gigaset/common.c
@@ -1123,7 +1123,6 @@ struct gigaset_driver *gigaset_initdriver(unsigned minor, unsigned minors,
 	return drv;
 
 error:
-	kfree(drv->cs);
 	kfree(drv);
 	return NULL;
 }

^ permalink raw reply related

* Re: [PATCH net-next V3 1/2] IB/ipoib: Add rtnl_link_ops support
From: Rami Rosen @ 2012-09-12 14:53 UTC (permalink / raw)
  To: Or Gerlitz; +Cc: Patrick McHardy, Eric Dumazet, netdev, Shlomo Pongratz
In-Reply-To: <5050668B.1010105@mellanox.com>

Hi,

From the dump of CPU #1, it seems indeed not related at all to "modprobe -r".

Could it be that there is some IB stack sysfs write activity
(regardless of the "modprobe -r" you issued)?  I see some candidates
for it.

delete_child() is a method of the IB stack (ipoib/ipoib_main.c)

Maybe in order to help debug the problem, you might try to add in
delete_child() method, print of the name of the attribute which is
being deleted ?

  (struct device_attribute has a member "struct attribute attr",
which in turn has "const char *name").


Regards,
Rami Rosen

^ permalink raw reply

* Re: GRO aggregation
From: Shlomo Pongartz @ 2012-09-12 14:41 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev@vger.kernel.org
In-Reply-To: <1347442394.13103.703.camel@edumazet-glaptop>

On 9/12/2012 12:33 PM, Eric Dumazet wrote:
> On Wed, 2012-09-12 at 12:23 +0300, Shlomo Pongartz wrote:
>> On 9/11/2012 10:35 PM, Eric Dumazet wrote:
>>> On Tue, 2012-09-11 at 19:24 +0000, Shlomo Pongratz wrote:
>>>
>>>> I see that in ixgbe the weight for the NAPI is 64 (netif_napi_add). So
>>>> if packets are arriving in high rate then an the CPU is fast enough to
>>>> collect the packets as they arrive, assuming packets continue to
>>>> arrives while the NAPI runs. Then it should have aggregate more. So we
>>>> will have less passes trough the stack.
>>>>
>>> As I said, _if_ your cpu was loaded by other stuff, then you would see
>>> biggest GRO packets.
>>>
>>> GRO is not : "We want to kill latency and have big packets just because
>>> its better"
>>>
>>> Its more like : If load is big enough, try to aggregate TCP frames in
>>> less skbs.
>>>
>>>
>>>
>>>
>> First I want to apologize for breaking the mailing thread. I wasn't at
>> work and used webmail.
>>
>> I agree with your but I think that something is still strange.
>> On the transmitter side all the offloading are enabled, e.g. TSO and GSO.
>> The tcpdump on the sender side shows size of 64240 which is 44 packets
>> of 1460 each.
>> Now since the offloading are enabled the HW should transmit 44 frames
>> back to back,
>> that is in a burst of 44 * 1500 bytes, which according to my calculation
>> should take 52.8 micro on 10G Ethernet.
>> Using ethtool I've set the rx-usecs to 1022 micro, which I think is the
>> maximal value for ixgbe.
>> Note that there is no way to set rx-frames on ixgbe.
>> Now since the ixgbe weight is 64 I expected that the NAPI will be able
>> to poll for more then 21 packets,
>> since 44 packets came in one burst.
>> However the results remains the same.
> TSO uses PAGE frags, so 64KB needs about 16 pages.
>
> tcp_sendmsg() could even use order-3 pages, so that only 2 pages would
> be needed to fill 64KB of data.
>
> GRO uses whatever fragment size provided by NIC, depending on MTU.
>
> One skb has a limit on number of frags.
>
> Handling a huge array of frags would be actually slower in some helper
> functions.
>
> Since you dont exactly describe why you ask all these questions, its
> hard to guess what problem you try to solve.
>
>
>
> .
>
Hi Eric

The TSO is just a means to create a burst of frames on the wire so the
NAPI will be able to poll as much as possible.
I'm looking at the aggregation done by GRO on behalf of IPoIB. With
IPoIB I added a counter that counts how many
packets were aggregated before napi_complete is called (either directly
or by net_rx_action) and found that although
the NAPI consumes 64 packets on average before napi_complete is called,
the tcpdump shows that no more than 16-17
packets were aggregated. BTW when I increased the MTU to 4K I did
reach 64K aggregation, which again is 16-17 packets.
So in order to see if 17 packets is the aggregation limit I wanted to
see how ixgbe is doing and found that it aggregates 21 packets.
So I wanted to know if there is another factor that governs the
aggregation, one that I can tune.

Shlomo.

^ permalink raw reply

* Re: [PATCHv4] virtio-spec: virtio network device multiqueue support
From: Tom Herbert @ 2012-09-12 14:40 UTC (permalink / raw)
  To: Michael S. Tsirkin
  Cc: Rusty Russell, Jason Wang, kvm, virtualization, netdev, pbonzini,
	levinsasha928, rick.jones2
In-Reply-To: <20120912075737.GA30455@redhat.com>

On Wed, Sep 12, 2012 at 12:57 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
> On Wed, Sep 12, 2012 at 03:19:11PM +0930, Rusty Russell wrote:
>> Jason Wang <jasowang@redhat.com> writes:
>> > On 09/10/2012 02:33 PM, Michael S. Tsirkin wrote:
>> >> A final addition: what you suggest above would be
>> >> "TX follows RX", right?
>>
>> BTW, yes.  But it's a weird way to express what the nic is doing.
>
> It explains what the system is doing.
> TX is done by driver, RX by nic.
> We document both driver and device in the spec
> so I thought it's fine. any suggestions wellcome.
>
>> >> It is in anticipation of something like that, that I made
>> >> steering programming so generic.
>>
>> >> I think TX follows RX is more immediately useful for reasons above
>> >> but we can add both to spec and let drivers and devices
>> >> decide what they want to support.
>>
>> You mean "RX follows TX"?  ie. accelerated RFS.  I agree.
>
RX following TX is logic of flow director I believe.  {a}RFS has RX
follow CPU where application receive is done on the socket.  So in RFS
there is no requirement to have a 1-1 correspondence between TX and RX
queues, and in fact this allows different number of queues between TX
and RX.  We found this necessary when using priority HW queues, so
that there are more TX queues than RX.

>
> Yes that's what I meant. Thanks for the correction.
>
>> Perhaps Tom can explain how we avoid out-of-order receive for the
>> accelerated RFS case?  It's not clear to me, but we need to be able to
>> do that for virtio-net if it implements accelerated RFS.
>
AFAIK ooo RX is still possible with accelerated RFS.  We have an
algorithm that prevents this for RFS by deferring a migration to a new
queue as long as it's possible that a flow might have outstanding
packets on the old queue.  I suppose this could be implemented in the
device for the HW queues, but I don't think it would be easy to cover
all cases where packets were already in transit to the host or other
cases where host and device queues are out of sync.

> Basically this has tx vq per cpu and relies on scheduler not bouncing threads
> between cpus too aggressively. Appears to be what ixgbe does.
>
>> > AFAIK, ixgbe does "rx follows tx". The only differences between ixgbe
>> > and virtio-net is that ixgbe driver programs the flow director during
>> > packet transmission but we suggest to do it silently in the device for
>> > simplicity.
>>
>> Implying the receive queue by xmit will be slightly laggy.  Don't know
>> if that's a problem.
>>
>> Cheers,
>> Rusty.
>
> Doesn't seem to be a problem in Jason's testing so far.

^ permalink raw reply

* Re: [PATCHv4] virtio-spec: virtio network device multiqueue support
From: Tom Herbert @ 2012-09-12 14:38 UTC (permalink / raw)
  To: Rusty Russell
  Cc: kvm, Michael S. Tsirkin, netdev, rick.jones2, virtualization,
	levinsasha928, pbonzini
In-Reply-To: <87har3dc4o.fsf@rustcorp.com.au>


[-- Attachment #1.1: Type: text/plain, Size: 2086 bytes --]

On Tue, Sep 11, 2012 at 10:49 PM, Rusty Russell <rusty@rustcorp.com.au>wrote:

> Jason Wang <jasowang@redhat.com> writes:
> > On 09/10/2012 02:33 PM, Michael S. Tsirkin wrote:
> >> A final addition: what you suggest above would be
> >> "TX follows RX", right?
>
> BTW, yes.  But it's a weird way to express what the nic is doing.
>
> >> It is in anticipation of something like that, that I made
> >> steering programming so generic.
>
> >> I think TX follows RX is more immediately useful for reasons above
> >> but we can add both to spec and let drivers and devices
> >> decide what they want to support.
>
> You mean "RX follows TX"?  ie. accelerated RFS.  I agree.
>
> RX following TX is logic of flow director I believe.  {a}RFS has RX follow
CPU where application receive is done on the socket.  So in RFS there is no
requirement to have a 1-1 correspondence between TX and RX queues, and in
fact this allows different number of queues between TX and RX.  We found
this necessary when using priority HW queues, so that there are more TX
queues than RX.

Perhaps Tom can explain how we avoid out-of-order receive for the
> accelerated RFS case?  It's not clear to me, but we need to be able to
> do that for virtio-net if it implements accelerated RFS.
>
> AFAIK ooo RX is possible with accelerated RFS.  We have an algorithm that
prevents this for RFS case by deferring a migration to a new queue as long
as it's possible that a flow might have outstanding packets on the old
queue.  I suppose this could be implemented in the device for the HW
queues, but I don't think it would be easy to cover all cases where packets
were already in transit to the host or other cases where host and device
queues are out of sync.


> > AFAIK, ixgbe does "rx follows tx". The only differences between ixgbe
> > and virtio-net is that ixgbe driver programs the flow director during
> > packet transmission but we suggest to do it silently in the device for
> > simplicity.
>
> Implying the receive queue by xmit will be slightly laggy.  Don't know
> if that's a problem.
>
> Cheers,
> Rusty.
>

[-- Attachment #1.2: Type: text/html, Size: 2944 bytes --]

[-- Attachment #2: Type: text/plain, Size: 183 bytes --]

_______________________________________________
Virtualization mailing list
Virtualization@lists.linux-foundation.org
https://lists.linuxfoundation.org/mailman/listinfo/virtualization

^ permalink raw reply

* [PATCH v4 6/8] cgroup: Do not depend on a given order when populating the subsys array
From: Daniel Wagner @ 2012-09-12 14:12 UTC (permalink / raw)
  To: netdev, cgroups
  Cc: Daniel Wagner, Gao feng, Jamal Hadi Salim, John Fastabend,
	Li Zefan, Neil Horman, Tejun Heo
In-Reply-To: <1347459128-32236-1-git-send-email-wagi@monom.org>

From: Daniel Wagner <daniel.wagner@bmw-carit.de>

The *_subsys_id will be used as an index to access the subsys array.
Therefore we need to take care to populate each subsystem at the correct
position by using designated initializers.

With this change we are able to interleave builtin and modules in the subsys
array.

Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: netdev@vger.kernel.org
Cc: cgroups@vger.kernel.org
---
 kernel/cgroup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 769600c..343ab4e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -92,7 +92,7 @@ static DEFINE_MUTEX(cgroup_root_mutex);
  * registered after that. The mutable section of this array is protected by
  * cgroup_mutex.
  */
-#define SUBSYS(_x) &_x ## _subsys,
+#define SUBSYS(_x) [_x ## _subsys_id] = &_x ## _subsys,
 #define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
 static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
 #include <linux/cgroup_subsys.h>
-- 
1.7.12.315.g682ce8b

^ permalink raw reply related

* [PATCH v4 7/8] cgroup: Assign subsystem IDs during compile time
From: Daniel Wagner @ 2012-09-12 14:12 UTC (permalink / raw)
  To: netdev, cgroups
  Cc: Daniel Wagner, David S. Miller, Paul E. McKenney, Andrew Morton,
	Eric Dumazet, Gao feng, Glauber Costa, Herbert Xu,
	Jamal Hadi Salim, John Fastabend, Kamezawa Hiroyuki, Li Zefan,
	Neil Horman, Tejun Heo
In-Reply-To: <1347459128-32236-1-git-send-email-wagi@monom.org>

From: Daniel Wagner <daniel.wagner@bmw-carit.de>

WARNING: With this change it is no longer possible to load externally
built controllers.

In case where CONFIG_NETPRIO_CGROUP=m and CONFIG_NET_CLS_CGROUP=m is
set, corresponding subsys_id should also be a constant. Up to now,
net_prio_subsys_id and net_cls_subsys_id would be of the type int and
the value would be assigned during runtime.

By switching the macro definition IS_SUBSYS_ENABLED from IS_BUILTIN
to IS_ENABLED, all *_subsys_id will have constant value. That means we
need to remove all the code which assumes a value can be assigned to
net_prio_subsys_id and net_cls_subsys_id.

A close look is necessary at the RCU part, which was introduced by the
following patch:

commit f845172531fb7410c7fb7780b1a6e51ee6df7d52
Author:	Herbert Xu <herbert@gondor.apana.org.au>  Mon May 24 09:12:34 2010
Committer:	David S. Miller <davem@davemloft.net>  Mon May 24 09:12:34 2010

cls_cgroup: Store classid in struct sock

This code was added to init_cgroup_cls()

	/* We can't use rcu_assign_pointer because this is an int. */
	smp_wmb();
	net_cls_subsys_id = net_cls_subsys.subsys_id;

respectively to exit_cgroup_cls()

	net_cls_subsys_id = -1;
	synchronize_rcu();

and in module version of task_cls_classid()

	rcu_read_lock();
	id = rcu_dereference(net_cls_subsys_id);
	if (id >= 0)
		classid = container_of(task_subsys_state(p, id),
				       struct cgroup_cls_state, css)->classid;
	rcu_read_unlock();

This was done without an explicit explanation of why the RCU part is
needed. (The rcu_dereference() was fixed by exchanging it for
rcu_dereference_index_check() in a later commit, but that is a minor
detail.)

So here is my pondering on why it was introduced and why it is safe to
remove it now. Note that this code was copied over to net_prio, so the
reasoning holds for that subsystem too.

The idea behind the RCU use for net_cls_subsys_id is to make sure we
get a valid pointer back from task_subsys_state(). task_subsys_state()
is just blindly accessing the subsys array and returning the
pointer. Obviously, passing in -1 as id into task_subsys_state()
returns an invalid value (out of lower bound).

So this code makes sure that only after module is loaded and the
subsystem registered, the id is assigned.

Before unregistering the module all old readers must have left the
critical section. This is done by assigning -1 to the id and issuing a
synchronize_rcu(). Any new readers won't call task_subsys_state()
anymore and therefore it is safe to unregister the subsystem.

The new code relies on the same trick, but it looks at the subsys
pointer returned by task_subsys_state() (remember the id is constant
and therefore we always have a valid index into the subsys
array).

No precautions need to be taken during module loading. Eventually,
all CPUs will get a valid pointer back from
task_subsys_state() because rebind_subsystems(), which is called after
the module init() function, will assign subsys[net_cls_subsys_id] the
newly loaded module subsystem pointer.

When the subsystem is about to be removed, rebind_subsystems() will be
called before the module exit() function. In this case,
rebind_subsystems() will assign subsys[net_cls_subsys_id] a NULL pointer
and then call synchronize_rcu(). All old readers have left the
critical section by then. Any new reader won't access the subsystem
anymore.  At this point we are safe to unregister the subsystem. No
additional synchronize_rcu() call is needed.

Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Cc: Glauber Costa <glommer@parallels.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: netdev@vger.kernel.org
Cc: cgroups@vger.kernel.org
---
 include/linux/cgroup.h       |  2 +-
 include/net/cls_cgroup.h     | 12 ++++--------
 include/net/netprio_cgroup.h | 18 +++++-------------
 kernel/cgroup.c              | 22 +++-------------------
 net/core/netprio_cgroup.c    | 11 -----------
 net/core/sock.c              | 11 -----------
 net/sched/cls_cgroup.c       | 13 -------------
 7 files changed, 13 insertions(+), 76 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index a5ab565..018f819 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -46,7 +46,7 @@ extern const struct file_operations proc_cgroup_operations;
 
 /* Define the enumeration of all builtin cgroup subsystems */
 #define SUBSYS(_x) _x ## _subsys_id,
-#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
+#define IS_SUBSYS_ENABLED(option) IS_ENABLED(option)
 enum cgroup_subsys_id {
 #include <linux/cgroup_subsys.h>
 	__CGROUP_TEMPORARY_PLACEHOLDER
diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index 9bd5db9..b6a6eeb 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -42,22 +42,18 @@ static inline u32 task_cls_classid(struct task_struct *p)
 	return classid;
 }
 #elif IS_MODULE(CONFIG_NET_CLS_CGROUP)
-
-extern int net_cls_subsys_id;
-
 static inline u32 task_cls_classid(struct task_struct *p)
 {
-	int id;
+	struct cgroup_subsys_state *css;
 	u32 classid = 0;
 
 	if (in_interrupt())
 		return 0;
 
 	rcu_read_lock();
-	id = rcu_dereference_index_check(net_cls_subsys_id,
-					 rcu_read_lock_held());
-	if (id >= 0)
-		classid = container_of(task_subsys_state(p, id),
+	css = task_subsys_state(p, net_cls_subsys_id);
+	if (css)
+		classid = container_of(css,
 				       struct cgroup_cls_state, css)->classid;
 	rcu_read_unlock();
 
diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h
index b202de8..2760f4f 100644
--- a/include/net/netprio_cgroup.h
+++ b/include/net/netprio_cgroup.h
@@ -30,10 +30,6 @@ struct cgroup_netprio_state {
 	u32 prioidx;
 };
 
-#ifndef CONFIG_NETPRIO_CGROUP
-extern int net_prio_subsys_id;
-#endif
-
 extern void sock_update_netprioidx(struct sock *sk, struct task_struct *task);
 
 #if IS_BUILTIN(CONFIG_NETPRIO_CGROUP)
@@ -55,18 +51,14 @@ static inline u32 task_netprioidx(struct task_struct *p)
 
 static inline u32 task_netprioidx(struct task_struct *p)
 {
-	struct cgroup_netprio_state *state;
-	int subsys_id;
+	struct cgroup_subsys_state *css;
 	u32 idx = 0;
 
 	rcu_read_lock();
-	subsys_id = rcu_dereference_index_check(net_prio_subsys_id,
-						rcu_read_lock_held());
-	if (subsys_id >= 0) {
-		state = container_of(task_subsys_state(p, subsys_id),
-				     struct cgroup_netprio_state, css);
-		idx = state->prioidx;
-	}
+	css = task_subsys_state(p, net_prio_subsys_id);
+	if (css)
+		idx = container_of(css,
+				   struct cgroup_netprio_state, css)->prioidx;
 	rcu_read_unlock();
 	return idx;
 }
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 343ab4e..4a364f1 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -4458,24 +4458,8 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 	/* init base cftset */
 	cgroup_init_cftsets(ss);
 
-	/*
-	 * need to register a subsys id before anything else - for example,
-	 * init_cgroup_css needs it.
-	 */
 	mutex_lock(&cgroup_mutex);
-	/* find the first empty slot in the array */
-	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-		if (subsys[i] == NULL)
-			break;
-	}
-	if (i == CGROUP_SUBSYS_COUNT) {
-		/* maximum number of subsystems already registered! */
-		mutex_unlock(&cgroup_mutex);
-		return -EBUSY;
-	}
-	/* assign ourselves the subsys_id */
-	ss->subsys_id = i;
-	subsys[i] = ss;
+	subsys[ss->subsys_id] = ss;
 
 	/*
 	 * no ss->create seems to need anything important in the ss struct, so
@@ -4484,7 +4468,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 	css = ss->create(dummytop);
 	if (IS_ERR(css)) {
 		/* failure case - need to deassign the subsys[] slot. */
-		subsys[i] = NULL;
+		subsys[ss->subsys_id] = NULL;
 		mutex_unlock(&cgroup_mutex);
 		return PTR_ERR(css);
 	}
@@ -4500,7 +4484,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 		if (ret) {
 			dummytop->subsys[ss->subsys_id] = NULL;
 			ss->destroy(dummytop);
-			subsys[i] = NULL;
+			subsys[ss->subsys_id] = NULL;
 			mutex_unlock(&cgroup_mutex);
 			return ret;
 		}
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index c75e3f9..6bc460c 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -326,9 +326,7 @@ struct cgroup_subsys net_prio_subsys = {
 	.create		= cgrp_create,
 	.destroy	= cgrp_destroy,
 	.attach		= net_prio_attach,
-#ifdef CONFIG_NETPRIO_CGROUP
 	.subsys_id	= net_prio_subsys_id,
-#endif
 	.base_cftypes	= ss_files,
 	.module		= THIS_MODULE
 };
@@ -366,10 +364,6 @@ static int __init init_cgroup_netprio(void)
 	ret = cgroup_load_subsys(&net_prio_subsys);
 	if (ret)
 		goto out;
-#ifndef CONFIG_NETPRIO_CGROUP
-	smp_wmb();
-	net_prio_subsys_id = net_prio_subsys.subsys_id;
-#endif
 
 	register_netdevice_notifier(&netprio_device_notifier);
 
@@ -386,11 +380,6 @@ static void __exit exit_cgroup_netprio(void)
 
 	cgroup_unload_subsys(&net_prio_subsys);
 
-#ifndef CONFIG_NETPRIO_CGROUP
-	net_prio_subsys_id = -1;
-	synchronize_rcu();
-#endif
-
 	rtnl_lock();
 	for_each_netdev(&init_net, dev) {
 		old = rtnl_dereference(dev->priomap);
diff --git a/net/core/sock.c b/net/core/sock.c
index ca3eaee..47b4ac0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -326,17 +326,6 @@ int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(__sk_backlog_rcv);
 
-#if defined(CONFIG_CGROUPS)
-#if !defined(CONFIG_NET_CLS_CGROUP)
-int net_cls_subsys_id = -1;
-EXPORT_SYMBOL_GPL(net_cls_subsys_id);
-#endif
-#if !defined(CONFIG_NETPRIO_CGROUP)
-int net_prio_subsys_id = -1;
-EXPORT_SYMBOL_GPL(net_prio_subsys_id);
-#endif
-#endif
-
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
 	struct timeval tv;
diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c
index 7743ea8..67cf90d 100644
--- a/net/sched/cls_cgroup.c
+++ b/net/sched/cls_cgroup.c
@@ -77,9 +77,7 @@ struct cgroup_subsys net_cls_subsys = {
 	.name		= "net_cls",
 	.create		= cgrp_create,
 	.destroy	= cgrp_destroy,
-#ifdef CONFIG_NET_CLS_CGROUP
 	.subsys_id	= net_cls_subsys_id,
-#endif
 	.base_cftypes	= ss_files,
 	.module		= THIS_MODULE,
 };
@@ -283,12 +281,6 @@ static int __init init_cgroup_cls(void)
 	if (ret)
 		goto out;
 
-#ifndef CONFIG_NET_CLS_CGROUP
-	/* We can't use rcu_assign_pointer because this is an int. */
-	smp_wmb();
-	net_cls_subsys_id = net_cls_subsys.subsys_id;
-#endif
-
 	ret = register_tcf_proto_ops(&cls_cgroup_ops);
 	if (ret)
 		cgroup_unload_subsys(&net_cls_subsys);
@@ -301,11 +293,6 @@ static void __exit exit_cgroup_cls(void)
 {
 	unregister_tcf_proto_ops(&cls_cgroup_ops);
 
-#ifndef CONFIG_NET_CLS_CGROUP
-	net_cls_subsys_id = -1;
-	synchronize_rcu();
-#endif
-
 	cgroup_unload_subsys(&net_cls_subsys);
 }
 
-- 
1.7.12.315.g682ce8b

^ permalink raw reply related

* [PATCH v4 8/8] cgroup: Define CGROUP_SUBSYS_COUNT according the configuration
From: Daniel Wagner @ 2012-09-12 14:12 UTC (permalink / raw)
  To: netdev, cgroups
  Cc: Daniel Wagner, Gao feng, Jamal Hadi Salim, John Fastabend,
	Li Zefan, Neil Horman, Tejun Heo
In-Reply-To: <1347459128-32236-1-git-send-email-wagi@monom.org>

From: Daniel Wagner <daniel.wagner@bmw-carit.de>

Since we know exactly how many subsystems exist at compile time we are
able to define CGROUP_SUBSYS_COUNT correctly. CGROUP_SUBSYS_COUNT will
be at most 12 (all controllers enabled). Depending on the architecture
we save either 32 - 12 pointers (80 bytes) or 64 - 12 pointers (416
bytes) per cgroup.

With this change we can also remove the temporary placeholder to avoid
compilation errors.

Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: netdev@vger.kernel.org
Cc: cgroups@vger.kernel.org
---
 include/linux/cgroup.h | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 018f819..df354ae 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -49,16 +49,10 @@ extern const struct file_operations proc_cgroup_operations;
 #define IS_SUBSYS_ENABLED(option) IS_ENABLED(option)
 enum cgroup_subsys_id {
 #include <linux/cgroup_subsys.h>
-	__CGROUP_TEMPORARY_PLACEHOLDER
+	CGROUP_SUBSYS_COUNT,
 };
 #undef IS_SUBSYS_ENABLED
 #undef SUBSYS
-/*
- * This define indicates the maximum number of subsystems that can be loaded
- * at once. We limit to this many since cgroupfs_root has subsys_bits to keep
- * track of all of them.
- */
-#define CGROUP_SUBSYS_COUNT (BITS_PER_BYTE*sizeof(unsigned long))
 
 /* Per-subsystem/per-cgroup state maintained by the system. */
 struct cgroup_subsys_state {
-- 
1.7.12.315.g682ce8b

^ permalink raw reply related

* [PATCH v4 3/8] cgroup: net_prio: Do not define task_netpioidx() when not selected
From: Daniel Wagner @ 2012-09-12 14:12 UTC (permalink / raw)
  To: netdev, cgroups
  Cc: Daniel Wagner, Gao feng, Jamal Hadi Salim, John Fastabend,
	Li Zefan, Neil Horman
In-Reply-To: <1347459128-32236-1-git-send-email-wagi@monom.org>

From: Daniel Wagner <daniel.wagner@bmw-carit.de>

task_netprioidx() should not be defined in case the configuration is
CONFIG_NETPRIO_CGROUP=n. The reason is that in a following patch the
net_prio_subsys_id will only be defined if CONFIG_NETPRIO_CGROUP!=n.
When net_prio is not built at all any caller should only get an empty
task_netprioidx() without any references to net_prio_subsys_id.

Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: netdev@vger.kernel.org
Cc: cgroups@vger.kernel.org
---
 include/net/netprio_cgroup.h | 12 +++++-------
 net/core/sock.c              |  2 ++
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h
index 2719dec..b202de8 100644
--- a/include/net/netprio_cgroup.h
+++ b/include/net/netprio_cgroup.h
@@ -18,14 +18,13 @@
 #include <linux/rcupdate.h>
 
 
+#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
 struct netprio_map {
 	struct rcu_head rcu;
 	u32 priomap_len;
 	u32 priomap[];
 };
 
-#ifdef CONFIG_CGROUPS
-
 struct cgroup_netprio_state {
 	struct cgroup_subsys_state css;
 	u32 prioidx;
@@ -71,18 +70,17 @@ static inline u32 task_netprioidx(struct task_struct *p)
 	rcu_read_unlock();
 	return idx;
 }
+#endif
 
-#else
+#else /* !CONFIG_NETPRIO_CGROUP */
 
 static inline u32 task_netprioidx(struct task_struct *p)
 {
 	return 0;
 }
 
-#endif /* CONFIG_NETPRIO_CGROUP */
-
-#else
 #define sock_update_netprioidx(sk, task)
-#endif
+
+#endif /* CONFIG_NETPRIO_CGROUP */
 
 #endif  /* _NET_CLS_CGROUP_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 82cadc6..ca3eaee 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1237,6 +1237,7 @@ void sock_update_classid(struct sock *sk)
 EXPORT_SYMBOL(sock_update_classid);
 #endif
 
+#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
 void sock_update_netprioidx(struct sock *sk, struct task_struct *task)
 {
 	if (in_interrupt())
@@ -1246,6 +1247,7 @@ void sock_update_netprioidx(struct sock *sk, struct task_struct *task)
 }
 EXPORT_SYMBOL_GPL(sock_update_netprioidx);
 #endif
+#endif
 
 /**
  *	sk_alloc - All socket objects are allocated here
-- 
1.7.12.315.g682ce8b

^ permalink raw reply related

* [PATCH v4 4/8] cgroup: Remove CGROUP_BUILTIN_SUBSYS_COUNT
From: Daniel Wagner @ 2012-09-12 14:12 UTC (permalink / raw)
  To: netdev, cgroups
  Cc: Daniel Wagner, Gao feng, Jamal Hadi Salim, John Fastabend,
	Li Zefan, Neil Horman
In-Reply-To: <1347459128-32236-1-git-send-email-wagi@monom.org>

From: Daniel Wagner <daniel.wagner@bmw-carit.de>

CGROUP_BUILTIN_SUBSYS_COUNT is used as start index or stop index when
looping over the subsys array looking either at the builtin or the
module subsystems. Since all the builtin subsystems have an id which
is lower than CGROUP_BUILTIN_SUBSYS_COUNT we know that any module will
have an id larger than CGROUP_BUILTIN_SUBSYS_COUNT. In short the ids
are sorted.

We are about to change id assignment to happen only at compile time
later in this series. That means we can't rely on the above trick
since all ids will always be defined at compile time. Furthermore,
ordering the builtin subsystems and the module subsystems is not
really necessary.

So we need a different way to know which subsystem is a builtin or a
module one. We can use the subsys[]->module pointer for this. Any
place where we need to know if a subsys is a module we just check
the pointer. If it is NULL then the subsystem is a builtin one.

With this we are able to drop the CGROUP_BUILTIN_SUBSYS_COUNT
enum. Though we need to introduce a temporary placeholder so that we
don't get a compilation error when only CONFIG_CGROUP is selected and
no single controller. An empty enum definition is not valid. Later in
this series we are able to remove the placeholder again.

And with this change we get a fix for this:

kernel/cgroup.c: In function ‘cgroup_load_subsys’:
kernel/cgroup.c:4326:38: warning: array subscript is below array bounds [-Warray-bounds]

when CONFIG_CGROUP=y and no built in controller was enabled.

Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: netdev@vger.kernel.org
Cc: cgroups@vger.kernel.org
---
 include/linux/cgroup.h |  2 +-
 kernel/cgroup.c        | 75 +++++++++++++++++++++++++++++++-------------------
 2 files changed, 48 insertions(+), 29 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 145901f..1916cdb 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -48,7 +48,7 @@ extern const struct file_operations proc_cgroup_operations;
 #define SUBSYS(_x) _x ## _subsys_id,
 enum cgroup_subsys_id {
 #include <linux/cgroup_subsys.h>
-	CGROUP_BUILTIN_SUBSYS_COUNT
+	__CGROUP_TEMPORARY_PLACEHOLDER
 };
 #undef SUBSYS
 /*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index ced292d..2726d82 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -88,7 +88,7 @@ static DEFINE_MUTEX(cgroup_root_mutex);
 
 /*
  * Generate an array of cgroup subsystem pointers. At boot time, this is
- * populated up to CGROUP_BUILTIN_SUBSYS_COUNT, and modular subsystems are
+ * populated with the built in subsystems, and modular subsystems are
  * registered after that. The mutable section of this array is protected by
  * cgroup_mutex.
  */
@@ -1321,11 +1321,13 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 	 * take duplicate reference counts on a subsystem that's already used,
 	 * but rebind_subsystems handles this case.
 	 */
-	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		unsigned long bit = 1UL << i;
 
 		if (!(bit & opts->subsys_mask))
 			continue;
+		if (!subsys[i]->module)
+			continue;
 		if (!try_module_get(subsys[i]->module)) {
 			module_pin_failed = true;
 			break;
@@ -1337,12 +1339,14 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 		 * raced with a module_delete call, and to the user this is
 		 * essentially a "subsystem doesn't exist" case.
 		 */
-		for (i--; i >= CGROUP_BUILTIN_SUBSYS_COUNT; i--) {
+		for (i--; i >= 0; i--) {
 			/* drop refcounts only on the ones we took */
 			unsigned long bit = 1UL << i;
 
 			if (!(bit & opts->subsys_mask))
 				continue;
+			if (!subsys[i]->module)
+				continue;
 			module_put(subsys[i]->module);
 		}
 		return -ENOENT;
@@ -1354,11 +1358,13 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 static void drop_parsed_module_refcounts(unsigned long subsys_mask)
 {
 	int i;
-	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		unsigned long bit = 1UL << i;
 
 		if (!(bit & subsys_mask))
 			continue;
+		if (!subsys[i]->module)
+			continue;
 		module_put(subsys[i]->module);
 	}
 }
@@ -1437,6 +1443,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp)
 	INIT_LIST_HEAD(&cgrp->event_list);
 	spin_lock_init(&cgrp->event_list_lock);
 	simple_xattrs_init(&cgrp->xattrs);
+	memset(cgrp->subsys, 0, sizeof(cgrp->subsys));
 }
 
 static void init_cgroup_root(struct cgroupfs_root *root)
@@ -4442,8 +4449,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 	 * since cgroup_init_subsys will have already taken care of it.
 	 */
 	if (ss->module == NULL) {
-		/* a few sanity checks */
-		BUG_ON(ss->subsys_id >= CGROUP_BUILTIN_SUBSYS_COUNT);
+		/* a sanity check */
 		BUG_ON(subsys[ss->subsys_id] != ss);
 		return 0;
 	}
@@ -4457,7 +4463,7 @@ int __init_or_module cgroup_load_subsys(struct cgroup_subsys *ss)
 	 */
 	mutex_lock(&cgroup_mutex);
 	/* find the first empty slot in the array */
-	for (i = CGROUP_BUILTIN_SUBSYS_COUNT; i < CGROUP_SUBSYS_COUNT; i++) {
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		if (subsys[i] == NULL)
 			break;
 	}
@@ -4560,7 +4566,6 @@ void cgroup_unload_subsys(struct cgroup_subsys *ss)
 
 	mutex_lock(&cgroup_mutex);
 	/* deassign the subsys_id */
-	BUG_ON(ss->subsys_id < CGROUP_BUILTIN_SUBSYS_COUNT);
 	subsys[ss->subsys_id] = NULL;
 
 	/* remove subsystem from rootnode's list of subsystems */
@@ -4623,10 +4628,13 @@ int __init cgroup_init_early(void)
 	for (i = 0; i < CSS_SET_TABLE_SIZE; i++)
 		INIT_HLIST_HEAD(&css_set_table[i]);
 
-	/* at bootup time, we don't worry about modular subsystems */
-	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
 
+		/* at bootup time, we don't worry about modular subsystems */
+		if (!ss || ss->module)
+			continue;
+
 		BUG_ON(!ss->name);
 		BUG_ON(strlen(ss->name) > MAX_CGROUP_TYPE_NAMELEN);
 		BUG_ON(!ss->create);
@@ -4659,9 +4667,12 @@ int __init cgroup_init(void)
 	if (err)
 		return err;
 
-	/* at bootup time, we don't worry about modular subsystems */
-	for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
+	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
+
+		/* at bootup time, we don't worry about modular subsystems */
+		if (!ss || ss->module)
+			continue;
 		if (!ss->early_init)
 			cgroup_init_subsys(ss);
 		if (ss->use_id)
@@ -4856,13 +4867,16 @@ void cgroup_fork_callbacks(struct task_struct *child)
 {
 	if (need_forkexit_callback) {
 		int i;
-		/*
-		 * forkexit callbacks are only supported for builtin
-		 * subsystems, and the builtin section of the subsys array is
-		 * immutable, so we don't need to lock the subsys array here.
-		 */
-		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
+		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 			struct cgroup_subsys *ss = subsys[i];
+
+			/*
+			 * forkexit callbacks are only supported for
+			 * builtin subsystems.
+			 */
+			if (!ss || ss->module)
+				continue;
+
 			if (ss->fork)
 				ss->fork(child);
 		}
@@ -4967,12 +4981,13 @@ void cgroup_exit(struct task_struct *tsk, int run_callbacks)
 	tsk->cgroups = &init_css_set;
 
 	if (run_callbacks && need_forkexit_callback) {
-		/*
-		 * modular subsystems can't use callbacks, so no need to lock
-		 * the subsys array
-		 */
-		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
+		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 			struct cgroup_subsys *ss = subsys[i];
+
+			/* modular subsystems can't use callbacks */
+			if (!ss || ss->module)
+				continue;
+
 			if (ss->exit) {
 				struct cgroup *old_cgrp =
 					rcu_dereference_raw(cg->subsys[i])->cgroup;
@@ -5158,13 +5173,17 @@ static int __init cgroup_disable(char *str)
 	while ((token = strsep(&str, ",")) != NULL) {
 		if (!*token)
 			continue;
-		/*
-		 * cgroup_disable, being at boot time, can't know about module
-		 * subsystems, so we don't worry about them.
-		 */
-		for (i = 0; i < CGROUP_BUILTIN_SUBSYS_COUNT; i++) {
+		for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 			struct cgroup_subsys *ss = subsys[i];
 
+			/*
+			 * cgroup_disable, being at boot time, can't
+			 * know about module subsystems, so we don't
+			 * worry about them.
+			 */
+			if (!ss || ss->module)
+				continue;
+
 			if (!strcmp(token, ss->name)) {
 				ss->disabled = 1;
 				printk(KERN_INFO "Disabling %s control group"
-- 
1.7.12.315.g682ce8b

^ permalink raw reply related

* [PATCH v4 2/8] cgroup: net_cls: Do not define task_cls_classid() when not selected
From: Daniel Wagner @ 2012-09-12 14:12 UTC (permalink / raw)
  To: netdev, cgroups
  Cc: Daniel Wagner, Gao feng, Jamal Hadi Salim, John Fastabend,
	Li Zefan, Neil Horman
In-Reply-To: <1347459128-32236-1-git-send-email-wagi@monom.org>

From: Daniel Wagner <daniel.wagner@bmw-carit.de>

task_cls_classid() should not be defined in case the configuration is
CONFIG_NET_CLS_CGROUP=n. The reason is that in a following patch the
net_cls_subsys_id will only be defined if CONFIG_NET_CLS_CGROUP!=n.
When net_cls is not built at all a caller should only get an empty
task_cls_classid() without any references to net_cls_subsys_id.

Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: netdev@vger.kernel.org
Cc: cgroups@vger.kernel.org
---
 include/net/cls_cgroup.h | 11 ++++++-----
 net/core/sock.c          |  2 ++
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h
index e88527a..9bd5db9 100644
--- a/include/net/cls_cgroup.h
+++ b/include/net/cls_cgroup.h
@@ -17,7 +17,7 @@
 #include <linux/hardirq.h>
 #include <linux/rcupdate.h>
 
-#ifdef CONFIG_CGROUPS
+#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
 struct cgroup_cls_state
 {
 	struct cgroup_subsys_state css;
@@ -26,7 +26,7 @@ struct cgroup_cls_state
 
 extern void sock_update_classid(struct sock *sk);
 
-#ifdef CONFIG_NET_CLS_CGROUP
+#if IS_BUILTIN(CONFIG_NET_CLS_CGROUP)
 static inline u32 task_cls_classid(struct task_struct *p)
 {
 	int classid;
@@ -41,7 +41,8 @@ static inline u32 task_cls_classid(struct task_struct *p)
 
 	return classid;
 }
-#else
+#elif IS_MODULE(CONFIG_NET_CLS_CGROUP)
+
 extern int net_cls_subsys_id;
 
 static inline u32 task_cls_classid(struct task_struct *p)
@@ -63,7 +64,7 @@ static inline u32 task_cls_classid(struct task_struct *p)
 	return classid;
 }
 #endif
-#else
+#else /* !CGROUP_NET_CLS_CGROUP */
 static inline void sock_update_classid(struct sock *sk)
 {
 }
@@ -72,5 +73,5 @@ static inline u32 task_cls_classid(struct task_struct *p)
 {
 	return 0;
 }
-#endif
+#endif /* CGROUP_NET_CLS_CGROUP */
 #endif  /* _NET_CLS_CGROUP_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 8f67ced..82cadc6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1223,6 +1223,7 @@ static void sk_prot_free(struct proto *prot, struct sock *sk)
 }
 
 #ifdef CONFIG_CGROUPS
+#if IS_ENABLED(CONFIG_NET_CLS_CGROUP)
 void sock_update_classid(struct sock *sk)
 {
 	u32 classid;
@@ -1234,6 +1235,7 @@ void sock_update_classid(struct sock *sk)
 		sk->sk_classid = classid;
 }
 EXPORT_SYMBOL(sock_update_classid);
+#endif
 
 void sock_update_netprioidx(struct sock *sk, struct task_struct *task)
 {
-- 
1.7.12.315.g682ce8b

^ permalink raw reply related

* [PATCH v4 0/8] cgroup: Assign subsystem IDs during compile time
From: Daniel Wagner @ 2012-09-12 14:12 UTC (permalink / raw)
  To: netdev, cgroups
  Cc: Daniel Wagner, David S. Miller, Paul E. McKenney, Andrew Morton,
	Eric Dumazet, Gao feng, Glauber Costa, Herbert Xu,
	Jamal Hadi Salim, John Fastabend, Kamezawa Hiroyuki, Li Zefan,
	Neil Horman, Tejun Heo

From: Daniel Wagner <daniel.wagner@bmw-carit.de>

Hi,

I've removed the useless test in patch #4 and updated the commit message
on patch #7. 

While rewriting the commit message #7 I realized the pointer check was
completely wrong. Instead of testing the return value of
task_subsys_state() I tested the pointer returned by container_of. For
more details on this see the commit message.

Because of this I added Herbert and Paul to the Cc list. Please have
a close look at my rambling on the RCU part in patch #7. Thanks a lot!

This series is against 

     git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup.git for-3.7

cheers,
daniel


Previous cover letters:

v3:

In this version I tried to concentrate on the main topic of this
series, so I removed some of the things which were not really needed
and I have to admit the result looks much better. So I hope that will
simplify the review for you.

I reordered some of the patches and dropped the jump label
optimization for now. When this series is applied, then I can follow
up with those changes.

Overall, I tried to address all comments I got from v2. I didn't address
Tejun's comment on

  cgroup: Assign subsystem IDs during compile time

to split the net_cls and net_prio changes from that patch.  But I
tried to 'fix' this by being a bit more verbose.

The last patch is then the sweet one which gives some memory
back. 

v2:

The most notable change is that enabling/disabling of the jump labels
is not inside the cgroup_lock anymore (create/destroy cb). Instead
the corresponding functions will be called on module load or unload.

CGROUP_BUILTIN_SUBSYS_COUNT is also gone in this version.  This time I
trade space for speed. Some extra cycles are spent to identify the
modules in the for loops, e.g.

for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
	struct cgroup_subsys_state *ss = cgrp->subsys[i];

	/* at bootup time, we don't worry about modular subsystems */
	if (!ss || (ss && ss->module))
		continue;

	[...]
}

CGROUP_SUBSYS_COUNT is currently 12 if all controllers are built.  I
haven't found any other way to get rid of CGROUP_BUILTIN_SUBSYS_COUNT
without real dirty preprocessor tricks.

Finally, the two versions of task_cls_classid() and task_netprioidx()
are merged together.

v1:

I was able to 'fix' the CGROUP_BUILTIN_SUBSYS_COUNT definition. With this
version there is no unused subsys_id.

The number of builtin subsystems is counted with gcc's predefined
__COUNTER__ macro. This is a bit fragile, because __COUNTER__
is only reset to 0 per compile unit. There is a workaround for this.
When starting to enumerate we need to store the current value of
__COUNTER__ and then subtract that from all enums we define.

Not sure if that is okay or not.

v0:

The patch #1 and #2 are there to be able to introduce (#3, #4) the 
jump labels in task_cls_classid() and task_netprioidx(). The jump
labels are needed to know when it is safe to access the controller. 
For example not safe means the module is not yet loaded.

All those patches are just preparation for the centerpiece (#5)
of this series. This one will remove the dynamic subsystem ID
generation and fall back to compile time generated IDs.

This is the first result from the discussion around on the
"cgroup cls & netprio 'cleanups'" patches.

These patches are against net-next

v4: - removed unnecessary testing in patch #4
    - updated commit message in patch #7
    - fixed wrong pointer check in patch #7
v3: - dropping unrelated patches such as the jump label patch
    - reordered the patches
    - split the "cgroup: Assign subsystem IDs during compile time" patch a bit
    - fixed the ordering dependency when assigning the subsystems
    - removed synchronize_rcu() calls
    - more verbose commit messages
v2: - do not use dirty precompiler tricks:
      use ss->module to identify modules in the loops.
    - enable/disable jump labels in module load/unload functions
    - merge builtin/module versions of task_cls_classid() and task_netprioidx
v1: - only use jump labels when built as module (#3, #4)
    - get rid of the additional 'pointer' (#5)
v0: - initial version

Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Gao feng <gaofeng@cn.fujitsu.com>
Cc: Glauber Costa <glommer@parallels.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: John Fastabend <john.r.fastabend@intel.com>
Cc: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizefan@huawei.com>
Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: netdev@vger.kernel.org
Cc: cgroups@vger.kernel.org

Daniel Wagner (8):
  cgroup: net_cls: Move sock_update_classid() declaration to
    cls_cgroup.h
  cgroup: net_cls: Do not define task_cls_classid() when not selected
  cgroup: net_prio: Do not define task_netpioidx() when not selected
  cgroup: Remove CGROUP_BUILTIN_SUBSYS_COUNT
  cgroup: Wrap subsystem selection macro
  cgroup: Do not depend on a given order when populating the subsys
    array
  cgroup: Assign subsystem IDs during compile time
  cgroup: Define CGROUP_SUBSYS_COUNT according the configuration

 include/linux/cgroup.h        | 12 +++---
 include/linux/cgroup_subsys.h | 24 +++++------
 include/net/cls_cgroup.h      | 27 ++++++------
 include/net/netprio_cgroup.h  | 30 +++++--------
 include/net/sock.h            |  8 ----
 kernel/cgroup.c               | 98 ++++++++++++++++++++++---------------------
 net/core/netprio_cgroup.c     | 11 -----
 net/core/sock.c               | 15 ++-----
 net/sched/cls_cgroup.c        | 13 ------
 9 files changed, 97 insertions(+), 141 deletions(-)

-- 
1.7.12.315.g682ce8b

^ permalink raw reply

* [PATCH v4 5/8] cgroup: Wrap subsystem selection macro
From: Daniel Wagner @ 2012-09-12 14:12 UTC (permalink / raw)
  To: netdev-u79uwXL29TY76Z2rM5mHXA, cgroups-u79uwXL29TY76Z2rM5mHXA
  Cc: Daniel Wagner, Gao feng, Jamal Hadi Salim, John Fastabend,
	Li Zefan, Neil Horman, Tejun Heo
In-Reply-To: <1347459128-32236-1-git-send-email-wagi-kQCPcA+X3s7YtjvyW6yDsg@public.gmane.org>

From: Daniel Wagner <daniel.wagner-98C5kh4wR6ohFhg+JK9F0w@public.gmane.org>

Before we are able to define all subsystem ids at compile time we need
more fine-grained control over what gets defined when we include
cgroup_subsys.h. For example, we define the enums for the subsystems or
declare the struct cgroup_subsys externs (builtin subsystems) by including
cgroup_subsys.h and defining SUBSYS accordingly.

Currently, the decision if a subsys is used is defined inside the
header by testing if CONFIG_*=y is true. By moving this test outside
of cgroup_subsys.h we are able to control it on the include level.

This is done by introducing IS_SUBSYS_ENABLED which then is defined
according to the task, e.g. is CONFIG_*=y or CONFIG_*=m.

Signed-off-by: Daniel Wagner <daniel.wagner-98C5kh4wR6ohFhg+JK9F0w@public.gmane.org>
Cc: Gao feng <gaofeng-BthXqXjhjHXQFUHtdCDX3A@public.gmane.org>
Cc: Jamal Hadi Salim <jhs-jkUAjuhPggJWk0Htik3J/w@public.gmane.org>
Cc: John Fastabend <john.r.fastabend-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Cc: Li Zefan <lizefan-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
Cc: Neil Horman <nhorman-2XuSBdqkA4R54TAoqtyWWQ@public.gmane.org>
Cc: Tejun Heo <tj-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>
Cc: netdev-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
Cc: cgroups-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
---
 include/linux/cgroup.h        |  4 ++++
 include/linux/cgroup_subsys.h | 24 ++++++++++++------------
 kernel/cgroup.c               |  1 +
 3 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 1916cdb..a5ab565 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -46,10 +46,12 @@ extern const struct file_operations proc_cgroup_operations;
 
 /* Define the enumeration of all builtin cgroup subsystems */
 #define SUBSYS(_x) _x ## _subsys_id,
+#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
 enum cgroup_subsys_id {
 #include <linux/cgroup_subsys.h>
 	__CGROUP_TEMPORARY_PLACEHOLDER
 };
+#undef IS_SUBSYS_ENABLED
 #undef SUBSYS
 /*
  * This define indicates the maximum number of subsystems that can be loaded
@@ -528,7 +530,9 @@ struct cgroup_subsys {
 };
 
 #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
+#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
 #include <linux/cgroup_subsys.h>
+#undef IS_SUBSYS_ENABLED
 #undef SUBSYS
 
 static inline struct cgroup_subsys_state *cgroup_subsys_state(
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index dfae957..f204a7a 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -7,73 +7,73 @@
 
 /* */
 
-#ifdef CONFIG_CPUSETS
+#if IS_SUBSYS_ENABLED(CONFIG_CPUSETS)
 SUBSYS(cpuset)
 #endif
 
 /* */
 
-#ifdef CONFIG_CGROUP_DEBUG
+#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEBUG)
 SUBSYS(debug)
 #endif
 
 /* */
 
-#ifdef CONFIG_CGROUP_SCHED
+#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_SCHED)
 SUBSYS(cpu_cgroup)
 #endif
 
 /* */
 
-#ifdef CONFIG_CGROUP_CPUACCT
+#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_CPUACCT)
 SUBSYS(cpuacct)
 #endif
 
 /* */
 
-#ifdef CONFIG_MEMCG
+#if IS_SUBSYS_ENABLED(CONFIG_MEMCG)
 SUBSYS(mem_cgroup)
 #endif
 
 /* */
 
-#ifdef CONFIG_CGROUP_DEVICE
+#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_DEVICE)
 SUBSYS(devices)
 #endif
 
 /* */
 
-#ifdef CONFIG_CGROUP_FREEZER
+#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_FREEZER)
 SUBSYS(freezer)
 #endif
 
 /* */
 
-#ifdef CONFIG_NET_CLS_CGROUP
+#if IS_SUBSYS_ENABLED(CONFIG_NET_CLS_CGROUP)
 SUBSYS(net_cls)
 #endif
 
 /* */
 
-#ifdef CONFIG_BLK_CGROUP
+#if IS_SUBSYS_ENABLED(CONFIG_BLK_CGROUP)
 SUBSYS(blkio)
 #endif
 
 /* */
 
-#ifdef CONFIG_CGROUP_PERF
+#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_PERF)
 SUBSYS(perf)
 #endif
 
 /* */
 
-#ifdef CONFIG_NETPRIO_CGROUP
+#if IS_SUBSYS_ENABLED(CONFIG_NETPRIO_CGROUP)
 SUBSYS(net_prio)
 #endif
 
 /* */
 
-#ifdef CONFIG_CGROUP_HUGETLB
+#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_HUGETLB)
 SUBSYS(hugetlb)
 #endif
 
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 2726d82..769600c 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -93,6 +93,7 @@ static DEFINE_MUTEX(cgroup_root_mutex);
  * cgroup_mutex.
  */
 #define SUBSYS(_x) &_x ## _subsys,
+#define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option)
 static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = {
 #include <linux/cgroup_subsys.h>
 };
-- 
1.7.12.315.g682ce8b

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox