Linux RDMA and InfiniBand development
 help / color / mirror / Atom feed
* [PATCH v3 29/52] IB/qib: Add qib_qsfp.c
From: Ralph Campbell @ 2010-05-07  0:01 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_qsfp.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_qsfp.c |  564 ++++++++++++++++++++++++++++++++++
 1 files changed, 564 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_qsfp.c

diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c
new file mode 100644
index 0000000..35b3604
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_qsfp.c
@@ -0,0 +1,564 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+
+#include "qib.h"
+#include "qib_qsfp.h"
+
+/*
+ * QSFP support for ib_qib driver, using "Two Wire Serial Interface" driver
+ * in qib_twsi.c
+ */
+#define QSFP_MAX_RETRY 4
+
+static int qsfp_read(struct qib_pportdata *ppd, int addr, void *bp, int len)
+{
+	struct qib_devdata *dd = ppd->dd;
+	u32 out, mask;
+	int ret, cnt, pass = 0;
+	int stuck = 0;
+	u8 *buff = bp;
+
+	ret = mutex_lock_interruptible(&dd->eep_lock);
+	if (ret)
+		goto no_unlock;
+
+	if (dd->twsi_eeprom_dev == QIB_TWSI_NO_DEV) {
+		ret = -ENXIO;
+		goto bail;
+	}
+
+	/*
+	 * We presume, if we are called at all, that this board has
+	 * QSFP. This is on the same i2c chain as the legacy parts,
+	 * but only responds if the module is selected via GPIO pins.
+	 * Further, there are very long setup and hold requirements
+	 * on MODSEL.
+	 */
+	mask = QSFP_GPIO_MOD_SEL_N | QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE;
+	out = QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE;
+	if (ppd->hw_pidx) {
+		mask <<= QSFP_GPIO_PORT2_SHIFT;
+		out <<= QSFP_GPIO_PORT2_SHIFT;
+	}
+
+	dd->f_gpio_mod(dd, out, mask, mask);
+
+	/*
+	 * Module could take up to 2 Msec to respond to MOD_SEL, and there
+	 * is no way to tell if it is ready, so we must wait.
+	 */
+	msleep(2);
+
+	/* Make sure TWSI bus is in sane state. */
+	ret = qib_twsi_reset(dd);
+	if (ret) {
+		qib_dev_porterr(dd, ppd->port,
+				"QSFP interface Reset for read failed\n");
+		ret = -EIO;
+		stuck = 1;
+		goto deselect;
+	}
+
+	/* All QSFP modules are at A0 */
+
+	cnt = 0;
+	while (cnt < len) {
+		unsigned in_page;
+		int wlen = len - cnt;
+		in_page = addr % QSFP_PAGESIZE;
+		if ((in_page + wlen) > QSFP_PAGESIZE)
+			wlen = QSFP_PAGESIZE - in_page;
+		ret = qib_twsi_blk_rd(dd, QSFP_DEV, addr, buff + cnt, wlen);
+		/* Some QSFP's fail first try. Retry as experiment */
+		if (ret && cnt == 0 && ++pass < QSFP_MAX_RETRY)
+			continue;
+		if (ret) {
+			/* qib_twsi_blk_rd() 1 for error, else 0 */
+			ret = -EIO;
+			goto deselect;
+		}
+		addr += wlen;
+		cnt += wlen;
+	}
+	ret = cnt;
+
+deselect:
+	/*
+	 * Module could take up to 10 uSec after transfer before
+	 * ready to respond to MOD_SEL negation, and there is no way
+	 * to tell if it is ready, so we must wait.
+	 */
+	udelay(10);
+	/* set QSFP MODSEL, RST. LP all high */
+	dd->f_gpio_mod(dd, mask, mask, mask);
+
+	/*
+	 * Module could take up to 2 Msec to respond to MOD_SEL
+	 * going away, and there is no way to tell if it is ready.
+	 * so we must wait.
+	 */
+	if (stuck)
+		qib_dev_err(dd, "QSFP interface bus stuck non-idle\n");
+
+	if (pass >= QSFP_MAX_RETRY && ret)
+		qib_dev_porterr(dd, ppd->port, "QSFP failed even retrying\n");
+	else if (pass)
+		qib_dev_porterr(dd, ppd->port, "QSFP retries: %d\n", pass);
+
+	msleep(2);
+
+bail:
+	mutex_unlock(&dd->eep_lock);
+
+no_unlock:
+	return ret;
+}
+
+/*
+ * qsfp_write
+ * We do not ordinarily write the QSFP, but this is needed to select
+ * the page on non-flat QSFPs, and possibly later unusual cases
+ */
+static int qib_qsfp_write(struct qib_pportdata *ppd, int addr, void *bp,
+			  int len)
+{
+	struct qib_devdata *dd = ppd->dd;
+	u32 out, mask;
+	int ret, cnt;
+	u8 *buff = bp;
+
+	ret = mutex_lock_interruptible(&dd->eep_lock);
+	if (ret)
+		goto no_unlock;
+
+	if (dd->twsi_eeprom_dev == QIB_TWSI_NO_DEV) {
+		ret = -ENXIO;
+		goto bail;
+	}
+
+	/*
+	 * We presume, if we are called at all, that this board has
+	 * QSFP. This is on the same i2c chain as the legacy parts,
+	 * but only responds if the module is selected via GPIO pins.
+	 * Further, there are very long setup and hold requirements
+	 * on MODSEL.
+	 */
+	mask = QSFP_GPIO_MOD_SEL_N | QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE;
+	out = QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE;
+	if (ppd->hw_pidx) {
+		mask <<= QSFP_GPIO_PORT2_SHIFT;
+		out <<= QSFP_GPIO_PORT2_SHIFT;
+	}
+	dd->f_gpio_mod(dd, out, mask, mask);
+
+	/*
+	 * Module could take up to 2 Msec to respond to MOD_SEL,
+	 * and there is no way to tell if it is ready, so we must wait.
+	 */
+	msleep(2);
+
+	/* Make sure TWSI bus is in sane state. */
+	ret = qib_twsi_reset(dd);
+	if (ret) {
+		qib_dev_porterr(dd, ppd->port,
+				"QSFP interface Reset for write failed\n");
+		ret = -EIO;
+		goto deselect;
+	}
+
+	/* All QSFP modules are at A0 */
+
+	cnt = 0;
+	while (cnt < len) {
+		unsigned in_page;
+		int wlen = len - cnt;
+		in_page = addr % QSFP_PAGESIZE;
+		if ((in_page + wlen) > QSFP_PAGESIZE)
+			wlen = QSFP_PAGESIZE - in_page;
+		ret = qib_twsi_blk_wr(dd, QSFP_DEV, addr, buff + cnt, wlen);
+		if (ret) {
+			/* qib_twsi_blk_wr() 1 for error, else 0 */
+			ret = -EIO;
+			goto deselect;
+		}
+		addr += wlen;
+		cnt += wlen;
+	}
+	ret = cnt;
+
+deselect:
+	/*
+	 * Module could take up to 10 uSec after transfer before
+	 * ready to respond to MOD_SEL negation, and there is no way
+	 * to tell if it is ready, so we must wait.
+	 */
+	udelay(10);
+	/* set QSFP MODSEL, RST, LP high */
+	dd->f_gpio_mod(dd, mask, mask, mask);
+	/*
+	 * Module could take up to 2 Msec to respond to MOD_SEL
+	 * going away, and there is no way to tell if it is ready.
+	 * so we must wait.
+	 */
+	msleep(2);
+
+bail:
+	mutex_unlock(&dd->eep_lock);
+
+no_unlock:
+	return ret;
+}
+
+/*
+ * For validation, we want to check the checksums, even of the
+ * fields we do not otherwise use. This function reads the bytes from
+ * <first> to <next-1> and returns the 8lsbs of the sum, or <0 for errors
+ */
+static int qsfp_cks(struct qib_pportdata *ppd, int first, int next)
+{
+	int ret;
+	u16 cks;
+	u8 bval;
+
+	cks = 0;
+	while (first < next) {
+		ret = qsfp_read(ppd, first, &bval, 1);
+		if (ret < 0)
+			goto bail;
+		cks += bval;
+		++first;
+	}
+	ret = cks & 0xFF;
+bail:
+	return ret;
+
+}
+
+int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp)
+{
+	int ret;
+	int idx;
+	u16 cks;
+	u32 mask;
+	u8 peek[4];
+
+	/* ensure sane contents on invalid reads, for cable swaps */
+	memset(cp, 0, sizeof(*cp));
+
+	mask = QSFP_GPIO_MOD_PRS_N;
+	if (ppd->hw_pidx)
+		mask <<= QSFP_GPIO_PORT2_SHIFT;
+
+	ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0);
+	if (ret & mask) {
+		ret = -ENODEV;
+		goto bail;
+	}
+
+	ret = qsfp_read(ppd, 0, peek, 3);
+	if (ret < 0)
+		goto bail;
+	if ((peek[0] & 0xFE) != 0x0C)
+		qib_dev_porterr(ppd->dd, ppd->port,
+				"QSFP byte0 is 0x%02X, S/B 0x0C/D\n", peek[0]);
+
+	if ((peek[2] & 2) == 0) {
+		/*
+		 * If cable is paged, rather than "flat memory", we need to
+		 * set the page to zero, Even if it already appears to be zero.
+		 */
+		u8 poke = 0;
+		ret = qib_qsfp_write(ppd, 127, &poke, 1);
+		udelay(50);
+		if (ret != 1) {
+			qib_dev_porterr(ppd->dd, ppd->port,
+					"Failed QSFP Page set\n");
+			goto bail;
+		}
+	}
+
+	ret = qsfp_read(ppd, QSFP_MOD_ID_OFFS, &cp->id, 1);
+	if (ret < 0)
+		goto bail;
+	if ((cp->id & 0xFE) != 0x0C)
+		qib_dev_porterr(ppd->dd, ppd->port,
+				"QSFP ID byte is 0x%02X, S/B 0x0C/D\n", cp->id);
+	cks = cp->id;
+
+	ret = qsfp_read(ppd, QSFP_MOD_PWR_OFFS, &cp->pwr, 1);
+	if (ret < 0)
+		goto bail;
+	cks += cp->pwr;
+
+	ret = qsfp_cks(ppd, QSFP_MOD_PWR_OFFS + 1, QSFP_MOD_LEN_OFFS);
+	if (ret < 0)
+		goto bail;
+	cks += ret;
+
+	ret = qsfp_read(ppd, QSFP_MOD_LEN_OFFS, &cp->len, 1);
+	if (ret < 0)
+		goto bail;
+	cks += cp->len;
+
+	ret = qsfp_read(ppd, QSFP_MOD_TECH_OFFS, &cp->tech, 1);
+	if (ret < 0)
+		goto bail;
+	cks += cp->tech;
+
+	ret = qsfp_read(ppd, QSFP_VEND_OFFS, &cp->vendor, QSFP_VEND_LEN);
+	if (ret < 0)
+		goto bail;
+	for (idx = 0; idx < QSFP_VEND_LEN; ++idx)
+		cks += cp->vendor[idx];
+
+	ret = qsfp_read(ppd, QSFP_IBXCV_OFFS, &cp->xt_xcv, 1);
+	if (ret < 0)
+		goto bail;
+	cks += cp->xt_xcv;
+
+	ret = qsfp_read(ppd, QSFP_VOUI_OFFS, &cp->oui, QSFP_VOUI_LEN);
+	if (ret < 0)
+		goto bail;
+	for (idx = 0; idx < QSFP_VOUI_LEN; ++idx)
+		cks += cp->oui[idx];
+
+	ret = qsfp_read(ppd, QSFP_PN_OFFS, &cp->partnum, QSFP_PN_LEN);
+	if (ret < 0)
+		goto bail;
+	for (idx = 0; idx < QSFP_PN_LEN; ++idx)
+		cks += cp->partnum[idx];
+
+	ret = qsfp_read(ppd, QSFP_REV_OFFS, &cp->rev, QSFP_REV_LEN);
+	if (ret < 0)
+		goto bail;
+	for (idx = 0; idx < QSFP_REV_LEN; ++idx)
+		cks += cp->rev[idx];
+
+	ret = qsfp_read(ppd, QSFP_ATTEN_OFFS, &cp->atten, QSFP_ATTEN_LEN);
+	if (ret < 0)
+		goto bail;
+	for (idx = 0; idx < QSFP_ATTEN_LEN; ++idx)
+		cks += cp->atten[idx];
+
+	ret = qsfp_cks(ppd, QSFP_ATTEN_OFFS + QSFP_ATTEN_LEN, QSFP_CC_OFFS);
+	if (ret < 0)
+		goto bail;
+	cks += ret;
+
+	cks &= 0xFF;
+	ret = qsfp_read(ppd, QSFP_CC_OFFS, &cp->cks1, 1);
+	if (ret < 0)
+		goto bail;
+	if (cks != cp->cks1)
+		qib_dev_porterr(ppd->dd, ppd->port,
+				"QSFP cks1 is %02X, computed %02X\n", cp->cks1,
+				cks);
+
+	/* Second checksum covers 192 to (serial, date, lot) */
+	ret = qsfp_cks(ppd, QSFP_CC_OFFS + 1, QSFP_SN_OFFS);
+	if (ret < 0)
+		goto bail;
+	cks = ret;
+
+	ret = qsfp_read(ppd, QSFP_SN_OFFS, &cp->serial, QSFP_SN_LEN);
+	if (ret < 0)
+		goto bail;
+	for (idx = 0; idx < QSFP_SN_LEN; ++idx)
+		cks += cp->serial[idx];
+
+	ret = qsfp_read(ppd, QSFP_DATE_OFFS, &cp->date, QSFP_DATE_LEN);
+	if (ret < 0)
+		goto bail;
+	for (idx = 0; idx < QSFP_DATE_LEN; ++idx)
+		cks += cp->date[idx];
+
+	ret = qsfp_read(ppd, QSFP_LOT_OFFS, &cp->lot, QSFP_LOT_LEN);
+	if (ret < 0)
+		goto bail;
+	for (idx = 0; idx < QSFP_LOT_LEN; ++idx)
+		cks += cp->lot[idx];
+
+	ret = qsfp_cks(ppd, QSFP_LOT_OFFS + QSFP_LOT_LEN, QSFP_CC_EXT_OFFS);
+	if (ret < 0)
+		goto bail;
+	cks += ret;
+
+	ret = qsfp_read(ppd, QSFP_CC_EXT_OFFS, &cp->cks2, 1);
+	if (ret < 0)
+		goto bail;
+	cks &= 0xFF;
+	if (cks != cp->cks2)
+		qib_dev_porterr(ppd->dd, ppd->port,
+				"QSFP cks2 is %02X, computed %02X\n", cp->cks2,
+				cks);
+	return 0;
+
+bail:
+	cp->id = 0;
+	return ret;
+}
+
+const char * const qib_qsfp_devtech[16] = {
+	"850nm VCSEL", "1310nm VCSEL", "1550nm VCSEL", "1310nm FP",
+	"1310nm DFB", "1550nm DFB", "1310nm EML", "1550nm EML",
+	"Cu Misc", "1490nm DFB", "Cu NoEq", "Cu Eq",
+	"Undef", "Cu Active BothEq", "Cu FarEq", "Cu NearEq"
+};
+
+#define QSFP_DUMP_CHUNK 16 /* Holds longest string */
+#define QSFP_DEFAULT_HDR_CNT 224
+
+static const char *pwr_codes = "1.5W2.0W2.5W3.5W";
+
+/*
+ * Initialize structures that control access to QSFP. Called once per port
+ * on cards that support QSFP.
+ */
+void qib_qsfp_init(struct qib_qsfp_data *qd,
+		   void (*fevent)(struct work_struct *))
+{
+	u32 mask, highs;
+	int pins;
+
+	struct qib_devdata *dd = qd->ppd->dd;
+
+	/* Initialize work struct for later QSFP events */
+	INIT_WORK(&qd->work, fevent);
+
+	/*
+	 * Later, we may want more validation. For now, just set up pins and
+	 * blip reset. If module is present, call qib_refresh_qsfp_cache(),
+	 * to do further init.
+	 */
+	mask = QSFP_GPIO_MOD_SEL_N | QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE;
+	highs = mask - QSFP_GPIO_MOD_RST_N;
+	if (qd->ppd->hw_pidx) {
+		mask <<= QSFP_GPIO_PORT2_SHIFT;
+		highs <<= QSFP_GPIO_PORT2_SHIFT;
+	}
+	dd->f_gpio_mod(dd, highs, mask, mask);
+	udelay(20); /* Generous RST dwell */
+
+	dd->f_gpio_mod(dd, mask, mask, mask);
+	/* Spec says module can take up to two seconds! */
+	mask = QSFP_GPIO_MOD_PRS_N;
+	if (qd->ppd->hw_pidx)
+		mask <<= QSFP_GPIO_PORT2_SHIFT;
+
+	/* Do not try to wait here. Better to let event handle it */
+	pins = dd->f_gpio_mod(dd, 0, 0, 0);
+	if (pins & mask)
+		goto bail;
+	/* We see a module, but it may be unwise to look yet. Just schedule */
+	qd->t_insert = get_jiffies_64();
+	schedule_work(&qd->work);
+bail:
+	return;
+}
+
+void qib_qsfp_deinit(struct qib_qsfp_data *qd)
+{
+	/*
+	 * There is nothing to do here for now.  our
+	 * work is scheduled with schedule_work(), and
+	 * flush_scheduled_work() from remove_one will
+	 * block until all work ssetup with schedule_work()
+	 * completes.
+	 */
+}
+
+int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len)
+{
+	struct qib_qsfp_cache cd;
+	u8 bin_buff[QSFP_DUMP_CHUNK];
+	char lenstr[6];
+	int sofar, ret;
+	int bidx = 0;
+
+	sofar = 0;
+	ret = qib_refresh_qsfp_cache(ppd, &cd);
+	if (ret < 0)
+		goto bail;
+
+	lenstr[0] = ' ';
+	lenstr[1] = '\0';
+	if (QSFP_IS_CU(cd.tech))
+		sprintf(lenstr, "%dM ", cd.len);
+
+	sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n", pwr_codes +
+			   (QSFP_PWR(cd.pwr) * 4));
+
+	sofar += scnprintf(buf + sofar, len - sofar, "TECH:%s%s\n", lenstr,
+			   qib_qsfp_devtech[cd.tech >> 4]);
+
+	sofar += scnprintf(buf + sofar, len - sofar, "Vendor:%.*s\n",
+			   QSFP_VEND_LEN, cd.vendor);
+
+	sofar += scnprintf(buf + sofar, len - sofar, "OUI:%06X\n",
+			   QSFP_OUI(cd.oui));
+
+	sofar += scnprintf(buf + sofar, len - sofar, "Part#:%.*s\n",
+			   QSFP_PN_LEN, cd.partnum);
+	sofar += scnprintf(buf + sofar, len - sofar, "Rev:%.*s\n",
+			   QSFP_REV_LEN, cd.rev);
+	if (QSFP_IS_CU(cd.tech))
+		sofar += scnprintf(buf + sofar, len - sofar, "Atten:%d, %d\n",
+				   QSFP_ATTEN_SDR(cd.atten),
+				   QSFP_ATTEN_DDR(cd.atten));
+	sofar += scnprintf(buf + sofar, len - sofar, "Serial:%.*s\n",
+			   QSFP_SN_LEN, cd.serial);
+	sofar += scnprintf(buf + sofar, len - sofar, "Date:%.*s\n",
+			   QSFP_DATE_LEN, cd.date);
+	sofar += scnprintf(buf + sofar, len - sofar, "Lot:%.*s\n",
+			   QSFP_LOT_LEN, cd.date);
+
+	while (bidx < QSFP_DEFAULT_HDR_CNT) {
+		int iidx;
+		ret = qsfp_read(ppd, bidx, bin_buff, QSFP_DUMP_CHUNK);
+		if (ret < 0)
+			goto bail;
+		for (iidx = 0; iidx < ret; ++iidx) {
+			sofar += scnprintf(buf + sofar, len-sofar, " %02X",
+				bin_buff[iidx]);
+		}
+		sofar += scnprintf(buf + sofar, len - sofar, "\n");
+		bidx += QSFP_DUMP_CHUNK;
+	}
+	ret = sofar;
+bail:
+	return ret;
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 30/52] IB/qib: Add qib_qsfp.h
From: Ralph Campbell @ 2010-05-07  0:01 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_qsfp.h file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_qsfp.h |  184 ++++++++++++++++++++++++++++++++++
 1 files changed, 184 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_qsfp.h

diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h
new file mode 100644
index 0000000..19b527b
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_qsfp.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/* QSFP support common definitions, for ib_qib driver */
+
+#define QSFP_DEV 0xA0
+#define QSFP_PWR_LAG_MSEC 2000
+
+/*
+ * Below are masks for various QSFP signals, for Port 1.
+ * Port2 equivalents are shifted by QSFP_GPIO_PORT2_SHIFT.
+ * _N means asserted low
+ */
+#define QSFP_GPIO_MOD_SEL_N (4)
+#define QSFP_GPIO_MOD_PRS_N (8)
+#define QSFP_GPIO_INT_N (0x10)
+#define QSFP_GPIO_MOD_RST_N (0x20)
+#define QSFP_GPIO_LP_MODE (0x40)
+#define QSFP_GPIO_PORT2_SHIFT 5
+
+#define QSFP_PAGESIZE 128
+/* Defined fields that QLogic requires of qualified cables */
+/* Byte 0 is Identifier, not checked */
+/* Byte 1 is reserved "status MSB" */
+/* Byte 2 is "status LSB" We only care that D2 "Flat Mem" is set. */
+/*
+ * Rest of first 128 not used, although 127 is reserved for page select
+ * if module is not "Flat memory".
+ */
+/* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */
+#define QSFP_MOD_ID_OFFS 128
+/*
+ * Byte 129 is "Extended Identifier". We only care about D7,D6: Power class
+ *  0:1.5W, 1:2.0W, 2:2.5W, 3:3.5W
+ */
+#define QSFP_MOD_PWR_OFFS 129
+/* Byte 130 is Connector type. Not QLogic req'd */
+/* Bytes 131..138 are Transceiver types, bit maps for various tech, none IB */
+/* Byte 139 is encoding. code 0x01 is 8b10b. Not QLogic req'd */
+/* byte 140 is nominal bit-rate, in units of 100Mbits/sec Not QLogic req'd */
+/* Byte 141 is Extended Rate Select. Not QLogic req'd */
+/* Bytes 142..145 are lengths for various fiber types. Not QLogic req'd */
+/* Byte 146 is length for Copper. Units of 1 meter */
+#define QSFP_MOD_LEN_OFFS 146
+/*
+ * Byte 147 is Device technology. D0..3 not Qlogc req'd
+ * D4..7 select from 15 choices, translated by table:
+ */
+#define QSFP_MOD_TECH_OFFS 147
+extern const char *const qib_qsfp_devtech[16];
+/* Active Equalization includes fiber, copper full EQ, and copper near Eq */
+#define QSFP_IS_ACTIVE(tech) ((0xA2FF >> ((tech) >> 4)) & 1)
+/* Attenuation should be valid for copper other than full/near Eq */
+#define QSFP_HAS_ATTEN(tech) ((0x4D00 >> ((tech) >> 4)) & 1)
+/* Length is only valid if technology is "copper" */
+#define QSFP_IS_CU(tech) ((0xED00 >> ((tech) >> 4)) & 1)
+#define QSFP_TECH_1490 9
+
+#define QSFP_OUI(oui) (((unsigned)oui[0] << 16) | ((unsigned)oui[1] << 8) | \
+			oui[2])
+#define QSFP_OUI_AMPHENOL 0x415048
+#define QSFP_OUI_FINISAR  0x009065
+#define QSFP_OUI_GORE     0x002177
+
+/* Bytes 148..163 are Vendor Name, Left-justified Blank-filled */
+#define QSFP_VEND_OFFS 148
+#define QSFP_VEND_LEN 16
+/* Byte 164 is IB Extended tranceiver codes Bits D0..3 are SDR,DDR,QDR,EDR */
+#define QSFP_IBXCV_OFFS 164
+/* Bytes 165..167 are Vendor OUI number */
+#define QSFP_VOUI_OFFS 165
+#define QSFP_VOUI_LEN 3
+/* Bytes 168..183 are Vendor Part Number, string */
+#define QSFP_PN_OFFS 168
+#define QSFP_PN_LEN 16
+/* Bytes 184,185 are Vendor Rev. Left Justified, Blank-filled */
+#define QSFP_REV_OFFS 184
+#define QSFP_REV_LEN 2
+/*
+ * Bytes 186,187 are Wavelength, if Optical. Not Qlogic req'd
+ *  If copper, they are attenuation in dB:
+ * Byte 186 is at 2.5Gb/sec (SDR), Byte 187 at 5.0Gb/sec (DDR)
+ */
+#define QSFP_ATTEN_OFFS 186
+#define QSFP_ATTEN_LEN 2
+/* Bytes 188,189 are Wavelength tolerance, not QLogic req'd */
+/* Byte 190 is Max Case Temp. Not QLogic req'd */
+/* Byte 191 is LSB of sum of bytes 128..190. Not QLogic req'd */
+#define QSFP_CC_OFFS 191
+/* Bytes 192..195 are Options implemented in qsfp. Not Qlogic req'd */
+/* Bytes 196..211 are Serial Number, String */
+#define QSFP_SN_OFFS 196
+#define QSFP_SN_LEN 16
+/* Bytes 212..219 are date-code YYMMDD (MM==1 for Jan) */
+#define QSFP_DATE_OFFS 212
+#define QSFP_DATE_LEN 6
+/* Bytes 218,219 are optional lot-code, string */
+#define QSFP_LOT_OFFS 218
+#define QSFP_LOT_LEN 2
+/* Bytes 220, 221 indicate monitoring options, Not QLogic req'd */
+/* Byte 223 is LSB of sum of bytes 192..222 */
+#define QSFP_CC_EXT_OFFS 223
+
+/*
+ * struct qib_qsfp_data encapsulates state of QSFP device for one port.
+ * it will be part of port-chip-specific data if a board supports QSFP.
+ *
+ * Since multiple board-types use QSFP, and their pport_data structs
+ * differ (in the chip-specific section), we need a pointer to its head.
+ *
+ * Avoiding premature optimization, we will have one work_struct per port,
+ * and let the (increasingly inaccurately named) eep_lock arbitrate
+ * access to common resources.
+ *
+ */
+
+/*
+ * Hold the parts of the onboard EEPROM that we care about, so we aren't
+ * coonstantly bit-boffing
+ */
+struct qib_qsfp_cache {
+	u8 id;	/* must be 0x0C or 0x0D; 0 indicates invalid EEPROM read */
+	u8 pwr; /* in D6,7 */
+	u8 len;	/* in meters, Cu only */
+	u8 tech;
+	char vendor[QSFP_VEND_LEN];
+	u8 xt_xcv; /* Ext. tranceiver codes, 4 lsbs are IB speed supported */
+	u8 oui[QSFP_VOUI_LEN];
+	u8 partnum[QSFP_PN_LEN];
+	u8 rev[QSFP_REV_LEN];
+	u8 atten[QSFP_ATTEN_LEN];
+	u8 cks1;	/* Checksum of bytes 128..190 */
+	u8 serial[QSFP_SN_LEN];
+	u8 date[QSFP_DATE_LEN];
+	u8 lot[QSFP_LOT_LEN];
+	u8 cks2;	/* Checsum of bytes 192..222 */
+};
+
+#define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3)
+#define QSFP_ATTEN_SDR(attenarray) (attenarray[0])
+#define QSFP_ATTEN_DDR(attenarray) (attenarray[1])
+
+struct qib_qsfp_data {
+	/* Helps to find our way */
+	struct qib_pportdata *ppd;
+	struct work_struct work;
+	struct qib_qsfp_cache cache;
+	u64 t_insert;
+};
+
+extern int qib_refresh_qsfp_cache(struct qib_pportdata *ppd,
+				  struct qib_qsfp_cache *cp);
+extern void qib_qsfp_init(struct qib_qsfp_data *qd,
+			  void (*fevent)(struct work_struct *));
+extern void qib_qsfp_deinit(struct qib_qsfp_data *qd);

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 31/52] IB/qib: Add qib_rc.c
From: Ralph Campbell @ 2010-05-07  0:01 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_rc.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_rc.c | 2288 ++++++++++++++++++++++++++++++++++++
 1 files changed, 2288 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_rc.c

diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
new file mode 100644
index 0000000..40c0a37
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -0,0 +1,2288 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/io.h>
+
+#include "qib.h"
+
+/* cut down ridiculously long IB macro names */
+#define OP(x) IB_OPCODE_RC_##x
+
+static void rc_timeout(unsigned long arg);
+
+static u32 restart_sge(struct qib_sge_state *ss, struct qib_swqe *wqe,
+		       u32 psn, u32 pmtu)
+{
+	u32 len;
+
+	len = ((psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
+	ss->sge = wqe->sg_list[0];
+	ss->sg_list = wqe->sg_list + 1;
+	ss->num_sge = wqe->wr.num_sge;
+	ss->total_len = wqe->length;
+	qib_skip_sge(ss, len, 0);
+	return wqe->length - len;
+}
+
+static void start_timer(struct qib_qp *qp)
+{
+	qp->s_flags |= QIB_S_TIMER;
+	qp->s_timer.function = rc_timeout;
+	/* 4.096 usec. * (1 << qp->timeout) */
+	qp->s_timer.expires = jiffies +
+		usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL);
+	add_timer(&qp->s_timer);
+}
+
+/**
+ * qib_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
+ * @dev: the device for this QP
+ * @qp: a pointer to the QP
+ * @ohdr: a pointer to the IB header being constructed
+ * @pmtu: the path MTU
+ *
+ * Return 1 if constructed; otherwise, return 0.
+ * Note that we are in the responder's side of the QP context.
+ * Note the QP s_lock must be held.
+ */
+static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
+			   struct qib_other_headers *ohdr, u32 pmtu)
+{
+	struct qib_ack_entry *e;
+	u32 hwords;
+	u32 len;
+	u32 bth0;
+	u32 bth2;
+
+	/* Don't send an ACK if we aren't supposed to. */
+	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+		goto bail;
+
+	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
+	hwords = 5;
+
+	switch (qp->s_ack_state) {
+	case OP(RDMA_READ_RESPONSE_LAST):
+	case OP(RDMA_READ_RESPONSE_ONLY):
+		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+		if (e->rdma_sge.mr) {
+			atomic_dec(&e->rdma_sge.mr->refcount);
+			e->rdma_sge.mr = NULL;
+		}
+		/* FALLTHROUGH */
+	case OP(ATOMIC_ACKNOWLEDGE):
+		/*
+		 * We can increment the tail pointer now that the last
+		 * response has been sent instead of only being
+		 * constructed.
+		 */
+		if (++qp->s_tail_ack_queue > QIB_MAX_RDMA_ATOMIC)
+			qp->s_tail_ack_queue = 0;
+		/* FALLTHROUGH */
+	case OP(SEND_ONLY):
+	case OP(ACKNOWLEDGE):
+		/* Check for no next entry in the queue. */
+		if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
+			if (qp->s_flags & QIB_S_ACK_PENDING)
+				goto normal;
+			goto bail;
+		}
+
+		e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+		if (e->opcode == OP(RDMA_READ_REQUEST)) {
+			/*
+			 * If a RDMA read response is being resent and
+			 * we haven't seen the duplicate request yet,
+			 * then stop sending the remaining responses the
+			 * responder has seen until the requester resends it.
+			 */
+			len = e->rdma_sge.sge_length;
+			if (len && !e->rdma_sge.mr) {
+				qp->s_tail_ack_queue = qp->r_head_ack_queue;
+				goto bail;
+			}
+			/* Copy SGE state in case we need to resend */
+			qp->s_rdma_mr = e->rdma_sge.mr;
+			if (qp->s_rdma_mr)
+				atomic_inc(&qp->s_rdma_mr->refcount);
+			qp->s_ack_rdma_sge.sge = e->rdma_sge;
+			qp->s_ack_rdma_sge.num_sge = 1;
+			qp->s_cur_sge = &qp->s_ack_rdma_sge;
+			if (len > pmtu) {
+				len = pmtu;
+				qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
+			} else {
+				qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
+				e->sent = 1;
+			}
+			ohdr->u.aeth = qib_compute_aeth(qp);
+			hwords++;
+			qp->s_ack_rdma_psn = e->psn;
+			bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
+		} else {
+			/* COMPARE_SWAP or FETCH_ADD */
+			qp->s_cur_sge = NULL;
+			len = 0;
+			qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+			ohdr->u.at.aeth = qib_compute_aeth(qp);
+			ohdr->u.at.atomic_ack_eth[0] =
+				cpu_to_be32(e->atomic_data >> 32);
+			ohdr->u.at.atomic_ack_eth[1] =
+				cpu_to_be32(e->atomic_data);
+			hwords += sizeof(ohdr->u.at) / sizeof(u32);
+			bth2 = e->psn & QIB_PSN_MASK;
+			e->sent = 1;
+		}
+		bth0 = qp->s_ack_state << 24;
+		break;
+
+	case OP(RDMA_READ_RESPONSE_FIRST):
+		qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
+		/* FALLTHROUGH */
+	case OP(RDMA_READ_RESPONSE_MIDDLE):
+		qp->s_cur_sge = &qp->s_ack_rdma_sge;
+		qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
+		if (qp->s_rdma_mr)
+			atomic_inc(&qp->s_rdma_mr->refcount);
+		len = qp->s_ack_rdma_sge.sge.sge_length;
+		if (len > pmtu)
+			len = pmtu;
+		else {
+			ohdr->u.aeth = qib_compute_aeth(qp);
+			hwords++;
+			qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+			e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+			e->sent = 1;
+		}
+		bth0 = qp->s_ack_state << 24;
+		bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
+		break;
+
+	default:
+normal:
+		/*
+		 * Send a regular ACK.
+		 * Set the s_ack_state so we wait until after sending
+		 * the ACK before setting s_ack_state to ACKNOWLEDGE
+		 * (see above).
+		 */
+		qp->s_ack_state = OP(SEND_ONLY);
+		qp->s_flags &= ~QIB_S_ACK_PENDING;
+		qp->s_cur_sge = NULL;
+		if (qp->s_nak_state)
+			ohdr->u.aeth =
+				cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
+					    (qp->s_nak_state <<
+					     QIB_AETH_CREDIT_SHIFT));
+		else
+			ohdr->u.aeth = qib_compute_aeth(qp);
+		hwords++;
+		len = 0;
+		bth0 = OP(ACKNOWLEDGE) << 24;
+		bth2 = qp->s_ack_psn & QIB_PSN_MASK;
+	}
+	qp->s_rdma_ack_cnt++;
+	qp->s_hdrwords = hwords;
+	qp->s_cur_size = len;
+	qib_make_ruc_header(qp, ohdr, bth0, bth2);
+	return 1;
+
+bail:
+	qp->s_ack_state = OP(ACKNOWLEDGE);
+	qp->s_flags &= ~(QIB_S_RESP_PENDING | QIB_S_ACK_PENDING);
+	return 0;
+}
+
+/**
+ * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
+ * @qp: a pointer to the QP
+ *
+ * Return 1 if constructed; otherwise, return 0.
+ */
+int qib_make_rc_req(struct qib_qp *qp)
+{
+	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
+	struct qib_other_headers *ohdr;
+	struct qib_sge_state *ss;
+	struct qib_swqe *wqe;
+	u32 hwords;
+	u32 len;
+	u32 bth0;
+	u32 bth2;
+	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+	char newreq;
+	unsigned long flags;
+	int ret = 0;
+	int delta;
+
+	ohdr = &qp->s_hdr.u.oth;
+	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+		ohdr = &qp->s_hdr.u.l.oth;
+
+	/*
+	 * The lock is needed to synchronize between the sending tasklet,
+	 * the receive interrupt handler, and timeout resends.
+	 */
+	spin_lock_irqsave(&qp->s_lock, flags);
+
+	/* Sending responses has higher priority over sending requests. */
+	if ((qp->s_flags & QIB_S_RESP_PENDING) &&
+	    qib_make_rc_ack(dev, qp, ohdr, pmtu))
+		goto done;
+
+	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) {
+		if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+			goto bail;
+		/* We are in the error state, flush the work request. */
+		if (qp->s_last == qp->s_head)
+			goto bail;
+		/* If DMAs are in progress, we can't flush immediately. */
+		if (atomic_read(&qp->s_dma_busy)) {
+			qp->s_flags |= QIB_S_WAIT_DMA;
+			goto bail;
+		}
+		wqe = get_swqe_ptr(qp, qp->s_last);
+		while (qp->s_last != qp->s_acked) {
+			qib_send_complete(qp, wqe, IB_WC_SUCCESS);
+			if (++qp->s_last >= qp->s_size)
+				qp->s_last = 0;
+			wqe = get_swqe_ptr(qp, qp->s_last);
+		}
+		qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+		goto done;
+	}
+
+	if (qp->s_flags & (QIB_S_WAIT_RNR | QIB_S_WAIT_ACK))
+		goto bail;
+
+	if (qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) {
+		if (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
+			qp->s_flags |= QIB_S_WAIT_PSN;
+			goto bail;
+		}
+		qp->s_sending_psn = qp->s_psn;
+		qp->s_sending_hpsn = qp->s_psn - 1;
+	}
+
+	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
+	hwords = 5;
+	bth0 = 0;
+
+	/* Send a request. */
+	wqe = get_swqe_ptr(qp, qp->s_cur);
+	switch (qp->s_state) {
+	default:
+		if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK))
+			goto bail;
+		/*
+		 * Resend an old request or start a new one.
+		 *
+		 * We keep track of the current SWQE so that
+		 * we don't reset the "furthest progress" state
+		 * if we need to back up.
+		 */
+		newreq = 0;
+		if (qp->s_cur == qp->s_tail) {
+			/* Check if send work queue is empty. */
+			if (qp->s_tail == qp->s_head)
+				goto bail;
+			/*
+			 * If a fence is requested, wait for previous
+			 * RDMA read and atomic operations to finish.
+			 */
+			if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
+			    qp->s_num_rd_atomic) {
+				qp->s_flags |= QIB_S_WAIT_FENCE;
+				goto bail;
+			}
+			wqe->psn = qp->s_next_psn;
+			newreq = 1;
+		}
+		/*
+		 * Note that we have to be careful not to modify the
+		 * original work request since we may need to resend
+		 * it.
+		 */
+		len = wqe->length;
+		ss = &qp->s_sge;
+		bth2 = qp->s_psn & QIB_PSN_MASK;
+		switch (wqe->wr.opcode) {
+		case IB_WR_SEND:
+		case IB_WR_SEND_WITH_IMM:
+			/* If no credit, return. */
+			if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) &&
+			    qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+				qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
+				goto bail;
+			}
+			wqe->lpsn = wqe->psn;
+			if (len > pmtu) {
+				wqe->lpsn += (len - 1) / pmtu;
+				qp->s_state = OP(SEND_FIRST);
+				len = pmtu;
+				break;
+			}
+			if (wqe->wr.opcode == IB_WR_SEND)
+				qp->s_state = OP(SEND_ONLY);
+			else {
+				qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
+				/* Immediate data comes after the BTH */
+				ohdr->u.imm_data = wqe->wr.ex.imm_data;
+				hwords += 1;
+			}
+			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+				bth0 |= IB_BTH_SOLICITED;
+			bth2 |= IB_BTH_REQ_ACK;
+			if (++qp->s_cur == qp->s_size)
+				qp->s_cur = 0;
+			break;
+
+		case IB_WR_RDMA_WRITE:
+			if (newreq && !(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+				qp->s_lsn++;
+			/* FALLTHROUGH */
+		case IB_WR_RDMA_WRITE_WITH_IMM:
+			/* If no credit, return. */
+			if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) &&
+			    qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+				qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
+				goto bail;
+			}
+			ohdr->u.rc.reth.vaddr =
+				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+			ohdr->u.rc.reth.rkey =
+				cpu_to_be32(wqe->wr.wr.rdma.rkey);
+			ohdr->u.rc.reth.length = cpu_to_be32(len);
+			hwords += sizeof(struct ib_reth) / sizeof(u32);
+			wqe->lpsn = wqe->psn;
+			if (len > pmtu) {
+				wqe->lpsn += (len - 1) / pmtu;
+				qp->s_state = OP(RDMA_WRITE_FIRST);
+				len = pmtu;
+				break;
+			}
+			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+				qp->s_state = OP(RDMA_WRITE_ONLY);
+			else {
+				qp->s_state =
+					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
+				/* Immediate data comes after RETH */
+				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
+				hwords += 1;
+				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+					bth0 |= IB_BTH_SOLICITED;
+			}
+			bth2 |= IB_BTH_REQ_ACK;
+			if (++qp->s_cur == qp->s_size)
+				qp->s_cur = 0;
+			break;
+
+		case IB_WR_RDMA_READ:
+			/*
+			 * Don't allow more operations to be started
+			 * than the QP limits allow.
+			 */
+			if (newreq) {
+				if (qp->s_num_rd_atomic >=
+				    qp->s_max_rd_atomic) {
+					qp->s_flags |= QIB_S_WAIT_RDMAR;
+					goto bail;
+				}
+				qp->s_num_rd_atomic++;
+				if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+					qp->s_lsn++;
+				/*
+				 * Adjust s_next_psn to count the
+				 * expected number of responses.
+				 */
+				if (len > pmtu)
+					qp->s_next_psn += (len - 1) / pmtu;
+				wqe->lpsn = qp->s_next_psn++;
+			}
+			ohdr->u.rc.reth.vaddr =
+				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+			ohdr->u.rc.reth.rkey =
+				cpu_to_be32(wqe->wr.wr.rdma.rkey);
+			ohdr->u.rc.reth.length = cpu_to_be32(len);
+			qp->s_state = OP(RDMA_READ_REQUEST);
+			hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
+			ss = NULL;
+			len = 0;
+			bth2 |= IB_BTH_REQ_ACK;
+			if (++qp->s_cur == qp->s_size)
+				qp->s_cur = 0;
+			break;
+
+		case IB_WR_ATOMIC_CMP_AND_SWP:
+		case IB_WR_ATOMIC_FETCH_AND_ADD:
+			/*
+			 * Don't allow more operations to be started
+			 * than the QP limits allow.
+			 */
+			if (newreq) {
+				if (qp->s_num_rd_atomic >=
+				    qp->s_max_rd_atomic) {
+					qp->s_flags |= QIB_S_WAIT_RDMAR;
+					goto bail;
+				}
+				qp->s_num_rd_atomic++;
+				if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+					qp->s_lsn++;
+				wqe->lpsn = wqe->psn;
+			}
+			if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+				qp->s_state = OP(COMPARE_SWAP);
+				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+					wqe->wr.wr.atomic.swap);
+				ohdr->u.atomic_eth.compare_data = cpu_to_be64(
+					wqe->wr.wr.atomic.compare_add);
+			} else {
+				qp->s_state = OP(FETCH_ADD);
+				ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+					wqe->wr.wr.atomic.compare_add);
+				ohdr->u.atomic_eth.compare_data = 0;
+			}
+			ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
+				wqe->wr.wr.atomic.remote_addr >> 32);
+			ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
+				wqe->wr.wr.atomic.remote_addr);
+			ohdr->u.atomic_eth.rkey = cpu_to_be32(
+				wqe->wr.wr.atomic.rkey);
+			hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
+			ss = NULL;
+			len = 0;
+			bth2 |= IB_BTH_REQ_ACK;
+			if (++qp->s_cur == qp->s_size)
+				qp->s_cur = 0;
+			break;
+
+		default:
+			goto bail;
+		}
+		qp->s_sge.sge = wqe->sg_list[0];
+		qp->s_sge.sg_list = wqe->sg_list + 1;
+		qp->s_sge.num_sge = wqe->wr.num_sge;
+		qp->s_sge.total_len = wqe->length;
+		qp->s_len = wqe->length;
+		if (newreq) {
+			qp->s_tail++;
+			if (qp->s_tail >= qp->s_size)
+				qp->s_tail = 0;
+		}
+		if (wqe->wr.opcode == IB_WR_RDMA_READ)
+			qp->s_psn = wqe->lpsn + 1;
+		else {
+			qp->s_psn++;
+			if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
+				qp->s_next_psn = qp->s_psn;
+		}
+		break;
+
+	case OP(RDMA_READ_RESPONSE_FIRST):
+		/*
+		 * qp->s_state is normally set to the opcode of the
+		 * last packet constructed for new requests and therefore
+		 * is never set to RDMA read response.
+		 * RDMA_READ_RESPONSE_FIRST is used by the ACK processing
+		 * thread to indicate a SEND needs to be restarted from an
+		 * earlier PSN without interferring with the sending thread.
+		 * See qib_restart_rc().
+		 */
+		qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
+		/* FALLTHROUGH */
+	case OP(SEND_FIRST):
+		qp->s_state = OP(SEND_MIDDLE);
+		/* FALLTHROUGH */
+	case OP(SEND_MIDDLE):
+		bth2 = qp->s_psn++ & QIB_PSN_MASK;
+		if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
+			qp->s_next_psn = qp->s_psn;
+		ss = &qp->s_sge;
+		len = qp->s_len;
+		if (len > pmtu) {
+			len = pmtu;
+			break;
+		}
+		if (wqe->wr.opcode == IB_WR_SEND)
+			qp->s_state = OP(SEND_LAST);
+		else {
+			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
+			/* Immediate data comes after the BTH */
+			ohdr->u.imm_data = wqe->wr.ex.imm_data;
+			hwords += 1;
+		}
+		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+			bth0 |= IB_BTH_SOLICITED;
+		bth2 |= IB_BTH_REQ_ACK;
+		qp->s_cur++;
+		if (qp->s_cur >= qp->s_size)
+			qp->s_cur = 0;
+		break;
+
+	case OP(RDMA_READ_RESPONSE_LAST):
+		/*
+		 * qp->s_state is normally set to the opcode of the
+		 * last packet constructed for new requests and therefore
+		 * is never set to RDMA read response.
+		 * RDMA_READ_RESPONSE_LAST is used by the ACK processing
+		 * thread to indicate a RDMA write needs to be restarted from
+		 * an earlier PSN without interferring with the sending thread.
+		 * See qib_restart_rc().
+		 */
+		qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
+		/* FALLTHROUGH */
+	case OP(RDMA_WRITE_FIRST):
+		qp->s_state = OP(RDMA_WRITE_MIDDLE);
+		/* FALLTHROUGH */
+	case OP(RDMA_WRITE_MIDDLE):
+		bth2 = qp->s_psn++ & QIB_PSN_MASK;
+		if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
+			qp->s_next_psn = qp->s_psn;
+		ss = &qp->s_sge;
+		len = qp->s_len;
+		if (len > pmtu) {
+			len = pmtu;
+			break;
+		}
+		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+			qp->s_state = OP(RDMA_WRITE_LAST);
+		else {
+			qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
+			/* Immediate data comes after the BTH */
+			ohdr->u.imm_data = wqe->wr.ex.imm_data;
+			hwords += 1;
+			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+				bth0 |= IB_BTH_SOLICITED;
+		}
+		bth2 |= IB_BTH_REQ_ACK;
+		qp->s_cur++;
+		if (qp->s_cur >= qp->s_size)
+			qp->s_cur = 0;
+		break;
+
+	case OP(RDMA_READ_RESPONSE_MIDDLE):
+		/*
+		 * qp->s_state is normally set to the opcode of the
+		 * last packet constructed for new requests and therefore
+		 * is never set to RDMA read response.
+		 * RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing
+		 * thread to indicate a RDMA read needs to be restarted from
+		 * an earlier PSN without interferring with the sending thread.
+		 * See qib_restart_rc().
+		 */
+		len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
+		ohdr->u.rc.reth.vaddr =
+			cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
+		ohdr->u.rc.reth.rkey =
+			cpu_to_be32(wqe->wr.wr.rdma.rkey);
+		ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
+		qp->s_state = OP(RDMA_READ_REQUEST);
+		hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
+		bth2 = (qp->s_psn & QIB_PSN_MASK) | IB_BTH_REQ_ACK;
+		qp->s_psn = wqe->lpsn + 1;
+		ss = NULL;
+		len = 0;
+		qp->s_cur++;
+		if (qp->s_cur == qp->s_size)
+			qp->s_cur = 0;
+		break;
+	}
+	qp->s_sending_hpsn = bth2;
+	delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8;
+	if (delta && delta % QIB_PSN_CREDIT == 0)
+		bth2 |= IB_BTH_REQ_ACK;
+	if (qp->s_flags & QIB_S_SEND_ONE) {
+		qp->s_flags &= ~QIB_S_SEND_ONE;
+		qp->s_flags |= QIB_S_WAIT_ACK;
+		bth2 |= IB_BTH_REQ_ACK;
+	}
+	qp->s_len -= len;
+	qp->s_hdrwords = hwords;
+	qp->s_cur_sge = ss;
+	qp->s_cur_size = len;
+	qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
+done:
+	ret = 1;
+	goto unlock;
+
+bail:
+	qp->s_flags &= ~QIB_S_BUSY;
+unlock:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	return ret;
+}
+
+/**
+ * qib_send_rc_ack - Construct an ACK packet and send it
+ * @qp: a pointer to the QP
+ *
+ * This is called from qib_rc_rcv() and qib_kreceive().
+ * Note that RDMA reads and atomics are handled in the
+ * send side QP state and tasklet.
+ */
+void qib_send_rc_ack(struct qib_qp *qp)
+{
+	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	u64 pbc;
+	u16 lrh0;
+	u32 bth0;
+	u32 hwords;
+	u32 pbufn;
+	u32 __iomem *piobuf;
+	struct qib_ib_header hdr;
+	struct qib_other_headers *ohdr;
+	u32 control;
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+
+	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+		goto unlock;
+
+	/* Don't send ACK or NAK if a RDMA read or atomic is pending. */
+	if ((qp->s_flags & QIB_S_RESP_PENDING) || qp->s_rdma_ack_cnt)
+		goto queue_ack;
+
+	/* Construct the header with s_lock held so APM doesn't change it. */
+	ohdr = &hdr.u.oth;
+	lrh0 = QIB_LRH_BTH;
+	/* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
+	hwords = 6;
+	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+		hwords += qib_make_grh(ibp, &hdr.u.l.grh,
+				       &qp->remote_ah_attr.grh, hwords, 0);
+		ohdr = &hdr.u.l.oth;
+		lrh0 = QIB_LRH_GRH;
+	}
+	/* read pkey_index w/o lock (its atomic) */
+	bth0 = qib_get_pkey(ibp, qp->s_pkey_index) | (OP(ACKNOWLEDGE) << 24);
+	if (qp->s_mig_state == IB_MIG_MIGRATED)
+		bth0 |= IB_BTH_MIG_REQ;
+	if (qp->r_nak_state)
+		ohdr->u.aeth = cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
+					    (qp->r_nak_state <<
+					     QIB_AETH_CREDIT_SHIFT));
+	else
+		ohdr->u.aeth = qib_compute_aeth(qp);
+	lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
+		qp->remote_ah_attr.sl << 4;
+	hdr.lrh[0] = cpu_to_be16(lrh0);
+	hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+	hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
+	hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
+	ohdr->bth[0] = cpu_to_be32(bth0);
+	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
+	ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & QIB_PSN_MASK);
+
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+
+	/* Don't try to send ACKs if the link isn't ACTIVE */
+	if (!(ppd->lflags & QIBL_LINKACTIVE))
+		goto done;
+
+	control = dd->f_setpbc_control(ppd, hwords + SIZE_OF_CRC,
+				       qp->s_srate, lrh0 >> 12);
+	/* length is + 1 for the control dword */
+	pbc = ((u64) control << 32) | (hwords + 1);
+
+	piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
+	if (!piobuf) {
+		/*
+		 * We are out of PIO buffers at the moment.
+		 * Pass responsibility for sending the ACK to the
+		 * send tasklet so that when a PIO buffer becomes
+		 * available, the ACK is sent ahead of other outgoing
+		 * packets.
+		 */
+		spin_lock_irqsave(&qp->s_lock, flags);
+		goto queue_ack;
+	}
+
+	/*
+	 * Write the pbc.
+	 * We have to flush after the PBC for correctness
+	 * on some cpus or WC buffer can be written out of order.
+	 */
+	writeq(pbc, piobuf);
+
+	if (dd->flags & QIB_PIO_FLUSH_WC) {
+		u32 *hdrp = (u32 *) &hdr;
+
+		qib_flush_wc();
+		qib_pio_copy(piobuf + 2, hdrp, hwords - 1);
+		qib_flush_wc();
+		__raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
+	} else
+		qib_pio_copy(piobuf + 2, (u32 *) &hdr, hwords);
+
+	if (dd->flags & QIB_USE_SPCL_TRIG) {
+		u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
+
+		qib_flush_wc();
+		__raw_writel(0xaebecede, piobuf + spcl_off);
+	}
+
+	qib_flush_wc();
+	qib_sendbuf_done(dd, pbufn);
+
+	ibp->n_unicast_xmit++;
+	goto done;
+
+queue_ack:
+	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
+		ibp->n_rc_qacks++;
+		qp->s_flags |= QIB_S_ACK_PENDING | QIB_S_RESP_PENDING;
+		qp->s_nak_state = qp->r_nak_state;
+		qp->s_ack_psn = qp->r_ack_psn;
+
+		/* Schedule the send tasklet. */
+		qib_schedule_send(qp);
+	}
+unlock:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+done:
+	return;
+}
+
+/**
+ * reset_psn - reset the QP state to send starting from PSN
+ * @qp: the QP
+ * @psn: the packet sequence number to restart at
+ *
+ * This is called from qib_rc_rcv() to process an incoming RC ACK
+ * for the given QP.
+ * Called at interrupt level with the QP s_lock held.
+ */
+static void reset_psn(struct qib_qp *qp, u32 psn)
+{
+	u32 n = qp->s_acked;
+	struct qib_swqe *wqe = get_swqe_ptr(qp, n);
+	u32 opcode;
+
+	qp->s_cur = n;
+
+	/*
+	 * If we are starting the request from the beginning,
+	 * let the normal send code handle initialization.
+	 */
+	if (qib_cmp24(psn, wqe->psn) <= 0) {
+		qp->s_state = OP(SEND_LAST);
+		goto done;
+	}
+
+	/* Find the work request opcode corresponding to the given PSN. */
+	opcode = wqe->wr.opcode;
+	for (;;) {
+		int diff;
+
+		if (++n == qp->s_size)
+			n = 0;
+		if (n == qp->s_tail)
+			break;
+		wqe = get_swqe_ptr(qp, n);
+		diff = qib_cmp24(psn, wqe->psn);
+		if (diff < 0)
+			break;
+		qp->s_cur = n;
+		/*
+		 * If we are starting the request from the beginning,
+		 * let the normal send code handle initialization.
+		 */
+		if (diff == 0) {
+			qp->s_state = OP(SEND_LAST);
+			goto done;
+		}
+		opcode = wqe->wr.opcode;
+	}
+
+	/*
+	 * Set the state to restart in the middle of a request.
+	 * Don't change the s_sge, s_cur_sge, or s_cur_size.
+	 * See qib_make_rc_req().
+	 */
+	switch (opcode) {
+	case IB_WR_SEND:
+	case IB_WR_SEND_WITH_IMM:
+		qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
+		break;
+
+	case IB_WR_RDMA_WRITE:
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
+		break;
+
+	case IB_WR_RDMA_READ:
+		qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
+		break;
+
+	default:
+		/*
+		 * This case shouldn't happen since its only
+		 * one PSN per req.
+		 */
+		qp->s_state = OP(SEND_LAST);
+	}
+done:
+	qp->s_psn = psn;
+	/*
+	 * Set QIB_S_WAIT_PSN as qib_rc_complete() may start the timer
+	 * asynchronously before the send tasklet can get scheduled.
+	 * Doing it in qib_make_rc_req() is too late.
+	 */
+	if ((qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
+	    (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
+		qp->s_flags |= QIB_S_WAIT_PSN;
+}
+
+/*
+ * Back up requester to resend the last un-ACKed request.
+ * The QP s_lock should be held and interrupts disabled.
+ */
+static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
+{
+	struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_acked);
+	struct qib_ibport *ibp;
+
+	if (qp->s_retry == 0) {
+		if (qp->s_mig_state == IB_MIG_ARMED) {
+			qib_migrate_qp(qp);
+			qp->s_retry = qp->s_retry_cnt;
+		} else if (qp->s_last == qp->s_acked) {
+			qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+			qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+			return;
+		} else /* XXX need to handle delayed completion */
+			return;
+	} else
+		qp->s_retry--;
+
+	ibp = to_iport(qp->ibqp.device, qp->port_num);
+	if (wqe->wr.opcode == IB_WR_RDMA_READ)
+		ibp->n_rc_resends++;
+	else
+		ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
+
+	qp->s_flags &= ~(QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR |
+			 QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_PSN |
+			 QIB_S_WAIT_ACK);
+	if (wait)
+		qp->s_flags |= QIB_S_SEND_ONE;
+	reset_psn(qp, psn);
+}
+
+/*
+ * This is called from s_timer for missing responses.
+ */
+static void rc_timeout(unsigned long arg)
+{
+	struct qib_qp *qp = (struct qib_qp *)arg;
+	struct qib_ibport *ibp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+	if (qp->s_flags & QIB_S_TIMER) {
+		ibp = to_iport(qp->ibqp.device, qp->port_num);
+		ibp->n_rc_timeouts++;
+		qp->s_flags &= ~QIB_S_TIMER;
+		del_timer(&qp->s_timer);
+		qib_restart_rc(qp, qp->s_last_psn + 1, 1);
+		qib_schedule_send(qp);
+	}
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
+/*
+ * This is called from s_timer for RNR timeouts.
+ */
+void qib_rc_rnr_retry(unsigned long arg)
+{
+	struct qib_qp *qp = (struct qib_qp *)arg;
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+	if (qp->s_flags & QIB_S_WAIT_RNR) {
+		qp->s_flags &= ~QIB_S_WAIT_RNR;
+		del_timer(&qp->s_timer);
+		qib_schedule_send(qp);
+	}
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
+/*
+ * Set qp->s_sending_psn to the next PSN after the given one.
+ * This would be psn+1 except when RDMA reads are present.
+ */
+static void reset_sending_psn(struct qib_qp *qp, u32 psn)
+{
+	struct qib_swqe *wqe;
+	u32 n = qp->s_last;
+
+	/* Find the work request corresponding to the given PSN. */
+	for (;;) {
+		wqe = get_swqe_ptr(qp, n);
+		if (qib_cmp24(psn, wqe->lpsn) <= 0) {
+			if (wqe->wr.opcode == IB_WR_RDMA_READ)
+				qp->s_sending_psn = wqe->lpsn + 1;
+			else
+				qp->s_sending_psn = psn + 1;
+			break;
+		}
+		if (++n == qp->s_size)
+			n = 0;
+		if (n == qp->s_tail)
+			break;
+	}
+}
+
+/*
+ * This should be called with the QP s_lock held and interrupts disabled.
+ */
+void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
+{
+	struct qib_other_headers *ohdr;
+	struct qib_swqe *wqe;
+	struct ib_wc wc;
+	unsigned i;
+	u32 opcode;
+	u32 psn;
+
+	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+		return;
+
+	/* Find out where the BTH is */
+	if ((be16_to_cpu(hdr->lrh[0]) & 3) == QIB_LRH_BTH)
+		ohdr = &hdr->u.oth;
+	else
+		ohdr = &hdr->u.l.oth;
+
+	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+	if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
+	    opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
+		WARN_ON(!qp->s_rdma_ack_cnt);
+		qp->s_rdma_ack_cnt--;
+		return;
+	}
+
+	psn = be32_to_cpu(ohdr->bth[2]);
+	reset_sending_psn(qp, psn);
+
+	/*
+	 * Start timer after a packet requesting an ACK has been sent and
+	 * there are still requests that haven't been acked.
+	 */
+	if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
+	    !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)))
+		start_timer(qp);
+
+	while (qp->s_last != qp->s_acked) {
+		wqe = get_swqe_ptr(qp, qp->s_last);
+		if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 &&
+		    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
+			break;
+		for (i = 0; i < wqe->wr.num_sge; i++) {
+			struct qib_sge *sge = &wqe->sg_list[i];
+
+			atomic_dec(&sge->mr->refcount);
+		}
+		/* Post a send completion queue entry if requested. */
+		if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
+			memset(&wc, 0, sizeof wc);
+			wc.wr_id = wqe->wr.wr_id;
+			wc.status = IB_WC_SUCCESS;
+			wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
+			wc.byte_len = wqe->length;
+			wc.qp = &qp->ibqp;
+			qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+		}
+		if (++qp->s_last >= qp->s_size)
+			qp->s_last = 0;
+	}
+	/*
+	 * If we were waiting for sends to complete before resending,
+	 * and they are now complete, restart sending.
+	 */
+	if (qp->s_flags & QIB_S_WAIT_PSN &&
+	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
+		qp->s_flags &= ~QIB_S_WAIT_PSN;
+		qp->s_sending_psn = qp->s_psn;
+		qp->s_sending_hpsn = qp->s_psn - 1;
+		qib_schedule_send(qp);
+	}
+}
+
+static inline void update_last_psn(struct qib_qp *qp, u32 psn)
+{
+	qp->s_last_psn = psn;
+}
+
+/*
+ * Generate a SWQE completion.
+ * This is similar to qib_send_complete but has to check to be sure
+ * that the SGEs are not being referenced if the SWQE is being resent.
+ */
+static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
+					 struct qib_swqe *wqe,
+					 struct qib_ibport *ibp)
+{
+	struct ib_wc wc;
+	unsigned i;
+
+	/*
+	 * Don't decrement refcount and don't generate a
+	 * completion if the SWQE is being resent until the send
+	 * is finished.
+	 */
+	if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 ||
+	    qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
+		for (i = 0; i < wqe->wr.num_sge; i++) {
+			struct qib_sge *sge = &wqe->sg_list[i];
+
+			atomic_dec(&sge->mr->refcount);
+		}
+		/* Post a send completion queue entry if requested. */
+		if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+		    (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
+			memset(&wc, 0, sizeof wc);
+			wc.wr_id = wqe->wr.wr_id;
+			wc.status = IB_WC_SUCCESS;
+			wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
+			wc.byte_len = wqe->length;
+			wc.qp = &qp->ibqp;
+			qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+		}
+		if (++qp->s_last >= qp->s_size)
+			qp->s_last = 0;
+	} else
+		ibp->n_rc_delayed_comp++;
+
+	qp->s_retry = qp->s_retry_cnt;
+	update_last_psn(qp, wqe->lpsn);
+
+	/*
+	 * If we are completing a request which is in the process of
+	 * being resent, we can stop resending it since we know the
+	 * responder has already seen it.
+	 */
+	if (qp->s_acked == qp->s_cur) {
+		if (++qp->s_cur >= qp->s_size)
+			qp->s_cur = 0;
+		qp->s_acked = qp->s_cur;
+		wqe = get_swqe_ptr(qp, qp->s_cur);
+		if (qp->s_acked != qp->s_tail) {
+			qp->s_state = OP(SEND_LAST);
+			qp->s_psn = wqe->psn;
+		}
+	} else {
+		if (++qp->s_acked >= qp->s_size)
+			qp->s_acked = 0;
+		if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
+			qp->s_draining = 0;
+		wqe = get_swqe_ptr(qp, qp->s_acked);
+	}
+	return wqe;
+}
+
+/**
+ * do_rc_ack - process an incoming RC ACK
+ * @qp: the QP the ACK came in on
+ * @psn: the packet sequence number of the ACK
+ * @opcode: the opcode of the request that resulted in the ACK
+ *
+ * This is called from qib_rc_rcv_resp() to process an incoming RC ACK
+ * for the given QP.
+ * Called at interrupt level with the QP s_lock held.
+ * Returns 1 if OK, 0 if current operation should be aborted (NAK).
+ */
+static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
+		     u64 val, struct qib_ctxtdata *rcd)
+{
+	struct qib_ibport *ibp;
+	enum ib_wc_status status;
+	struct qib_swqe *wqe;
+	int ret = 0;
+	u32 ack_psn;
+	int diff;
+
+	/* Remove QP from retry timer */
+	if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
+		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
+		del_timer(&qp->s_timer);
+	}
+
+	/*
+	 * Note that NAKs implicitly ACK outstanding SEND and RDMA write
+	 * requests and implicitly NAK RDMA read and atomic requests issued
+	 * before the NAK'ed request.  The MSN won't include the NAK'ed
+	 * request but will include an ACK'ed request(s).
+	 */
+	ack_psn = psn;
+	if (aeth >> 29)
+		ack_psn--;
+	wqe = get_swqe_ptr(qp, qp->s_acked);
+	ibp = to_iport(qp->ibqp.device, qp->port_num);
+
+	/*
+	 * The MSN might be for a later WQE than the PSN indicates so
+	 * only complete WQEs that the PSN finishes.
+	 */
+	while ((diff = qib_cmp24(ack_psn, wqe->lpsn)) >= 0) {
+		/*
+		 * RDMA_READ_RESPONSE_ONLY is a special case since
+		 * we want to generate completion events for everything
+		 * before the RDMA read, copy the data, then generate
+		 * the completion for the read.
+		 */
+		if (wqe->wr.opcode == IB_WR_RDMA_READ &&
+		    opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
+		    diff == 0) {
+			ret = 1;
+			goto bail;
+		}
+		/*
+		 * If this request is a RDMA read or atomic, and the ACK is
+		 * for a later operation, this ACK NAKs the RDMA read or
+		 * atomic.  In other words, only a RDMA_READ_LAST or ONLY
+		 * can ACK a RDMA read and likewise for atomic ops.  Note
+		 * that the NAK case can only happen if relaxed ordering is
+		 * used and requests are sent after an RDMA read or atomic
+		 * is sent but before the response is received.
+		 */
+		if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
+		     (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
+		    ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+		      wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
+		     (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
+			/* Retry this request. */
+			if (!(qp->r_flags & QIB_R_RDMAR_SEQ)) {
+				qp->r_flags |= QIB_R_RDMAR_SEQ;
+				qib_restart_rc(qp, qp->s_last_psn + 1, 0);
+				if (list_empty(&qp->rspwait)) {
+					qp->r_flags |= QIB_R_RSP_SEND;
+					atomic_inc(&qp->refcount);
+					list_add_tail(&qp->rspwait,
+						      &rcd->qp_wait_list);
+				}
+			}
+			/*
+			 * No need to process the ACK/NAK since we are
+			 * restarting an earlier request.
+			 */
+			goto bail;
+		}
+		if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+			u64 *vaddr = wqe->sg_list[0].vaddr;
+			*vaddr = val;
+		}
+		if (qp->s_num_rd_atomic &&
+		    (wqe->wr.opcode == IB_WR_RDMA_READ ||
+		     wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+		     wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
+			qp->s_num_rd_atomic--;
+			/* Restart sending task if fence is complete */
+			if ((qp->s_flags & QIB_S_WAIT_FENCE) &&
+			    !qp->s_num_rd_atomic) {
+				qp->s_flags &= ~(QIB_S_WAIT_FENCE |
+						 QIB_S_WAIT_ACK);
+				qib_schedule_send(qp);
+			} else if (qp->s_flags & QIB_S_WAIT_RDMAR) {
+				qp->s_flags &= ~(QIB_S_WAIT_RDMAR |
+						 QIB_S_WAIT_ACK);
+				qib_schedule_send(qp);
+			}
+		}
+		wqe = do_rc_completion(qp, wqe, ibp);
+		if (qp->s_acked == qp->s_tail)
+			break;
+	}
+
+	switch (aeth >> 29) {
+	case 0:         /* ACK */
+		ibp->n_rc_acks++;
+		if (qp->s_acked != qp->s_tail) {
+			/*
+			 * We are expecting more ACKs so
+			 * reset the retransmit timer.
+			 */
+			start_timer(qp);
+			/*
+			 * We can stop resending the earlier packets and
+			 * continue with the next packet the receiver wants.
+			 */
+			if (qib_cmp24(qp->s_psn, psn) <= 0)
+				reset_psn(qp, psn + 1);
+		} else if (qib_cmp24(qp->s_psn, psn) <= 0) {
+			qp->s_state = OP(SEND_LAST);
+			qp->s_psn = psn + 1;
+		}
+		if (qp->s_flags & QIB_S_WAIT_ACK) {
+			qp->s_flags &= ~QIB_S_WAIT_ACK;
+			qib_schedule_send(qp);
+		}
+		qib_get_credit(qp, aeth);
+		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
+		qp->s_retry = qp->s_retry_cnt;
+		update_last_psn(qp, psn);
+		ret = 1;
+		goto bail;
+
+	case 1:         /* RNR NAK */
+		ibp->n_rnr_naks++;
+		if (qp->s_acked == qp->s_tail)
+			goto bail;
+		if (qp->s_flags & QIB_S_WAIT_RNR)
+			goto bail;
+		if (qp->s_rnr_retry == 0) {
+			status = IB_WC_RNR_RETRY_EXC_ERR;
+			goto class_b;
+		}
+		if (qp->s_rnr_retry_cnt < 7)
+			qp->s_rnr_retry--;
+
+		/* The last valid PSN is the previous PSN. */
+		update_last_psn(qp, psn - 1);
+
+		ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
+
+		reset_psn(qp, psn);
+
+		qp->s_flags &= ~(QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_ACK);
+		qp->s_flags |= QIB_S_WAIT_RNR;
+		qp->s_timer.function = qib_rc_rnr_retry;
+		qp->s_timer.expires = jiffies + usecs_to_jiffies(
+			ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) &
+					   QIB_AETH_CREDIT_MASK]);
+		add_timer(&qp->s_timer);
+		goto bail;
+
+	case 3:         /* NAK */
+		if (qp->s_acked == qp->s_tail)
+			goto bail;
+		/* The last valid PSN is the previous PSN. */
+		update_last_psn(qp, psn - 1);
+		switch ((aeth >> QIB_AETH_CREDIT_SHIFT) &
+			QIB_AETH_CREDIT_MASK) {
+		case 0: /* PSN sequence error */
+			ibp->n_seq_naks++;
+			/*
+			 * Back up to the responder's expected PSN.
+			 * Note that we might get a NAK in the middle of an
+			 * RDMA READ response which terminates the RDMA
+			 * READ.
+			 */
+			qib_restart_rc(qp, psn, 0);
+			qib_schedule_send(qp);
+			break;
+
+		case 1: /* Invalid Request */
+			status = IB_WC_REM_INV_REQ_ERR;
+			ibp->n_other_naks++;
+			goto class_b;
+
+		case 2: /* Remote Access Error */
+			status = IB_WC_REM_ACCESS_ERR;
+			ibp->n_other_naks++;
+			goto class_b;
+
+		case 3: /* Remote Operation Error */
+			status = IB_WC_REM_OP_ERR;
+			ibp->n_other_naks++;
+class_b:
+			if (qp->s_last == qp->s_acked) {
+				qib_send_complete(qp, wqe, status);
+				qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+			}
+			break;
+
+		default:
+			/* Ignore other reserved NAK error codes */
+			goto reserved;
+		}
+		qp->s_retry = qp->s_retry_cnt;
+		qp->s_rnr_retry = qp->s_rnr_retry_cnt;
+		goto bail;
+
+	default:                /* 2: reserved */
+reserved:
+		/* Ignore reserved NAK codes. */
+		goto bail;
+	}
+
+bail:
+	return ret;
+}
+
+/*
+ * We have seen an out of sequence RDMA read middle or last packet.
+ * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
+ */
+static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn,
+			 struct qib_ctxtdata *rcd)
+{
+	struct qib_swqe *wqe;
+
+	/* Remove QP from retry timer */
+	if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
+		qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
+		del_timer(&qp->s_timer);
+	}
+
+	wqe = get_swqe_ptr(qp, qp->s_acked);
+
+	while (qib_cmp24(psn, wqe->lpsn) > 0) {
+		if (wqe->wr.opcode == IB_WR_RDMA_READ ||
+		    wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+		    wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+			break;
+		wqe = do_rc_completion(qp, wqe, ibp);
+	}
+
+	ibp->n_rdma_seq++;
+	qp->r_flags |= QIB_R_RDMAR_SEQ;
+	qib_restart_rc(qp, qp->s_last_psn + 1, 0);
+	if (list_empty(&qp->rspwait)) {
+		qp->r_flags |= QIB_R_RSP_SEND;
+		atomic_inc(&qp->refcount);
+		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+	}
+}
+
+/**
+ * qib_rc_rcv_resp - process an incoming RC response packet
+ * @ibp: the port this packet came in on
+ * @ohdr: the other headers for this packet
+ * @data: the packet data
+ * @tlen: the packet length
+ * @qp: the QP for this packet
+ * @opcode: the opcode for this packet
+ * @psn: the packet sequence number for this packet
+ * @hdrsize: the header length
+ * @pmtu: the path MTU
+ *
+ * This is called from qib_rc_rcv() to process an incoming RC response
+ * packet for the given QP.
+ * Called at interrupt level.
+ */
+static void qib_rc_rcv_resp(struct qib_ibport *ibp,
+			    struct qib_other_headers *ohdr,
+			    void *data, u32 tlen,
+			    struct qib_qp *qp,
+			    u32 opcode,
+			    u32 psn, u32 hdrsize, u32 pmtu,
+			    struct qib_ctxtdata *rcd)
+{
+	struct qib_swqe *wqe;
+	enum ib_wc_status status;
+	unsigned long flags;
+	int diff;
+	u32 pad;
+	u32 aeth;
+	u64 val;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+
+	/* Double check we can process this now that we hold the s_lock. */
+	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+		goto ack_done;
+
+	/* Ignore invalid responses. */
+	if (qib_cmp24(psn, qp->s_next_psn) >= 0)
+		goto ack_done;
+
+	/* Ignore duplicate responses. */
+	diff = qib_cmp24(psn, qp->s_last_psn);
+	if (unlikely(diff <= 0)) {
+		/* Update credits for "ghost" ACKs */
+		if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
+			aeth = be32_to_cpu(ohdr->u.aeth);
+			if ((aeth >> 29) == 0)
+				qib_get_credit(qp, aeth);
+		}
+		goto ack_done;
+	}
+
+	/*
+	 * Skip everything other than the PSN we expect, if we are waiting
+	 * for a reply to a restarted RDMA read or atomic op.
+	 */
+	if (qp->r_flags & QIB_R_RDMAR_SEQ) {
+		if (qib_cmp24(psn, qp->s_last_psn + 1) != 0)
+			goto ack_done;
+		qp->r_flags &= ~QIB_R_RDMAR_SEQ;
+	}
+
+	if (unlikely(qp->s_acked == qp->s_tail))
+		goto ack_done;
+	wqe = get_swqe_ptr(qp, qp->s_acked);
+	status = IB_WC_SUCCESS;
+
+	switch (opcode) {
+	case OP(ACKNOWLEDGE):
+	case OP(ATOMIC_ACKNOWLEDGE):
+	case OP(RDMA_READ_RESPONSE_FIRST):
+		aeth = be32_to_cpu(ohdr->u.aeth);
+		if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
+			__be32 *p = ohdr->u.at.atomic_ack_eth;
+
+			val = ((u64) be32_to_cpu(p[0]) << 32) |
+				be32_to_cpu(p[1]);
+		} else
+			val = 0;
+		if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
+		    opcode != OP(RDMA_READ_RESPONSE_FIRST))
+			goto ack_done;
+		hdrsize += 4;
+		wqe = get_swqe_ptr(qp, qp->s_acked);
+		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+			goto ack_op_err;
+		/*
+		 * If this is a response to a resent RDMA read, we
+		 * have to be careful to copy the data to the right
+		 * location.
+		 */
+		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+						  wqe, psn, pmtu);
+		goto read_middle;
+
+	case OP(RDMA_READ_RESPONSE_MIDDLE):
+		/* no AETH, no ACK */
+		if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1)))
+			goto ack_seq_err;
+		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+			goto ack_op_err;
+read_middle:
+		if (unlikely(tlen != (hdrsize + pmtu + 4)))
+			goto ack_len_err;
+		if (unlikely(pmtu >= qp->s_rdma_read_len))
+			goto ack_len_err;
+
+		/*
+		 * We got a response so update the timeout.
+		 * 4.096 usec. * (1 << qp->timeout)
+		 */
+		qp->s_flags |= QIB_S_TIMER;
+		mod_timer(&qp->s_timer, jiffies +
+			usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
+					 1000UL));
+		if (qp->s_flags & QIB_S_WAIT_ACK) {
+			qp->s_flags &= ~QIB_S_WAIT_ACK;
+			qib_schedule_send(qp);
+		}
+
+		if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
+			qp->s_retry = qp->s_retry_cnt;
+
+		/*
+		 * Update the RDMA receive state but do the copy w/o
+		 * holding the locks and blocking interrupts.
+		 */
+		qp->s_rdma_read_len -= pmtu;
+		update_last_psn(qp, psn);
+		spin_unlock_irqrestore(&qp->s_lock, flags);
+		qib_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0);
+		goto bail;
+
+	case OP(RDMA_READ_RESPONSE_ONLY):
+		aeth = be32_to_cpu(ohdr->u.aeth);
+		if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
+			goto ack_done;
+		/* Get the number of bytes the message was padded by. */
+		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+		/*
+		 * Check that the data size is >= 0 && <= pmtu.
+		 * Remember to account for the AETH header (4) and
+		 * ICRC (4).
+		 */
+		if (unlikely(tlen < (hdrsize + pad + 8)))
+			goto ack_len_err;
+		/*
+		 * If this is a response to a resent RDMA read, we
+		 * have to be careful to copy the data to the right
+		 * location.
+		 */
+		wqe = get_swqe_ptr(qp, qp->s_acked);
+		qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+						  wqe, psn, pmtu);
+		goto read_last;
+
+	case OP(RDMA_READ_RESPONSE_LAST):
+		/* ACKs READ req. */
+		if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1)))
+			goto ack_seq_err;
+		if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+			goto ack_op_err;
+		/* Get the number of bytes the message was padded by. */
+		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+		/*
+		 * Check that the data size is >= 1 && <= pmtu.
+		 * Remember to account for the AETH header (4) and
+		 * ICRC (4).
+		 */
+		if (unlikely(tlen <= (hdrsize + pad + 8)))
+			goto ack_len_err;
+read_last:
+		tlen -= hdrsize + pad + 8;
+		if (unlikely(tlen != qp->s_rdma_read_len))
+			goto ack_len_err;
+		aeth = be32_to_cpu(ohdr->u.aeth);
+		qib_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0);
+		WARN_ON(qp->s_rdma_read_sge.num_sge);
+		(void) do_rc_ack(qp, aeth, psn,
+				 OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
+		goto ack_done;
+	}
+
+ack_op_err:
+	status = IB_WC_LOC_QP_OP_ERR;
+	goto ack_err;
+
+ack_seq_err:
+	rdma_seq_err(qp, ibp, psn, rcd);
+	goto ack_done;
+
+ack_len_err:
+	status = IB_WC_LOC_LEN_ERR;
+ack_err:
+	if (qp->s_last == qp->s_acked) {
+		qib_send_complete(qp, wqe, status);
+		qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+	}
+ack_done:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+bail:
+	return;
+}
+
+/**
+ * qib_rc_rcv_error - process an incoming duplicate or error RC packet
+ * @ohdr: the other headers for this packet
+ * @data: the packet data
+ * @qp: the QP for this packet
+ * @opcode: the opcode for this packet
+ * @psn: the packet sequence number for this packet
+ * @diff: the difference between the PSN and the expected PSN
+ *
+ * This is called from qib_rc_rcv() to process an unexpected
+ * incoming RC packet for the given QP.
+ * Called at interrupt level.
+ * Return 1 if no more processing is needed; otherwise return 0 to
+ * schedule a response to be sent.
+ */
+static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
+			    void *data,
+			    struct qib_qp *qp,
+			    u32 opcode,
+			    u32 psn,
+			    int diff,
+			    struct qib_ctxtdata *rcd)
+{
+	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+	struct qib_ack_entry *e;
+	unsigned long flags;
+	u8 i, prev;
+	int old_req;
+
+	if (diff > 0) {
+		/*
+		 * Packet sequence error.
+		 * A NAK will ACK earlier sends and RDMA writes.
+		 * Don't queue the NAK if we already sent one.
+		 */
+		if (!qp->r_nak_state) {
+			ibp->n_rc_seqnak++;
+			qp->r_nak_state = IB_NAK_PSN_ERROR;
+			/* Use the expected PSN. */
+			qp->r_ack_psn = qp->r_psn;
+			/*
+			 * Wait to send the sequence NAK until all packets
+			 * in the receive queue have been processed.
+			 * Otherwise, we end up propagating congestion.
+			 */
+			if (list_empty(&qp->rspwait)) {
+				qp->r_flags |= QIB_R_RSP_NAK;
+				atomic_inc(&qp->refcount);
+				list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+			}
+		}
+		goto done;
+	}
+
+	/*
+	 * Handle a duplicate request.  Don't re-execute SEND, RDMA
+	 * write or atomic op.  Don't NAK errors, just silently drop
+	 * the duplicate request.  Note that r_sge, r_len, and
+	 * r_rcv_len may be in use so don't modify them.
+	 *
+	 * We are supposed to ACK the earliest duplicate PSN but we
+	 * can coalesce an outstanding duplicate ACK.  We have to
+	 * send the earliest so that RDMA reads can be restarted at
+	 * the requester's expected PSN.
+	 *
+	 * First, find where this duplicate PSN falls within the
+	 * ACKs previously sent.
+	 * old_req is true if there is an older response that is scheduled
+	 * to be sent before sending this one.
+	 */
+	e = NULL;
+	old_req = 1;
+	ibp->n_rc_dupreq++;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+	/* Double check we can process this now that we hold the s_lock. */
+	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+		goto unlock_done;
+
+	for (i = qp->r_head_ack_queue; ; i = prev) {
+		if (i == qp->s_tail_ack_queue)
+			old_req = 0;
+		if (i)
+			prev = i - 1;
+		else
+			prev = QIB_MAX_RDMA_ATOMIC;
+		if (prev == qp->r_head_ack_queue) {
+			e = NULL;
+			break;
+		}
+		e = &qp->s_ack_queue[prev];
+		if (!e->opcode) {
+			e = NULL;
+			break;
+		}
+		if (qib_cmp24(psn, e->psn) >= 0) {
+			if (prev == qp->s_tail_ack_queue &&
+			    qib_cmp24(psn, e->lpsn) <= 0)
+				old_req = 0;
+			break;
+		}
+	}
+	switch (opcode) {
+	case OP(RDMA_READ_REQUEST): {
+		struct ib_reth *reth;
+		u32 offset;
+		u32 len;
+
+		/*
+		 * If we didn't find the RDMA read request in the ack queue,
+		 * we can ignore this request.
+		 */
+		if (!e || e->opcode != OP(RDMA_READ_REQUEST))
+			goto unlock_done;
+		/* RETH comes after BTH */
+		reth = &ohdr->u.rc.reth;
+		/*
+		 * Address range must be a subset of the original
+		 * request and start on pmtu boundaries.
+		 * We reuse the old ack_queue slot since the requester
+		 * should not back up and request an earlier PSN for the
+		 * same request.
+		 */
+		offset = ((psn - e->psn) & QIB_PSN_MASK) *
+			ib_mtu_enum_to_int(qp->path_mtu);
+		len = be32_to_cpu(reth->length);
+		if (unlikely(offset + len != e->rdma_sge.sge_length))
+			goto unlock_done;
+		if (e->rdma_sge.mr) {
+			atomic_dec(&e->rdma_sge.mr->refcount);
+			e->rdma_sge.mr = NULL;
+		}
+		if (len != 0) {
+			u32 rkey = be32_to_cpu(reth->rkey);
+			u64 vaddr = be64_to_cpu(reth->vaddr);
+			int ok;
+
+			ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
+					 IB_ACCESS_REMOTE_READ);
+			if (unlikely(!ok))
+				goto unlock_done;
+		} else {
+			e->rdma_sge.vaddr = NULL;
+			e->rdma_sge.length = 0;
+			e->rdma_sge.sge_length = 0;
+		}
+		e->psn = psn;
+		if (old_req)
+			goto unlock_done;
+		qp->s_tail_ack_queue = prev;
+		break;
+	}
+
+	case OP(COMPARE_SWAP):
+	case OP(FETCH_ADD): {
+		/*
+		 * If we didn't find the atomic request in the ack queue
+		 * or the send tasklet is already backed up to send an
+		 * earlier entry, we can ignore this request.
+		 */
+		if (!e || e->opcode != (u8) opcode || old_req)
+			goto unlock_done;
+		qp->s_tail_ack_queue = prev;
+		break;
+	}
+
+	default:
+		/*
+		 * Ignore this operation if it doesn't request an ACK
+		 * or an earlier RDMA read or atomic is going to be resent.
+		 */
+		if (!(psn & IB_BTH_REQ_ACK) || old_req)
+			goto unlock_done;
+		/*
+		 * Resend the most recent ACK if this request is
+		 * after all the previous RDMA reads and atomics.
+		 */
+		if (i == qp->r_head_ack_queue) {
+			spin_unlock_irqrestore(&qp->s_lock, flags);
+			qp->r_nak_state = 0;
+			qp->r_ack_psn = qp->r_psn - 1;
+			goto send_ack;
+		}
+		/*
+		 * Try to send a simple ACK to work around a Mellanox bug
+		 * which doesn't accept a RDMA read response or atomic
+		 * response as an ACK for earlier SENDs or RDMA writes.
+		 */
+		if (!(qp->s_flags & QIB_S_RESP_PENDING)) {
+			spin_unlock_irqrestore(&qp->s_lock, flags);
+			qp->r_nak_state = 0;
+			qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
+			goto send_ack;
+		}
+		/*
+		 * Resend the RDMA read or atomic op which
+		 * ACKs this duplicate request.
+		 */
+		qp->s_tail_ack_queue = i;
+		break;
+	}
+	qp->s_ack_state = OP(ACKNOWLEDGE);
+	qp->s_flags |= QIB_S_RESP_PENDING;
+	qp->r_nak_state = 0;
+	qib_schedule_send(qp);
+
+unlock_done:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+done:
+	return 1;
+
+send_ack:
+	return 0;
+}
+
+void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err)
+{
+	unsigned long flags;
+	int lastwqe;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+	lastwqe = qib_error_qp(qp, err);
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+
+	if (lastwqe) {
+		struct ib_event ev;
+
+		ev.device = qp->ibqp.device;
+		ev.element.qp = &qp->ibqp;
+		ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+		qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+	}
+}
+
+static inline void qib_update_ack_queue(struct qib_qp *qp, unsigned n)
+{
+	unsigned next;
+
+	next = n + 1;
+	if (next > QIB_MAX_RDMA_ATOMIC)
+		next = 0;
+	qp->s_tail_ack_queue = next;
+	qp->s_ack_state = OP(ACKNOWLEDGE);
+}
+
+/**
+ * qib_rc_rcv - process an incoming RC packet
+ * @rcd: the context pointer
+ * @hdr: the header of this packet
+ * @has_grh: true if the header has a GRH
+ * @data: the packet data
+ * @tlen: the packet length
+ * @qp: the QP for this packet
+ *
+ * This is called from qib_qp_rcv() to process an incoming RC packet
+ * for the given QP.
+ * Called at interrupt level.
+ */
+void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+		int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+{
+	struct qib_ibport *ibp = &rcd->ppd->ibport_data;
+	struct qib_other_headers *ohdr;
+	u32 opcode;
+	u32 hdrsize;
+	u32 psn;
+	u32 pad;
+	struct ib_wc wc;
+	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+	int diff;
+	struct ib_reth *reth;
+	unsigned long flags;
+	int ret;
+
+	/* Check for GRH */
+	if (!has_grh) {
+		ohdr = &hdr->u.oth;
+		hdrsize = 8 + 12;       /* LRH + BTH */
+	} else {
+		ohdr = &hdr->u.l.oth;
+		hdrsize = 8 + 40 + 12;  /* LRH + GRH + BTH */
+	}
+
+	opcode = be32_to_cpu(ohdr->bth[0]);
+	spin_lock_irqsave(&qp->s_lock, flags);
+	if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
+		goto sunlock;
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+
+	psn = be32_to_cpu(ohdr->bth[2]);
+	opcode >>= 24;
+
+	/* Prevent simultaneous processing after APM on different CPUs */
+	spin_lock(&qp->r_lock);
+
+	/*
+	 * Process responses (ACKs) before anything else.  Note that the
+	 * packet sequence number will be for something in the send work
+	 * queue rather than the expected receive packet sequence number.
+	 * In other words, this QP is the requester.
+	 */
+	if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
+	    opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
+		qib_rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn,
+				hdrsize, pmtu, rcd);
+		goto runlock;
+	}
+
+	/* Compute 24 bits worth of difference. */
+	diff = qib_cmp24(psn, qp->r_psn);
+	if (unlikely(diff)) {
+		if (qib_rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
+			goto runlock;
+		goto send_ack;
+	}
+
+	/* Check for opcode sequence errors. */
+	switch (qp->r_state) {
+	case OP(SEND_FIRST):
+	case OP(SEND_MIDDLE):
+		if (opcode == OP(SEND_MIDDLE) ||
+		    opcode == OP(SEND_LAST) ||
+		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
+			break;
+		goto nack_inv;
+
+	case OP(RDMA_WRITE_FIRST):
+	case OP(RDMA_WRITE_MIDDLE):
+		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
+		    opcode == OP(RDMA_WRITE_LAST) ||
+		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
+			break;
+		goto nack_inv;
+
+	default:
+		if (opcode == OP(SEND_MIDDLE) ||
+		    opcode == OP(SEND_LAST) ||
+		    opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
+		    opcode == OP(RDMA_WRITE_MIDDLE) ||
+		    opcode == OP(RDMA_WRITE_LAST) ||
+		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
+			goto nack_inv;
+		/*
+		 * Note that it is up to the requester to not send a new
+		 * RDMA read or atomic operation before receiving an ACK
+		 * for the previous operation.
+		 */
+		break;
+	}
+
+	memset(&wc, 0, sizeof wc);
+
+	if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
+		qp->r_flags |= QIB_R_COMM_EST;
+		if (qp->ibqp.event_handler) {
+			struct ib_event ev;
+
+			ev.device = qp->ibqp.device;
+			ev.element.qp = &qp->ibqp;
+			ev.event = IB_EVENT_COMM_EST;
+			qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+		}
+	}
+
+	/* OK, process the packet. */
+	switch (opcode) {
+	case OP(SEND_FIRST):
+		ret = qib_get_rwqe(qp, 0);
+		if (ret < 0)
+			goto nack_op_err;
+		if (!ret)
+			goto rnr_nak;
+		qp->r_rcv_len = 0;
+		/* FALLTHROUGH */
+	case OP(SEND_MIDDLE):
+	case OP(RDMA_WRITE_MIDDLE):
+send_middle:
+		/* Check for invalid length PMTU or posted rwqe len. */
+		if (unlikely(tlen != (hdrsize + pmtu + 4)))
+			goto nack_inv;
+		qp->r_rcv_len += pmtu;
+		if (unlikely(qp->r_rcv_len > qp->r_len))
+			goto nack_inv;
+		qib_copy_sge(&qp->r_sge, data, pmtu, 1);
+		break;
+
+	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
+		/* consume RWQE */
+		ret = qib_get_rwqe(qp, 1);
+		if (ret < 0)
+			goto nack_op_err;
+		if (!ret)
+			goto rnr_nak;
+		goto send_last_imm;
+
+	case OP(SEND_ONLY):
+	case OP(SEND_ONLY_WITH_IMMEDIATE):
+		ret = qib_get_rwqe(qp, 0);
+		if (ret < 0)
+			goto nack_op_err;
+		if (!ret)
+			goto rnr_nak;
+		qp->r_rcv_len = 0;
+		if (opcode == OP(SEND_ONLY))
+			goto send_last;
+		/* FALLTHROUGH */
+	case OP(SEND_LAST_WITH_IMMEDIATE):
+send_last_imm:
+		wc.ex.imm_data = ohdr->u.imm_data;
+		hdrsize += 4;
+		wc.wc_flags = IB_WC_WITH_IMM;
+		/* FALLTHROUGH */
+	case OP(SEND_LAST):
+	case OP(RDMA_WRITE_LAST):
+send_last:
+		/* Get the number of bytes the message was padded by. */
+		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+		/* Check for invalid length. */
+		/* XXX LAST len should be >= 1 */
+		if (unlikely(tlen < (hdrsize + pad + 4)))
+			goto nack_inv;
+		/* Don't count the CRC. */
+		tlen -= (hdrsize + pad + 4);
+		wc.byte_len = tlen + qp->r_rcv_len;
+		if (unlikely(wc.byte_len > qp->r_len))
+			goto nack_inv;
+		qib_copy_sge(&qp->r_sge, data, tlen, 1);
+		while (qp->r_sge.num_sge) {
+			atomic_dec(&qp->r_sge.sge.mr->refcount);
+			if (--qp->r_sge.num_sge)
+				qp->r_sge.sge = *qp->r_sge.sg_list++;
+		}
+		qp->r_msn++;
+		if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+			break;
+		wc.wr_id = qp->r_wr_id;
+		wc.status = IB_WC_SUCCESS;
+		if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
+		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
+			wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+		else
+			wc.opcode = IB_WC_RECV;
+		wc.qp = &qp->ibqp;
+		wc.src_qp = qp->remote_qpn;
+		wc.slid = qp->remote_ah_attr.dlid;
+		wc.sl = qp->remote_ah_attr.sl;
+		/* Signal completion event if the solicited bit is set. */
+		qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+			     (ohdr->bth[0] &
+			      cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+		break;
+
+	case OP(RDMA_WRITE_FIRST):
+	case OP(RDMA_WRITE_ONLY):
+	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+			goto nack_inv;
+		/* consume RWQE */
+		reth = &ohdr->u.rc.reth;
+		hdrsize += sizeof(*reth);
+		qp->r_len = be32_to_cpu(reth->length);
+		qp->r_rcv_len = 0;
+		qp->r_sge.sg_list = NULL;
+		if (qp->r_len != 0) {
+			u32 rkey = be32_to_cpu(reth->rkey);
+			u64 vaddr = be64_to_cpu(reth->vaddr);
+			int ok;
+
+			/* Check rkey & NAK */
+			ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
+					 rkey, IB_ACCESS_REMOTE_WRITE);
+			if (unlikely(!ok))
+				goto nack_acc;
+			qp->r_sge.num_sge = 1;
+		} else {
+			qp->r_sge.num_sge = 0;
+			qp->r_sge.sge.mr = NULL;
+			qp->r_sge.sge.vaddr = NULL;
+			qp->r_sge.sge.length = 0;
+			qp->r_sge.sge.sge_length = 0;
+		}
+		if (opcode == OP(RDMA_WRITE_FIRST))
+			goto send_middle;
+		else if (opcode == OP(RDMA_WRITE_ONLY))
+			goto send_last;
+		ret = qib_get_rwqe(qp, 1);
+		if (ret < 0)
+			goto nack_op_err;
+		if (!ret)
+			goto rnr_nak;
+		goto send_last_imm;
+
+	case OP(RDMA_READ_REQUEST): {
+		struct qib_ack_entry *e;
+		u32 len;
+		u8 next;
+
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+			goto nack_inv;
+		next = qp->r_head_ack_queue + 1;
+		/* s_ack_queue is size QIB_MAX_RDMA_ATOMIC+1 so use > not >= */
+		if (next > QIB_MAX_RDMA_ATOMIC)
+			next = 0;
+		spin_lock_irqsave(&qp->s_lock, flags);
+		/* Double check we can process this while holding the s_lock. */
+		if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+			goto srunlock;
+		if (unlikely(next == qp->s_tail_ack_queue)) {
+			if (!qp->s_ack_queue[next].sent)
+				goto nack_inv_unlck;
+			qib_update_ack_queue(qp, next);
+		}
+		e = &qp->s_ack_queue[qp->r_head_ack_queue];
+		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
+			atomic_dec(&e->rdma_sge.mr->refcount);
+			e->rdma_sge.mr = NULL;
+		}
+		reth = &ohdr->u.rc.reth;
+		len = be32_to_cpu(reth->length);
+		if (len) {
+			u32 rkey = be32_to_cpu(reth->rkey);
+			u64 vaddr = be64_to_cpu(reth->vaddr);
+			int ok;
+
+			/* Check rkey & NAK */
+			ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+					 rkey, IB_ACCESS_REMOTE_READ);
+			if (unlikely(!ok))
+				goto nack_acc_unlck;
+			/*
+			 * Update the next expected PSN.  We add 1 later
+			 * below, so only add the remainder here.
+			 */
+			if (len > pmtu)
+				qp->r_psn += (len - 1) / pmtu;
+		} else {
+			e->rdma_sge.mr = NULL;
+			e->rdma_sge.vaddr = NULL;
+			e->rdma_sge.length = 0;
+			e->rdma_sge.sge_length = 0;
+		}
+		e->opcode = opcode;
+		e->sent = 0;
+		e->psn = psn;
+		e->lpsn = qp->r_psn;
+		/*
+		 * We need to increment the MSN here instead of when we
+		 * finish sending the result since a duplicate request would
+		 * increment it more than once.
+		 */
+		qp->r_msn++;
+		qp->r_psn++;
+		qp->r_state = opcode;
+		qp->r_nak_state = 0;
+		qp->r_head_ack_queue = next;
+
+		/* Schedule the send tasklet. */
+		qp->s_flags |= QIB_S_RESP_PENDING;
+		qib_schedule_send(qp);
+
+		goto srunlock;
+	}
+
+	case OP(COMPARE_SWAP):
+	case OP(FETCH_ADD): {
+		struct ib_atomic_eth *ateth;
+		struct qib_ack_entry *e;
+		u64 vaddr;
+		atomic64_t *maddr;
+		u64 sdata;
+		u32 rkey;
+		u8 next;
+
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+			goto nack_inv;
+		next = qp->r_head_ack_queue + 1;
+		if (next > QIB_MAX_RDMA_ATOMIC)
+			next = 0;
+		spin_lock_irqsave(&qp->s_lock, flags);
+		/* Double check we can process this while holding the s_lock. */
+		if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+			goto srunlock;
+		if (unlikely(next == qp->s_tail_ack_queue)) {
+			if (!qp->s_ack_queue[next].sent)
+				goto nack_inv_unlck;
+			qib_update_ack_queue(qp, next);
+		}
+		e = &qp->s_ack_queue[qp->r_head_ack_queue];
+		if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
+			atomic_dec(&e->rdma_sge.mr->refcount);
+			e->rdma_sge.mr = NULL;
+		}
+		ateth = &ohdr->u.atomic_eth;
+		vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
+			be32_to_cpu(ateth->vaddr[1]);
+		if (unlikely(vaddr & (sizeof(u64) - 1)))
+			goto nack_inv_unlck;
+		rkey = be32_to_cpu(ateth->rkey);
+		/* Check rkey & NAK */
+		if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+					  vaddr, rkey,
+					  IB_ACCESS_REMOTE_ATOMIC)))
+			goto nack_acc_unlck;
+		/* Perform atomic OP and save result. */
+		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+		sdata = be64_to_cpu(ateth->swap_data);
+		e->atomic_data = (opcode == OP(FETCH_ADD)) ?
+			(u64) atomic64_add_return(sdata, maddr) - sdata :
+			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+				      be64_to_cpu(ateth->compare_data),
+				      sdata);
+		atomic_dec(&qp->r_sge.sge.mr->refcount);
+		qp->r_sge.num_sge = 0;
+		e->opcode = opcode;
+		e->sent = 0;
+		e->psn = psn;
+		e->lpsn = psn;
+		qp->r_msn++;
+		qp->r_psn++;
+		qp->r_state = opcode;
+		qp->r_nak_state = 0;
+		qp->r_head_ack_queue = next;
+
+		/* Schedule the send tasklet. */
+		qp->s_flags |= QIB_S_RESP_PENDING;
+		qib_schedule_send(qp);
+
+		goto srunlock;
+	}
+
+	default:
+		/* NAK unknown opcodes. */
+		goto nack_inv;
+	}
+	qp->r_psn++;
+	qp->r_state = opcode;
+	qp->r_ack_psn = psn;
+	qp->r_nak_state = 0;
+	/* Send an ACK if requested or required. */
+	if (psn & (1 << 31))
+		goto send_ack;
+	goto runlock;
+
+rnr_nak:
+	qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
+	qp->r_ack_psn = qp->r_psn;
+	/* Queue RNR NAK for later */
+	if (list_empty(&qp->rspwait)) {
+		qp->r_flags |= QIB_R_RSP_NAK;
+		atomic_inc(&qp->refcount);
+		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+	}
+	goto runlock;
+
+nack_op_err:
+	qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+	qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
+	qp->r_ack_psn = qp->r_psn;
+	/* Queue NAK for later */
+	if (list_empty(&qp->rspwait)) {
+		qp->r_flags |= QIB_R_RSP_NAK;
+		atomic_inc(&qp->refcount);
+		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+	}
+	goto runlock;
+
+nack_inv_unlck:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+nack_inv:
+	qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+	qp->r_nak_state = IB_NAK_INVALID_REQUEST;
+	qp->r_ack_psn = qp->r_psn;
+	/* Queue NAK for later */
+	if (list_empty(&qp->rspwait)) {
+		qp->r_flags |= QIB_R_RSP_NAK;
+		atomic_inc(&qp->refcount);
+		list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+	}
+	goto runlock;
+
+nack_acc_unlck:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+nack_acc:
+	qib_rc_error(qp, IB_WC_LOC_PROT_ERR);
+	qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
+	qp->r_ack_psn = qp->r_psn;
+send_ack:
+	qib_send_rc_ack(qp);
+runlock:
+	spin_unlock(&qp->r_lock);
+	return;
+
+srunlock:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	spin_unlock(&qp->r_lock);
+	return;
+
+sunlock:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 32/52] IB/qib: Add qib_ruc.c
From: Ralph Campbell @ 2010-05-07  0:01 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_ruc.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_ruc.c |  817 +++++++++++++++++++++++++++++++++++
 1 files changed, 817 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_ruc.c

diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
new file mode 100644
index 0000000..eb78d93
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -0,0 +1,817 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+
+#include "qib.h"
+#include "qib_mad.h"
+
+/*
+ * Convert the AETH RNR timeout code into the number of microseconds.
+ */
+const u32 ib_qib_rnr_table[32] = {
+	655360,	/* 00: 655.36 */
+	10,	/* 01:    .01 */
+	20,	/* 02     .02 */
+	30,	/* 03:    .03 */
+	40,	/* 04:    .04 */
+	60,	/* 05:    .06 */
+	80,	/* 06:    .08 */
+	120,	/* 07:    .12 */
+	160,	/* 08:    .16 */
+	240,	/* 09:    .24 */
+	320,	/* 0A:    .32 */
+	480,	/* 0B:    .48 */
+	640,	/* 0C:    .64 */
+	960,	/* 0D:    .96 */
+	1280,	/* 0E:   1.28 */
+	1920,	/* 0F:   1.92 */
+	2560,	/* 10:   2.56 */
+	3840,	/* 11:   3.84 */
+	5120,	/* 12:   5.12 */
+	7680,	/* 13:   7.68 */
+	10240,	/* 14:  10.24 */
+	15360,	/* 15:  15.36 */
+	20480,	/* 16:  20.48 */
+	30720,	/* 17:  30.72 */
+	40960,	/* 18:  40.96 */
+	61440,	/* 19:  61.44 */
+	81920,	/* 1A:  81.92 */
+	122880,	/* 1B: 122.88 */
+	163840,	/* 1C: 163.84 */
+	245760,	/* 1D: 245.76 */
+	327680,	/* 1E: 327.68 */
+	491520	/* 1F: 491.52 */
+};
+
+/*
+ * Validate a RWQE and fill in the SGE state.
+ * Return 1 if OK.
+ */
+static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe)
+{
+	int i, j, ret;
+	struct ib_wc wc;
+	struct qib_lkey_table *rkt;
+	struct qib_pd *pd;
+	struct qib_sge_state *ss;
+
+	rkt = &to_idev(qp->ibqp.device)->lk_table;
+	pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
+	ss = &qp->r_sge;
+	ss->sg_list = qp->r_sg_list;
+	qp->r_len = 0;
+	for (i = j = 0; i < wqe->num_sge; i++) {
+		if (wqe->sg_list[i].length == 0)
+			continue;
+		/* Check LKEY */
+		if (!qib_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
+				 &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+			goto bad_lkey;
+		qp->r_len += wqe->sg_list[i].length;
+		j++;
+	}
+	ss->num_sge = j;
+	ss->total_len = qp->r_len;
+	ret = 1;
+	goto bail;
+
+bad_lkey:
+	while (j) {
+		struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
+
+		atomic_dec(&sge->mr->refcount);
+	}
+	ss->num_sge = 0;
+	memset(&wc, 0, sizeof(wc));
+	wc.wr_id = wqe->wr_id;
+	wc.status = IB_WC_LOC_PROT_ERR;
+	wc.opcode = IB_WC_RECV;
+	wc.qp = &qp->ibqp;
+	/* Signal solicited completion event. */
+	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+	ret = 0;
+bail:
+	return ret;
+}
+
+/**
+ * qib_get_rwqe - copy the next RWQE into the QP's RWQE
+ * @qp: the QP
+ * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
+ *
+ * Return -1 if there is a local error, 0 if no RWQE is available,
+ * otherwise return 1.
+ *
+ * Can be called from interrupt level.
+ */
+int qib_get_rwqe(struct qib_qp *qp, int wr_id_only)
+{
+	unsigned long flags;
+	struct qib_rq *rq;
+	struct qib_rwq *wq;
+	struct qib_srq *srq;
+	struct qib_rwqe *wqe;
+	void (*handler)(struct ib_event *, void *);
+	u32 tail;
+	int ret;
+
+	if (qp->ibqp.srq) {
+		srq = to_isrq(qp->ibqp.srq);
+		handler = srq->ibsrq.event_handler;
+		rq = &srq->rq;
+	} else {
+		srq = NULL;
+		handler = NULL;
+		rq = &qp->r_rq;
+	}
+
+	spin_lock_irqsave(&rq->lock, flags);
+	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
+		ret = 0;
+		goto unlock;
+	}
+
+	wq = rq->wq;
+	tail = wq->tail;
+	/* Validate tail before using it since it is user writable. */
+	if (tail >= rq->size)
+		tail = 0;
+	if (unlikely(tail == wq->head)) {
+		ret = 0;
+		goto unlock;
+	}
+	/* Make sure entry is read after head index is read. */
+	smp_rmb();
+	wqe = get_rwqe_ptr(rq, tail);
+	/*
+	 * Even though we update the tail index in memory, the verbs
+	 * consumer is not supposed to post more entries until a
+	 * completion is generated.
+	 */
+	if (++tail >= rq->size)
+		tail = 0;
+	wq->tail = tail;
+	if (!wr_id_only && !qib_init_sge(qp, wqe)) {
+		ret = -1;
+		goto unlock;
+	}
+	qp->r_wr_id = wqe->wr_id;
+
+	ret = 1;
+	set_bit(QIB_R_WRID_VALID, &qp->r_aflags);
+	if (handler) {
+		u32 n;
+
+		/*
+		 * Validate head pointer value and compute
+		 * the number of remaining WQEs.
+		 */
+		n = wq->head;
+		if (n >= rq->size)
+			n = 0;
+		if (n < tail)
+			n += rq->size - tail;
+		else
+			n -= tail;
+		if (n < srq->limit) {
+			struct ib_event ev;
+
+			srq->limit = 0;
+			spin_unlock_irqrestore(&rq->lock, flags);
+			ev.device = qp->ibqp.device;
+			ev.element.srq = qp->ibqp.srq;
+			ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
+			handler(&ev, srq->ibsrq.srq_context);
+			goto bail;
+		}
+	}
+unlock:
+	spin_unlock_irqrestore(&rq->lock, flags);
+bail:
+	return ret;
+}
+
+/*
+ * Switch to alternate path.
+ * The QP s_lock should be held and interrupts disabled.
+ */
+void qib_migrate_qp(struct qib_qp *qp)
+{
+	struct ib_event ev;
+
+	qp->s_mig_state = IB_MIG_MIGRATED;
+	qp->remote_ah_attr = qp->alt_ah_attr;
+	qp->port_num = qp->alt_ah_attr.port_num;
+	qp->s_pkey_index = qp->s_alt_pkey_index;
+
+	ev.device = qp->ibqp.device;
+	ev.element.qp = &qp->ibqp;
+	ev.event = IB_EVENT_PATH_MIG;
+	qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+}
+
+static __be64 get_sguid(struct qib_ibport *ibp, unsigned index)
+{
+	if (!index) {
+		struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+		return ppd->guid;
+	} else
+		return ibp->guids[index - 1];
+}
+
+static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
+{
+	return (gid->global.interface_id == id &&
+		(gid->global.subnet_prefix == gid_prefix ||
+		 gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX));
+}
+
+/*
+ *
+ * This should be called with the QP s_lock held.
+ */
+int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+		      int has_grh, struct qib_qp *qp, u32 bth0)
+{
+	__be64 guid;
+
+	if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
+		if (!has_grh) {
+			if (qp->alt_ah_attr.ah_flags & IB_AH_GRH)
+				goto err;
+		} else {
+			if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
+				goto err;
+			guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
+			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+				goto err;
+			if (!gid_ok(&hdr->u.l.grh.sgid,
+			    qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
+			    qp->alt_ah_attr.grh.dgid.global.interface_id))
+				goto err;
+		}
+		if (!qib_pkey_ok((u16)bth0,
+				 qib_get_pkey(ibp, qp->s_alt_pkey_index))) {
+			qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
+				      (u16)bth0,
+				      (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+				      0, qp->ibqp.qp_num,
+				      hdr->lrh[3], hdr->lrh[1]);
+			goto err;
+		}
+		/* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
+		if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
+		    ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
+			goto err;
+		qib_migrate_qp(qp);
+	} else {
+		if (!has_grh) {
+			if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+				goto err;
+		} else {
+			if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH))
+				goto err;
+			guid = get_sguid(ibp,
+					 qp->remote_ah_attr.grh.sgid_index);
+			if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+				goto err;
+			if (!gid_ok(&hdr->u.l.grh.sgid,
+			    qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
+			    qp->remote_ah_attr.grh.dgid.global.interface_id))
+				goto err;
+		}
+		if (!qib_pkey_ok((u16)bth0,
+				 qib_get_pkey(ibp, qp->s_pkey_index))) {
+			qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
+				      (u16)bth0,
+				      (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+				      0, qp->ibqp.qp_num,
+				      hdr->lrh[3], hdr->lrh[1]);
+			goto err;
+		}
+		/* Validate the SLID. See Ch. 9.6.1.5 */
+		if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid ||
+		    ppd_from_ibp(ibp)->port != qp->port_num)
+			goto err;
+		if (qp->s_mig_state == IB_MIG_REARM &&
+		    !(bth0 & IB_BTH_MIG_REQ))
+			qp->s_mig_state = IB_MIG_ARMED;
+	}
+
+	return 0;
+
+err:
+	return 1;
+}
+
+/**
+ * qib_ruc_loopback - handle UC and RC lookback requests
+ * @sqp: the sending QP
+ *
+ * This is called from qib_do_send() to
+ * forward a WQE addressed to the same HCA.
+ * Note that although we are single threaded due to the tasklet, we still
+ * have to protect against post_send().  We don't have to worry about
+ * receive interrupts since this is a connected protocol and all packets
+ * will pass through here.
+ */
+static void qib_ruc_loopback(struct qib_qp *sqp)
+{
+	struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
+	struct qib_qp *qp;
+	struct qib_swqe *wqe;
+	struct qib_sge *sge;
+	unsigned long flags;
+	struct ib_wc wc;
+	u64 sdata;
+	atomic64_t *maddr;
+	enum ib_wc_status send_status;
+	int release;
+	int ret;
+
+	/*
+	 * Note that we check the responder QP state after
+	 * checking the requester's state.
+	 */
+	qp = qib_lookup_qpn(ibp, sqp->remote_qpn);
+
+	spin_lock_irqsave(&sqp->s_lock, flags);
+
+	/* Return if we are already busy processing a work request. */
+	if ((sqp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT)) ||
+	    !(ib_qib_state_ops[sqp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+		goto unlock;
+
+	sqp->s_flags |= QIB_S_BUSY;
+
+again:
+	if (sqp->s_last == sqp->s_head)
+		goto clr_busy;
+	wqe = get_swqe_ptr(sqp, sqp->s_last);
+
+	/* Return if it is not OK to start a new work reqeust. */
+	if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_qib_state_ops[sqp->state] & QIB_FLUSH_SEND))
+			goto clr_busy;
+		/* We are in the error state, flush the work request. */
+		send_status = IB_WC_WR_FLUSH_ERR;
+		goto flush_send;
+	}
+
+	/*
+	 * We can rely on the entry not changing without the s_lock
+	 * being held until we update s_last.
+	 * We increment s_cur to indicate s_last is in progress.
+	 */
+	if (sqp->s_last == sqp->s_cur) {
+		if (++sqp->s_cur >= sqp->s_size)
+			sqp->s_cur = 0;
+	}
+	spin_unlock_irqrestore(&sqp->s_lock, flags);
+
+	if (!qp || !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) ||
+	    qp->ibqp.qp_type != sqp->ibqp.qp_type) {
+		ibp->n_pkt_drops++;
+		/*
+		 * For RC, the requester would timeout and retry so
+		 * shortcut the timeouts and just signal too many retries.
+		 */
+		if (sqp->ibqp.qp_type == IB_QPT_RC)
+			send_status = IB_WC_RETRY_EXC_ERR;
+		else
+			send_status = IB_WC_SUCCESS;
+		goto serr;
+	}
+
+	memset(&wc, 0, sizeof wc);
+	send_status = IB_WC_SUCCESS;
+
+	release = 1;
+	sqp->s_sge.sge = wqe->sg_list[0];
+	sqp->s_sge.sg_list = wqe->sg_list + 1;
+	sqp->s_sge.num_sge = wqe->wr.num_sge;
+	sqp->s_len = wqe->length;
+	switch (wqe->wr.opcode) {
+	case IB_WR_SEND_WITH_IMM:
+		wc.wc_flags = IB_WC_WITH_IMM;
+		wc.ex.imm_data = wqe->wr.ex.imm_data;
+		/* FALLTHROUGH */
+	case IB_WR_SEND:
+		ret = qib_get_rwqe(qp, 0);
+		if (ret < 0)
+			goto op_err;
+		if (!ret)
+			goto rnr_nak;
+		break;
+
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+			goto inv_err;
+		wc.wc_flags = IB_WC_WITH_IMM;
+		wc.ex.imm_data = wqe->wr.ex.imm_data;
+		ret = qib_get_rwqe(qp, 1);
+		if (ret < 0)
+			goto op_err;
+		if (!ret)
+			goto rnr_nak;
+		/* FALLTHROUGH */
+	case IB_WR_RDMA_WRITE:
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+			goto inv_err;
+		if (wqe->length == 0)
+			break;
+		if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
+					  wqe->wr.wr.rdma.remote_addr,
+					  wqe->wr.wr.rdma.rkey,
+					  IB_ACCESS_REMOTE_WRITE)))
+			goto acc_err;
+		qp->r_sge.sg_list = NULL;
+		qp->r_sge.num_sge = 1;
+		qp->r_sge.total_len = wqe->length;
+		break;
+
+	case IB_WR_RDMA_READ:
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+			goto inv_err;
+		if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
+					  wqe->wr.wr.rdma.remote_addr,
+					  wqe->wr.wr.rdma.rkey,
+					  IB_ACCESS_REMOTE_READ)))
+			goto acc_err;
+		release = 0;
+		sqp->s_sge.sg_list = NULL;
+		sqp->s_sge.num_sge = 1;
+		qp->r_sge.sge = wqe->sg_list[0];
+		qp->r_sge.sg_list = wqe->sg_list + 1;
+		qp->r_sge.num_sge = wqe->wr.num_sge;
+		qp->r_sge.total_len = wqe->length;
+		break;
+
+	case IB_WR_ATOMIC_CMP_AND_SWP:
+	case IB_WR_ATOMIC_FETCH_AND_ADD:
+		if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+			goto inv_err;
+		if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+					  wqe->wr.wr.atomic.remote_addr,
+					  wqe->wr.wr.atomic.rkey,
+					  IB_ACCESS_REMOTE_ATOMIC)))
+			goto acc_err;
+		/* Perform atomic OP and save result. */
+		maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+		sdata = wqe->wr.wr.atomic.compare_add;
+		*(u64 *) sqp->s_sge.sge.vaddr =
+			(wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
+			(u64) atomic64_add_return(sdata, maddr) - sdata :
+			(u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+				      sdata, wqe->wr.wr.atomic.swap);
+		atomic_dec(&qp->r_sge.sge.mr->refcount);
+		qp->r_sge.num_sge = 0;
+		goto send_comp;
+
+	default:
+		send_status = IB_WC_LOC_QP_OP_ERR;
+		goto serr;
+	}
+
+	sge = &sqp->s_sge.sge;
+	while (sqp->s_len) {
+		u32 len = sqp->s_len;
+
+		if (len > sge->length)
+			len = sge->length;
+		if (len > sge->sge_length)
+			len = sge->sge_length;
+		BUG_ON(len == 0);
+		qib_copy_sge(&qp->r_sge, sge->vaddr, len, release);
+		sge->vaddr += len;
+		sge->length -= len;
+		sge->sge_length -= len;
+		if (sge->sge_length == 0) {
+			if (!release)
+				atomic_dec(&sge->mr->refcount);
+			if (--sqp->s_sge.num_sge)
+				*sge = *sqp->s_sge.sg_list++;
+		} else if (sge->length == 0 && sge->mr->lkey) {
+			if (++sge->n >= QIB_SEGSZ) {
+				if (++sge->m >= sge->mr->mapsz)
+					break;
+				sge->n = 0;
+			}
+			sge->vaddr =
+				sge->mr->map[sge->m]->segs[sge->n].vaddr;
+			sge->length =
+				sge->mr->map[sge->m]->segs[sge->n].length;
+		}
+		sqp->s_len -= len;
+	}
+	if (release)
+		while (qp->r_sge.num_sge) {
+			atomic_dec(&qp->r_sge.sge.mr->refcount);
+			if (--qp->r_sge.num_sge)
+				qp->r_sge.sge = *qp->r_sge.sg_list++;
+		}
+
+	if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+		goto send_comp;
+
+	if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+	else
+		wc.opcode = IB_WC_RECV;
+	wc.wr_id = qp->r_wr_id;
+	wc.status = IB_WC_SUCCESS;
+	wc.byte_len = wqe->length;
+	wc.qp = &qp->ibqp;
+	wc.src_qp = qp->remote_qpn;
+	wc.slid = qp->remote_ah_attr.dlid;
+	wc.sl = qp->remote_ah_attr.sl;
+	wc.port_num = 1;
+	/* Signal completion event if the solicited bit is set. */
+	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+		       wqe->wr.send_flags & IB_SEND_SOLICITED);
+
+send_comp:
+	spin_lock_irqsave(&sqp->s_lock, flags);
+	ibp->n_loop_pkts++;
+flush_send:
+	sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
+	qib_send_complete(sqp, wqe, send_status);
+	goto again;
+
+rnr_nak:
+	/* Handle RNR NAK */
+	if (qp->ibqp.qp_type == IB_QPT_UC)
+		goto send_comp;
+	ibp->n_rnr_naks++;
+	/*
+	 * Note: we don't need the s_lock held since the BUSY flag
+	 * makes this single threaded.
+	 */
+	if (sqp->s_rnr_retry == 0) {
+		send_status = IB_WC_RNR_RETRY_EXC_ERR;
+		goto serr;
+	}
+	if (sqp->s_rnr_retry_cnt < 7)
+		sqp->s_rnr_retry--;
+	spin_lock_irqsave(&sqp->s_lock, flags);
+	if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_RECV_OK))
+		goto clr_busy;
+	sqp->s_flags |= QIB_S_WAIT_RNR;
+	sqp->s_timer.function = qib_rc_rnr_retry;
+	sqp->s_timer.expires = jiffies +
+		usecs_to_jiffies(ib_qib_rnr_table[qp->r_min_rnr_timer]);
+	add_timer(&sqp->s_timer);
+	goto clr_busy;
+
+op_err:
+	send_status = IB_WC_REM_OP_ERR;
+	wc.status = IB_WC_LOC_QP_OP_ERR;
+	goto err;
+
+inv_err:
+	send_status = IB_WC_REM_INV_REQ_ERR;
+	wc.status = IB_WC_LOC_QP_OP_ERR;
+	goto err;
+
+acc_err:
+	send_status = IB_WC_REM_ACCESS_ERR;
+	wc.status = IB_WC_LOC_PROT_ERR;
+err:
+	/* responder goes to error state */
+	qib_rc_error(qp, wc.status);
+
+serr:
+	spin_lock_irqsave(&sqp->s_lock, flags);
+	qib_send_complete(sqp, wqe, send_status);
+	if (sqp->ibqp.qp_type == IB_QPT_RC) {
+		int lastwqe = qib_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+
+		sqp->s_flags &= ~QIB_S_BUSY;
+		spin_unlock_irqrestore(&sqp->s_lock, flags);
+		if (lastwqe) {
+			struct ib_event ev;
+
+			ev.device = sqp->ibqp.device;
+			ev.element.qp = &sqp->ibqp;
+			ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+			sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
+		}
+		goto done;
+	}
+clr_busy:
+	sqp->s_flags &= ~QIB_S_BUSY;
+unlock:
+	spin_unlock_irqrestore(&sqp->s_lock, flags);
+done:
+	if (qp && atomic_dec_and_test(&qp->refcount))
+		wake_up(&qp->wait);
+}
+
+/**
+ * qib_make_grh - construct a GRH header
+ * @ibp: a pointer to the IB port
+ * @hdr: a pointer to the GRH header being constructed
+ * @grh: the global route address to send to
+ * @hwords: the number of 32 bit words of header being sent
+ * @nwords: the number of 32 bit words of data being sent
+ *
+ * Return the size of the header in 32 bit words.
+ */
+u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
+		 struct ib_global_route *grh, u32 hwords, u32 nwords)
+{
+	hdr->version_tclass_flow =
+		cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
+			    (grh->traffic_class << IB_GRH_TCLASS_SHIFT) |
+			    (grh->flow_label << IB_GRH_FLOW_SHIFT));
+	hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
+	/* next_hdr is defined by C8-7 in ch. 8.4.1 */
+	hdr->next_hdr = IB_GRH_NEXT_HDR;
+	hdr->hop_limit = grh->hop_limit;
+	/* The SGID is 32-bit aligned. */
+	hdr->sgid.global.subnet_prefix = ibp->gid_prefix;
+	hdr->sgid.global.interface_id = grh->sgid_index ?
+		ibp->guids[grh->sgid_index - 1] : ppd_from_ibp(ibp)->guid;
+	hdr->dgid = grh->dgid;
+
+	/* GRH header size in 32-bit words. */
+	return sizeof(struct ib_grh) / sizeof(u32);
+}
+
+void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
+			 u32 bth0, u32 bth2)
+{
+	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+	u16 lrh0;
+	u32 nwords;
+	u32 extra_bytes;
+
+	/* Construct the header. */
+	extra_bytes = -qp->s_cur_size & 3;
+	nwords = (qp->s_cur_size + extra_bytes) >> 2;
+	lrh0 = QIB_LRH_BTH;
+	if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+		qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh,
+					       &qp->remote_ah_attr.grh,
+					       qp->s_hdrwords, nwords);
+		lrh0 = QIB_LRH_GRH;
+	}
+	lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
+		qp->remote_ah_attr.sl << 4;
+	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
+	qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+	qp->s_hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
+				       qp->remote_ah_attr.src_path_bits);
+	bth0 |= qib_get_pkey(ibp, qp->s_pkey_index);
+	bth0 |= extra_bytes << 20;
+	if (qp->s_mig_state == IB_MIG_MIGRATED)
+		bth0 |= IB_BTH_MIG_REQ;
+	ohdr->bth[0] = cpu_to_be32(bth0);
+	ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
+	ohdr->bth[2] = cpu_to_be32(bth2);
+}
+
+/**
+ * qib_do_send - perform a send on a QP
+ * @work: contains a pointer to the QP
+ *
+ * Process entries in the send work queue until credit or queue is
+ * exhausted.  Only allow one CPU to send a packet per QP (tasklet).
+ * Otherwise, two threads could send packets out of order.
+ */
+void qib_do_send(struct work_struct *work)
+{
+	struct qib_qp *qp = container_of(work, struct qib_qp, s_work);
+	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	int (*make_req)(struct qib_qp *qp);
+	unsigned long flags;
+
+	if ((qp->ibqp.qp_type == IB_QPT_RC ||
+	     qp->ibqp.qp_type == IB_QPT_UC) &&
+	    (qp->remote_ah_attr.dlid & ~((1 << ppd->lmc) - 1)) == ppd->lid) {
+		qib_ruc_loopback(qp);
+		return;
+	}
+
+	if (qp->ibqp.qp_type == IB_QPT_RC)
+		make_req = qib_make_rc_req;
+	else if (qp->ibqp.qp_type == IB_QPT_UC)
+		make_req = qib_make_uc_req;
+	else
+		make_req = qib_make_ud_req;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+
+	/* Return if we are already busy processing a work request. */
+	if (!qib_send_ok(qp)) {
+		spin_unlock_irqrestore(&qp->s_lock, flags);
+		return;
+	}
+
+	qp->s_flags |= QIB_S_BUSY;
+
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+
+	do {
+		/* Check for a constructed packet to be sent. */
+		if (qp->s_hdrwords != 0) {
+			/*
+			 * If the packet cannot be sent now, return and
+			 * the send tasklet will be woken up later.
+			 */
+			if (qib_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
+					   qp->s_cur_sge, qp->s_cur_size))
+				break;
+			/* Record that s_hdr is empty. */
+			qp->s_hdrwords = 0;
+		}
+	} while (make_req(qp));
+}
+
+/*
+ * This should be called with s_lock held.
+ */
+void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
+		       enum ib_wc_status status)
+{
+	u32 old_last, last;
+	unsigned i;
+
+	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+		return;
+
+	for (i = 0; i < wqe->wr.num_sge; i++) {
+		struct qib_sge *sge = &wqe->sg_list[i];
+
+		atomic_dec(&sge->mr->refcount);
+	}
+	if (qp->ibqp.qp_type == IB_QPT_UD ||
+	    qp->ibqp.qp_type == IB_QPT_SMI ||
+	    qp->ibqp.qp_type == IB_QPT_GSI)
+		atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+
+	/* See ch. 11.2.4.1 and 10.7.3.1 */
+	if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+	    (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
+	    status != IB_WC_SUCCESS) {
+		struct ib_wc wc;
+
+		memset(&wc, 0, sizeof wc);
+		wc.wr_id = wqe->wr.wr_id;
+		wc.status = status;
+		wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
+		wc.qp = &qp->ibqp;
+		if (status == IB_WC_SUCCESS)
+			wc.byte_len = wqe->length;
+		qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
+			     status != IB_WC_SUCCESS);
+	}
+
+	last = qp->s_last;
+	old_last = last;
+	if (++last >= qp->s_size)
+		last = 0;
+	qp->s_last = last;
+	if (qp->s_acked == old_last)
+		qp->s_acked = last;
+	if (qp->s_cur == old_last)
+		qp->s_cur = last;
+	if (qp->s_tail == old_last)
+		qp->s_tail = last;
+	if (qp->state == IB_QPS_SQD && last == qp->s_cur)
+		qp->s_draining = 0;
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 33/52] IB/qib: Add qib_sd7220.c
From: Ralph Campbell @ 2010-05-07  0:01 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_sd7220.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_sd7220.c | 1415 ++++++++++++++++++++++++++++++++
 1 files changed, 1415 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_sd7220.c

diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c
new file mode 100644
index 0000000..17af3cd
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_sd7220.c
@@ -0,0 +1,1415 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/*
+ * This file contains all of the code that is specific to the SerDes
+ * on the QLogic_IB 7220 chip.
+ */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "qib.h"
+#include "qib_7220.h"
+
+/*
+ * Same as in qib_iba7220.c, but just the registers needed here.
+ * Could move whole set to qib_7220.h, but decided better to keep
+ * local.
+ */
+#define KREG_IDX(regname) (QIB_7220_##regname##_OFFS / sizeof(u64))
+#define kr_hwerrclear KREG_IDX(HwErrClear)
+#define kr_hwerrmask KREG_IDX(HwErrMask)
+#define kr_hwerrstatus KREG_IDX(HwErrStatus)
+#define kr_ibcstatus KREG_IDX(IBCStatus)
+#define kr_ibserdesctrl KREG_IDX(IBSerDesCtrl)
+#define kr_scratch KREG_IDX(Scratch)
+#define kr_xgxs_cfg KREG_IDX(XGXSCfg)
+/* these are used only here, not in qib_iba7220.c */
+#define kr_ibsd_epb_access_ctrl KREG_IDX(ibsd_epb_access_ctrl)
+#define kr_ibsd_epb_transaction_reg KREG_IDX(ibsd_epb_transaction_reg)
+#define kr_pciesd_epb_transaction_reg KREG_IDX(pciesd_epb_transaction_reg)
+#define kr_pciesd_epb_access_ctrl KREG_IDX(pciesd_epb_access_ctrl)
+#define kr_serdes_ddsrxeq0 KREG_IDX(SerDes_DDSRXEQ0)
+
+/*
+ * The IBSerDesMappTable is a memory that holds values to be stored in
+ * various SerDes registers by IBC.
+ */
+#define kr_serdes_maptable KREG_IDX(IBSerDesMappTable)
+
+/*
+ * Below used for sdnum parameter, selecting one of the two sections
+ * used for PCIe, or the single SerDes used for IB.
+ */
+#define PCIE_SERDES0 0
+#define PCIE_SERDES1 1
+
+/*
+ * The EPB requires addressing in a particular form. EPB_LOC() is intended
+ * to make #definitions a little more readable.
+ */
+#define EPB_ADDR_SHF 8
+#define EPB_LOC(chn, elt, reg) \
+	(((elt & 0xf) | ((chn & 7) << 4) | ((reg & 0x3f) << 9)) << \
+	 EPB_ADDR_SHF)
+#define EPB_IB_QUAD0_CS_SHF (25)
+#define EPB_IB_QUAD0_CS (1U <<  EPB_IB_QUAD0_CS_SHF)
+#define EPB_IB_UC_CS_SHF (26)
+#define EPB_PCIE_UC_CS_SHF (27)
+#define EPB_GLOBAL_WR (1U << (EPB_ADDR_SHF + 8))
+
+/* Forward declarations. */
+static int qib_sd7220_reg_mod(struct qib_devdata *dd, int sdnum, u32 loc,
+			      u32 data, u32 mask);
+static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
+			     int mask);
+static int qib_sd_trimdone_poll(struct qib_devdata *dd);
+static void qib_sd_trimdone_monitor(struct qib_devdata *dd, const char *where);
+static int qib_sd_setvals(struct qib_devdata *dd);
+static int qib_sd_early(struct qib_devdata *dd);
+static int qib_sd_dactrim(struct qib_devdata *dd);
+static int qib_internal_presets(struct qib_devdata *dd);
+/* Tweak the register (CMUCTRL5) that contains the TRIMSELF controls */
+static int qib_sd_trimself(struct qib_devdata *dd, int val);
+static int epb_access(struct qib_devdata *dd, int sdnum, int claim);
+
+/*
+ * Below keeps track of whether the "once per power-on" initialization has
+ * been done, because uC code Version 1.32.17 or higher allows the uC to
+ * be reset at will, and Automatic Equalization may require it. So the
+ * state of the reset "pin", is no longer valid. Instead, we check for the
+ * actual uC code having been loaded.
+ */
+static int qib_ibsd_ucode_loaded(struct qib_pportdata *ppd)
+{
+	struct qib_devdata *dd = ppd->dd;
+	if (!dd->cspec->serdes_first_init_done && (qib_sd7220_ib_vfy(dd) > 0))
+		dd->cspec->serdes_first_init_done = 1;
+	return dd->cspec->serdes_first_init_done;
+}
+
+/* repeat #define for local use. "Real" #define is in qib_iba7220.c */
+#define QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR      0x0000004000000000ULL
+#define IB_MPREG5 (EPB_LOC(6, 0, 0xE) | (1L << EPB_IB_UC_CS_SHF))
+#define IB_MPREG6 (EPB_LOC(6, 0, 0xF) | (1U << EPB_IB_UC_CS_SHF))
+#define UC_PAR_CLR_D 8
+#define UC_PAR_CLR_M 0xC
+#define IB_CTRL2(chn) (EPB_LOC(chn, 7, 3) | EPB_IB_QUAD0_CS)
+#define START_EQ1(chan) EPB_LOC(chan, 7, 0x27)
+
+void qib_sd7220_clr_ibpar(struct qib_devdata *dd)
+{
+	int ret;
+
+	/* clear, then re-enable parity errs */
+	ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6,
+		UC_PAR_CLR_D, UC_PAR_CLR_M);
+	if (ret < 0) {
+		qib_dev_err(dd, "Failed clearing IBSerDes Parity err\n");
+		goto bail;
+	}
+	ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0,
+		UC_PAR_CLR_M);
+
+	qib_read_kreg32(dd, kr_scratch);
+	udelay(4);
+	qib_write_kreg(dd, kr_hwerrclear,
+		QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR);
+	qib_read_kreg32(dd, kr_scratch);
+bail:
+	return;
+}
+
+/*
+ * After a reset or other unusual event, the epb interface may need
+ * to be re-synchronized, between the host and the uC.
+ * returns <0 for failure to resync within IBSD_RESYNC_TRIES (not expected)
+ */
+#define IBSD_RESYNC_TRIES 3
+#define IB_PGUDP(chn) (EPB_LOC((chn), 2, 1) | EPB_IB_QUAD0_CS)
+#define IB_CMUDONE(chn) (EPB_LOC((chn), 7, 0xF) | EPB_IB_QUAD0_CS)
+
+static int qib_resync_ibepb(struct qib_devdata *dd)
+{
+	int ret, pat, tries, chn;
+	u32 loc;
+
+	ret = -1;
+	chn = 0;
+	for (tries = 0; tries < (4 * IBSD_RESYNC_TRIES); ++tries) {
+		loc = IB_PGUDP(chn);
+		ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+		if (ret < 0) {
+			qib_dev_err(dd, "Failed read in resync\n");
+			continue;
+		}
+		if (ret != 0xF0 && ret != 0x55 && tries == 0)
+			qib_dev_err(dd, "unexpected pattern in resync\n");
+		pat = ret ^ 0xA5; /* alternate F0 and 55 */
+		ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, pat, 0xFF);
+		if (ret < 0) {
+			qib_dev_err(dd, "Failed write in resync\n");
+			continue;
+		}
+		ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+		if (ret < 0) {
+			qib_dev_err(dd, "Failed re-read in resync\n");
+			continue;
+		}
+		if (ret != pat) {
+			qib_dev_err(dd, "Failed compare1 in resync\n");
+			continue;
+		}
+		loc = IB_CMUDONE(chn);
+		ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+		if (ret < 0) {
+			qib_dev_err(dd, "Failed CMUDONE rd in resync\n");
+			continue;
+		}
+		if ((ret & 0x70) != ((chn << 4) | 0x40)) {
+			qib_dev_err(dd, "Bad CMUDONE value %02X, chn %d\n",
+				    ret, chn);
+			continue;
+		}
+		if (++chn == 4)
+			break;  /* Success */
+	}
+	return (ret > 0) ? 0 : ret;
+}
+
+/*
+ * Localize the stuff that should be done to change IB uC reset
+ * returns <0 for errors.
+ */
+static int qib_ibsd_reset(struct qib_devdata *dd, int assert_rst)
+{
+	u64 rst_val;
+	int ret = 0;
+	unsigned long flags;
+
+	rst_val = qib_read_kreg64(dd, kr_ibserdesctrl);
+	if (assert_rst) {
+		/*
+		 * Vendor recommends "interrupting" uC before reset, to
+		 * minimize possible glitches.
+		 */
+		spin_lock_irqsave(&dd->cspec->sdepb_lock, flags);
+		epb_access(dd, IB_7220_SERDES, 1);
+		rst_val |= 1ULL;
+		/* Squelch possible parity error from _asserting_ reset */
+		qib_write_kreg(dd, kr_hwerrmask,
+			       dd->cspec->hwerrmask &
+			       ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR);
+		qib_write_kreg(dd, kr_ibserdesctrl, rst_val);
+		/* flush write, delay to ensure it took effect */
+		qib_read_kreg32(dd, kr_scratch);
+		udelay(2);
+		/* once it's reset, can remove interrupt */
+		epb_access(dd, IB_7220_SERDES, -1);
+		spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
+	} else {
+		/*
+		 * Before we de-assert reset, we need to deal with
+		 * possible glitch on the Parity-error line.
+		 * Suppress it around the reset, both in chip-level
+		 * hwerrmask and in IB uC control reg. uC will allow
+		 * it again during startup.
+		 */
+		u64 val;
+		rst_val &= ~(1ULL);
+		qib_write_kreg(dd, kr_hwerrmask,
+			       dd->cspec->hwerrmask &
+			       ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR);
+
+		ret = qib_resync_ibepb(dd);
+		if (ret < 0)
+			qib_dev_err(dd, "unable to re-sync IB EPB\n");
+
+		/* set uC control regs to suppress parity errs */
+		ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG5, 1, 1);
+		if (ret < 0)
+			goto bail;
+		/* IB uC code past Version 1.32.17 allow suppression of wdog */
+		ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80,
+			0x80);
+		if (ret < 0) {
+			qib_dev_err(dd, "Failed to set WDOG disable\n");
+			goto bail;
+		}
+		qib_write_kreg(dd, kr_ibserdesctrl, rst_val);
+		/* flush write, delay for startup */
+		qib_read_kreg32(dd, kr_scratch);
+		udelay(1);
+		/* clear, then re-enable parity errs */
+		qib_sd7220_clr_ibpar(dd);
+		val = qib_read_kreg64(dd, kr_hwerrstatus);
+		if (val & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR) {
+			qib_dev_err(dd, "IBUC Parity still set after RST\n");
+			dd->cspec->hwerrmask &=
+				~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR;
+		}
+		qib_write_kreg(dd, kr_hwerrmask,
+			dd->cspec->hwerrmask);
+	}
+
+bail:
+	return ret;
+}
+
+static void qib_sd_trimdone_monitor(struct qib_devdata *dd,
+       const char *where)
+{
+	int ret, chn, baduns;
+	u64 val;
+
+	if (!where)
+		where = "?";
+
+	/* give time for reset to settle out in EPB */
+	udelay(2);
+
+	ret = qib_resync_ibepb(dd);
+	if (ret < 0)
+		qib_dev_err(dd, "not able to re-sync IB EPB (%s)\n", where);
+
+	/* Do "sacrificial read" to get EPB in sane state after reset */
+	ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(0), 0, 0);
+	if (ret < 0)
+		qib_dev_err(dd, "Failed TRIMDONE 1st read, (%s)\n", where);
+
+	/* Check/show "summary" Trim-done bit in IBCStatus */
+	val = qib_read_kreg64(dd, kr_ibcstatus);
+	if (!(val & (1ULL << 11)))
+		qib_dev_err(dd, "IBCS TRIMDONE clear (%s)\n", where);
+	/*
+	 * Do "dummy read/mod/wr" to get EPB in sane state after reset
+	 * The default value for MPREG6 is 0.
+	 */
+	udelay(2);
+
+	ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80, 0x80);
+	if (ret < 0)
+		qib_dev_err(dd, "Failed Dummy RMW, (%s)\n", where);
+	udelay(10);
+
+	baduns = 0;
+
+	for (chn = 3; chn >= 0; --chn) {
+		/* Read CTRL reg for each channel to check TRIMDONE */
+		ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+			IB_CTRL2(chn), 0, 0);
+		if (ret < 0)
+			qib_dev_err(dd, "Failed checking TRIMDONE, chn %d"
+				    " (%s)\n", chn, where);
+
+		if (!(ret & 0x10)) {
+			int probe;
+
+			baduns |= (1 << chn);
+			qib_dev_err(dd, "TRIMDONE cleared on chn %d (%02X)."
+				" (%s)\n", chn, ret, where);
+			probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+				IB_PGUDP(0), 0, 0);
+			qib_dev_err(dd, "probe is %d (%02X)\n",
+				probe, probe);
+			probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+				IB_CTRL2(chn), 0, 0);
+			qib_dev_err(dd, "re-read: %d (%02X)\n",
+				probe, probe);
+			ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+				IB_CTRL2(chn), 0x10, 0x10);
+			if (ret < 0)
+				qib_dev_err(dd,
+					"Err on TRIMDONE rewrite1\n");
+		}
+	}
+	for (chn = 3; chn >= 0; --chn) {
+		/* Read CTRL reg for each channel to check TRIMDONE */
+		if (baduns & (1 << chn)) {
+			qib_dev_err(dd,
+				"Reseting TRIMDONE on chn %d (%s)\n",
+				chn, where);
+			ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+				IB_CTRL2(chn), 0x10, 0x10);
+			if (ret < 0)
+				qib_dev_err(dd, "Failed re-setting "
+					"TRIMDONE, chn %d (%s)\n",
+					chn, where);
+		}
+	}
+}
+
+/*
+ * Below is portion of IBA7220-specific bringup_serdes() that actually
+ * deals with registers and memory within the SerDes itself.
+ * Post IB uC code version 1.32.17, was_reset being 1 is not really
+ * informative, so we double-check.
+ */
+int qib_sd7220_init(struct qib_devdata *dd)
+{
+	int ret = 1; /* default to failure */
+	int first_reset, was_reset;
+
+	/* SERDES MPU reset recorded in D0 */
+	was_reset = (qib_read_kreg64(dd, kr_ibserdesctrl) & 1);
+	if (!was_reset) {
+		/* entered with reset not asserted, we need to do it */
+		qib_ibsd_reset(dd, 1);
+		qib_sd_trimdone_monitor(dd, "Driver-reload");
+	}
+	/* Substitute our deduced value for was_reset */
+	ret = qib_ibsd_ucode_loaded(dd->pport);
+	if (ret < 0)
+		goto bail;
+
+	first_reset = !ret; /* First reset if IBSD uCode not yet loaded */
+	/*
+	 * Alter some regs per vendor latest doc, reset-defaults
+	 * are not right for IB.
+	 */
+	ret = qib_sd_early(dd);
+	if (ret < 0) {
+		qib_dev_err(dd, "Failed to set IB SERDES early defaults\n");
+		goto bail;
+	}
+	/*
+	 * Set DAC manual trim IB.
+	 * We only do this once after chip has been reset (usually
+	 * same as once per system boot).
+	 */
+	if (first_reset) {
+		ret = qib_sd_dactrim(dd);
+		if (ret < 0) {
+			qib_dev_err(dd, "Failed IB SERDES DAC trim\n");
+			goto bail;
+		}
+	}
+	/*
+	 * Set various registers (DDS and RXEQ) that will be
+	 * controlled by IBC (in 1.2 mode) to reasonable preset values
+	 * Calling the "internal" version avoids the "check for needed"
+	 * and "trimdone monitor" that might be counter-productive.
+	 */
+	ret = qib_internal_presets(dd);
+	if (ret < 0) {
+		qib_dev_err(dd, "Failed to set IB SERDES presets\n");
+		goto bail;
+	}
+	ret = qib_sd_trimself(dd, 0x80);
+	if (ret < 0) {
+		qib_dev_err(dd, "Failed to set IB SERDES TRIMSELF\n");
+		goto bail;
+	}
+
+	/* Load image, then try to verify */
+	ret = 0;        /* Assume success */
+	if (first_reset) {
+		int vfy;
+		int trim_done;
+
+		ret = qib_sd7220_ib_load(dd);
+		if (ret < 0) {
+			qib_dev_err(dd, "Failed to load IB SERDES image\n");
+			goto bail;
+		} else {
+			/* Loaded image, try to verify */
+			vfy = qib_sd7220_ib_vfy(dd);
+			if (vfy != ret) {
+				qib_dev_err(dd, "SERDES PRAM VFY failed\n");
+				goto bail;
+			} /* end if verified */
+		} /* end if loaded */
+
+		/*
+		 * Loaded and verified. Almost good...
+		 * hold "success" in ret
+		 */
+		ret = 0;
+		/*
+		 * Prev steps all worked, continue bringup
+		 * De-assert RESET to uC, only in first reset, to allow
+		 * trimming.
+		 *
+		 * Since our default setup sets START_EQ1 to
+		 * PRESET, we need to clear that for this very first run.
+		 */
+		ret = ibsd_mod_allchnls(dd, START_EQ1(0), 0, 0x38);
+		if (ret < 0) {
+			qib_dev_err(dd, "Failed clearing START_EQ1\n");
+			goto bail;
+		}
+
+		qib_ibsd_reset(dd, 0);
+		/*
+		 * If this is not the first reset, trimdone should be set
+		 * already. We may need to check about this.
+		 */
+		trim_done = qib_sd_trimdone_poll(dd);
+		/*
+		 * Whether or not trimdone succeeded, we need to put the
+		 * uC back into reset to avoid a possible fight with the
+		 * IBC state-machine.
+		 */
+		qib_ibsd_reset(dd, 1);
+
+		if (!trim_done) {
+			qib_dev_err(dd, "No TRIMDONE seen\n");
+			goto bail;
+		}
+		/*
+		 * DEBUG: check each time we reset if trimdone bits have
+		 * gotten cleared, and re-set them.
+		 */
+		qib_sd_trimdone_monitor(dd, "First-reset");
+		/* Remember so we do not re-do the load, dactrim, etc. */
+		dd->cspec->serdes_first_init_done = 1;
+	}
+	/*
+	 * setup for channel training and load values for
+	 * RxEq and DDS in tables used by IBC in IB1.2 mode
+	 */
+	ret = 0;
+	if (qib_sd_setvals(dd) >= 0)
+		goto done;
+bail:
+	ret = 1;
+done:
+	/* start relock timer regardless, but start at 1 second */
+	set_7220_relock_poll(dd, -1);
+	return ret;
+}
+
+#define EPB_ACC_REQ 1
+#define EPB_ACC_GNT 0x100
+#define EPB_DATA_MASK 0xFF
+#define EPB_RD (1ULL << 24)
+#define EPB_TRANS_RDY (1ULL << 31)
+#define EPB_TRANS_ERR (1ULL << 30)
+#define EPB_TRANS_TRIES 5
+
+/*
+ * query, claim, release ownership of the EPB (External Parallel Bus)
+ * for a specified SERDES.
+ * the "claim" parameter is >0 to claim, <0 to release, 0 to query.
+ * Returns <0 for errors, >0 if we had ownership, else 0.
+ */
+static int epb_access(struct qib_devdata *dd, int sdnum, int claim)
+{
+	u16 acc;
+	u64 accval;
+	int owned = 0;
+	u64 oct_sel = 0;
+
+	switch (sdnum) {
+	case IB_7220_SERDES:
+		/*
+		 * The IB SERDES "ownership" is fairly simple. A single each
+		 * request/grant.
+		 */
+		acc = kr_ibsd_epb_access_ctrl;
+		break;
+
+	case PCIE_SERDES0:
+	case PCIE_SERDES1:
+		/* PCIe SERDES has two "octants", need to select which */
+		acc = kr_pciesd_epb_access_ctrl;
+		oct_sel = (2 << (sdnum - PCIE_SERDES0));
+		break;
+
+	default:
+		return 0;
+	}
+
+	/* Make sure any outstanding transaction was seen */
+	qib_read_kreg32(dd, kr_scratch);
+	udelay(15);
+
+	accval = qib_read_kreg32(dd, acc);
+
+	owned = !!(accval & EPB_ACC_GNT);
+	if (claim < 0) {
+		/* Need to release */
+		u64 pollval;
+		/*
+		 * The only writeable bits are the request and CS.
+		 * Both should be clear
+		 */
+		u64 newval = 0;
+		qib_write_kreg(dd, acc, newval);
+		/* First read after write is not trustworthy */
+		pollval = qib_read_kreg32(dd, acc);
+		udelay(5);
+		pollval = qib_read_kreg32(dd, acc);
+		if (pollval & EPB_ACC_GNT)
+			owned = -1;
+	} else if (claim > 0) {
+		/* Need to claim */
+		u64 pollval;
+		u64 newval = EPB_ACC_REQ | oct_sel;
+		qib_write_kreg(dd, acc, newval);
+		/* First read after write is not trustworthy */
+		pollval = qib_read_kreg32(dd, acc);
+		udelay(5);
+		pollval = qib_read_kreg32(dd, acc);
+		if (!(pollval & EPB_ACC_GNT))
+			owned = -1;
+	}
+	return owned;
+}
+
+/*
+ * Lemma to deal with race condition of write..read to epb regs
+ */
+static int epb_trans(struct qib_devdata *dd, u16 reg, u64 i_val, u64 *o_vp)
+{
+	int tries;
+	u64 transval;
+
+	qib_write_kreg(dd, reg, i_val);
+	/* Throw away first read, as RDY bit may be stale */
+	transval = qib_read_kreg64(dd, reg);
+
+	for (tries = EPB_TRANS_TRIES; tries; --tries) {
+		transval = qib_read_kreg32(dd, reg);
+		if (transval & EPB_TRANS_RDY)
+			break;
+		udelay(5);
+	}
+	if (transval & EPB_TRANS_ERR)
+		return -1;
+	if (tries > 0 && o_vp)
+		*o_vp = transval;
+	return tries;
+}
+
+/**
+ * qib_sd7220_reg_mod - modify SERDES register
+ * @dd: the qlogic_ib device
+ * @sdnum: which SERDES to access
+ * @loc: location - channel, element, register, as packed by EPB_LOC() macro.
+ * @wd: Write Data - value to set in register
+ * @mask: ones where data should be spliced into reg.
+ *
+ * Basic register read/modify/write, with un-needed acesses elided. That is,
+ * a mask of zero will prevent write, while a mask of 0xFF will prevent read.
+ * returns current (presumed, if a write was done) contents of selected
+ * register, or <0 if errors.
+ */
+static int qib_sd7220_reg_mod(struct qib_devdata *dd, int sdnum, u32 loc,
+			      u32 wd, u32 mask)
+{
+	u16 trans;
+	u64 transval;
+	int owned;
+	int tries, ret;
+	unsigned long flags;
+
+	switch (sdnum) {
+	case IB_7220_SERDES:
+		trans = kr_ibsd_epb_transaction_reg;
+		break;
+
+	case PCIE_SERDES0:
+	case PCIE_SERDES1:
+		trans = kr_pciesd_epb_transaction_reg;
+		break;
+
+	default:
+		return -1;
+	}
+
+	/*
+	 * All access is locked in software (vs other host threads) and
+	 * hardware (vs uC access).
+	 */
+	spin_lock_irqsave(&dd->cspec->sdepb_lock, flags);
+
+	owned = epb_access(dd, sdnum, 1);
+	if (owned < 0) {
+		spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
+		return -1;
+	}
+	ret = 0;
+	for (tries = EPB_TRANS_TRIES; tries; --tries) {
+		transval = qib_read_kreg32(dd, trans);
+		if (transval & EPB_TRANS_RDY)
+			break;
+		udelay(5);
+	}
+
+	if (tries > 0) {
+		tries = 1;      /* to make read-skip work */
+		if (mask != 0xFF) {
+			/*
+			 * Not a pure write, so need to read.
+			 * loc encodes chip-select as well as address
+			 */
+			transval = loc | EPB_RD;
+			tries = epb_trans(dd, trans, transval, &transval);
+		}
+		if (tries > 0 && mask != 0) {
+			/*
+			 * Not a pure read, so need to write.
+			 */
+			wd = (wd & mask) | (transval & ~mask);
+			transval = loc | (wd & EPB_DATA_MASK);
+			tries = epb_trans(dd, trans, transval, &transval);
+		}
+	}
+	/* else, failed to see ready, what error-handling? */
+
+	/*
+	 * Release bus. Failure is an error.
+	 */
+	if (epb_access(dd, sdnum, -1) < 0)
+		ret = -1;
+	else
+		ret = transval & EPB_DATA_MASK;
+
+	spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
+	if (tries <= 0)
+		ret = -1;
+	return ret;
+}
+
+#define EPB_ROM_R (2)
+#define EPB_ROM_W (1)
+/*
+ * Below, all uC-related, use appropriate UC_CS, depending
+ * on which SerDes is used.
+ */
+#define EPB_UC_CTL EPB_LOC(6, 0, 0)
+#define EPB_MADDRL EPB_LOC(6, 0, 2)
+#define EPB_MADDRH EPB_LOC(6, 0, 3)
+#define EPB_ROMDATA EPB_LOC(6, 0, 4)
+#define EPB_RAMDATA EPB_LOC(6, 0, 5)
+
+/* Transfer date to/from uC Program RAM of IB or PCIe SerDes */
+static int qib_sd7220_ram_xfer(struct qib_devdata *dd, int sdnum, u32 loc,
+			       u8 *buf, int cnt, int rd_notwr)
+{
+	u16 trans;
+	u64 transval;
+	u64 csbit;
+	int owned;
+	int tries;
+	int sofar;
+	int addr;
+	int ret;
+	unsigned long flags;
+	const char *op;
+
+	/* Pick appropriate transaction reg and "Chip select" for this serdes */
+	switch (sdnum) {
+	case IB_7220_SERDES:
+		csbit = 1ULL << EPB_IB_UC_CS_SHF;
+		trans = kr_ibsd_epb_transaction_reg;
+		break;
+
+	case PCIE_SERDES0:
+	case PCIE_SERDES1:
+		/* PCIe SERDES has uC "chip select" in different bit, too */
+		csbit = 1ULL << EPB_PCIE_UC_CS_SHF;
+		trans = kr_pciesd_epb_transaction_reg;
+		break;
+
+	default:
+		return -1;
+	}
+
+	op = rd_notwr ? "Rd" : "Wr";
+	spin_lock_irqsave(&dd->cspec->sdepb_lock, flags);
+
+	owned = epb_access(dd, sdnum, 1);
+	if (owned < 0) {
+		spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
+		return -1;
+	}
+
+	/*
+	 * In future code, we may need to distinguish several address ranges,
+	 * and select various memories based on this. For now, just trim
+	 * "loc" (location including address and memory select) to
+	 * "addr" (address within memory). we will only support PRAM
+	 * The memory is 8KB.
+	 */
+	addr = loc & 0x1FFF;
+	for (tries = EPB_TRANS_TRIES; tries; --tries) {
+		transval = qib_read_kreg32(dd, trans);
+		if (transval & EPB_TRANS_RDY)
+			break;
+		udelay(5);
+	}
+
+	sofar = 0;
+	if (tries > 0) {
+		/*
+		 * Every "memory" access is doubly-indirect.
+		 * We set two bytes of address, then read/write
+		 * one or mores bytes of data.
+		 */
+
+		/* First, we set control to "Read" or "Write" */
+		transval = csbit | EPB_UC_CTL |
+			(rd_notwr ? EPB_ROM_R : EPB_ROM_W);
+		tries = epb_trans(dd, trans, transval, &transval);
+		while (tries > 0 && sofar < cnt) {
+			if (!sofar) {
+				/* Only set address at start of chunk */
+				int addrbyte = (addr + sofar) >> 8;
+				transval = csbit | EPB_MADDRH | addrbyte;
+				tries = epb_trans(dd, trans, transval,
+						  &transval);
+				if (tries <= 0)
+					break;
+				addrbyte = (addr + sofar) & 0xFF;
+				transval = csbit | EPB_MADDRL | addrbyte;
+				tries = epb_trans(dd, trans, transval,
+						 &transval);
+				if (tries <= 0)
+					break;
+			}
+
+			if (rd_notwr)
+				transval = csbit | EPB_ROMDATA | EPB_RD;
+			else
+				transval = csbit | EPB_ROMDATA | buf[sofar];
+			tries = epb_trans(dd, trans, transval, &transval);
+			if (tries <= 0)
+				break;
+			if (rd_notwr)
+				buf[sofar] = transval & EPB_DATA_MASK;
+			++sofar;
+		}
+		/* Finally, clear control-bit for Read or Write */
+		transval = csbit | EPB_UC_CTL;
+		tries = epb_trans(dd, trans, transval, &transval);
+	}
+
+	ret = sofar;
+	/* Release bus. Failure is an error */
+	if (epb_access(dd, sdnum, -1) < 0)
+		ret = -1;
+
+	spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
+	if (tries <= 0)
+		ret = -1;
+	return ret;
+}
+
+#define PROG_CHUNK 64
+
+int qib_sd7220_prog_ld(struct qib_devdata *dd, int sdnum,
+		       u8 *img, int len, int offset)
+{
+	int cnt, sofar, req;
+
+	sofar = 0;
+	while (sofar < len) {
+		req = len - sofar;
+		if (req > PROG_CHUNK)
+			req = PROG_CHUNK;
+		cnt = qib_sd7220_ram_xfer(dd, sdnum, offset + sofar,
+					  img + sofar, req, 0);
+		if (cnt < req) {
+			sofar = -1;
+			break;
+		}
+		sofar += req;
+	}
+	return sofar;
+}
+
+#define VFY_CHUNK 64
+#define SD_PRAM_ERROR_LIMIT 42
+
+int qib_sd7220_prog_vfy(struct qib_devdata *dd, int sdnum,
+			const u8 *img, int len, int offset)
+{
+	int cnt, sofar, req, idx, errors;
+	unsigned char readback[VFY_CHUNK];
+
+	errors = 0;
+	sofar = 0;
+	while (sofar < len) {
+		req = len - sofar;
+		if (req > VFY_CHUNK)
+			req = VFY_CHUNK;
+		cnt = qib_sd7220_ram_xfer(dd, sdnum, sofar + offset,
+					  readback, req, 1);
+		if (cnt < req) {
+			/* failed in read itself */
+			sofar = -1;
+			break;
+		}
+		for (idx = 0; idx < cnt; ++idx) {
+			if (readback[idx] != img[idx+sofar])
+				++errors;
+		}
+		sofar += cnt;
+	}
+	return errors ? -errors : sofar;
+}
+
+/*
+ * IRQ not set up at this point in init, so we poll.
+ */
+#define IB_SERDES_TRIM_DONE (1ULL << 11)
+#define TRIM_TMO (30)
+
+static int qib_sd_trimdone_poll(struct qib_devdata *dd)
+{
+	int trim_tmo, ret;
+	uint64_t val;
+
+	/*
+	 * Default to failure, so IBC will not start
+	 * without IB_SERDES_TRIM_DONE.
+	 */
+	ret = 0;
+	for (trim_tmo = 0; trim_tmo < TRIM_TMO; ++trim_tmo) {
+		val = qib_read_kreg64(dd, kr_ibcstatus);
+		if (val & IB_SERDES_TRIM_DONE) {
+			ret = 1;
+			break;
+		}
+		msleep(10);
+	}
+	if (trim_tmo >= TRIM_TMO) {
+		qib_dev_err(dd, "No TRIMDONE in %d tries\n", trim_tmo);
+		ret = 0;
+	}
+	return ret;
+}
+
+#define TX_FAST_ELT (9)
+
+/*
+ * Set the "negotiation" values for SERDES. These are used by the IB1.2
+ * link negotiation. Macros below are attempt to keep the values a
+ * little more human-editable.
+ * First, values related to Drive De-emphasis Settings.
+ */
+
+#define NUM_DDS_REGS 6
+#define DDS_REG_MAP 0x76A910 /* LSB-first list of regs (in elt 9) to mod */
+
+#define DDS_VAL(amp_d, main_d, ipst_d, ipre_d, amp_s, main_s, ipst_s, ipre_s) \
+	{ { ((amp_d & 0x1F) << 1) | 1, ((amp_s & 0x1F) << 1) | 1, \
+	  (main_d << 3) | 4 | (ipre_d >> 2), \
+	  (main_s << 3) | 4 | (ipre_s >> 2), \
+	  ((ipst_d & 0xF) << 1) | ((ipre_d & 3) << 6) | 0x21, \
+	  ((ipst_s & 0xF) << 1) | ((ipre_s & 3) << 6) | 0x21 } }
+
+static struct dds_init {
+	uint8_t reg_vals[NUM_DDS_REGS];
+} dds_init_vals[] = {
+	/*       DDR(FDR)       SDR(HDR)   */
+	/* Vendor recommends below for 3m cable */
+#define DDS_3M 0
+	DDS_VAL(31, 19, 12, 0, 29, 22,  9, 0),
+	DDS_VAL(31, 12, 15, 4, 31, 15, 15, 1),
+	DDS_VAL(31, 13, 15, 3, 31, 16, 15, 0),
+	DDS_VAL(31, 14, 15, 2, 31, 17, 14, 0),
+	DDS_VAL(31, 15, 15, 1, 31, 18, 13, 0),
+	DDS_VAL(31, 16, 15, 0, 31, 19, 12, 0),
+	DDS_VAL(31, 17, 14, 0, 31, 20, 11, 0),
+	DDS_VAL(31, 18, 13, 0, 30, 21, 10, 0),
+	DDS_VAL(31, 20, 11, 0, 28, 23,  8, 0),
+	DDS_VAL(31, 21, 10, 0, 27, 24,  7, 0),
+	DDS_VAL(31, 22,  9, 0, 26, 25,  6, 0),
+	DDS_VAL(30, 23,  8, 0, 25, 26,  5, 0),
+	DDS_VAL(29, 24,  7, 0, 23, 27,  4, 0),
+	/* Vendor recommends below for 1m cable */
+#define DDS_1M 13
+	DDS_VAL(28, 25,  6, 0, 21, 28,  3, 0),
+	DDS_VAL(27, 26,  5, 0, 19, 29,  2, 0),
+	DDS_VAL(25, 27,  4, 0, 17, 30,  1, 0)
+};
+
+/*
+ * Now the RXEQ section of the table.
+ */
+/* Hardware packs an element number and register address thus: */
+#define RXEQ_INIT_RDESC(elt, addr) (((elt) & 0xF) | ((addr) << 4))
+#define RXEQ_VAL(elt, adr, val0, val1, val2, val3) \
+	{RXEQ_INIT_RDESC((elt), (adr)), {(val0), (val1), (val2), (val3)} }
+
+#define RXEQ_VAL_ALL(elt, adr, val)  \
+	{RXEQ_INIT_RDESC((elt), (adr)), {(val), (val), (val), (val)} }
+
+#define RXEQ_SDR_DFELTH 0
+#define RXEQ_SDR_TLTH 0
+#define RXEQ_SDR_G1CNT_Z1CNT 0x11
+#define RXEQ_SDR_ZCNT 23
+
+static struct rxeq_init {
+	u16 rdesc;      /* in form used in SerDesDDSRXEQ */
+	u8  rdata[4];
+} rxeq_init_vals[] = {
+	/* Set Rcv Eq. to Preset node */
+	RXEQ_VAL_ALL(7, 0x27, 0x10),
+	/* Set DFELTHFDR/HDR thresholds */
+	RXEQ_VAL(7, 8,    0, 0, 0, 0), /* FDR, was 0, 1, 2, 3 */
+	RXEQ_VAL(7, 0x21, 0, 0, 0, 0), /* HDR */
+	/* Set TLTHFDR/HDR theshold */
+	RXEQ_VAL(7, 9,    2, 2, 2, 2), /* FDR, was 0, 2, 4, 6 */
+	RXEQ_VAL(7, 0x23, 2, 2, 2, 2), /* HDR, was  0, 1, 2, 3 */
+	/* Set Preamp setting 2 (ZFR/ZCNT) */
+	RXEQ_VAL(7, 0x1B, 12, 12, 12, 12), /* FDR, was 12, 16, 20, 24 */
+	RXEQ_VAL(7, 0x1C, 12, 12, 12, 12), /* HDR, was 12, 16, 20, 24 */
+	/* Set Preamp DC gain and Setting 1 (GFR/GHR) */
+	RXEQ_VAL(7, 0x1E, 16, 16, 16, 16), /* FDR, was 16, 17, 18, 20 */
+	RXEQ_VAL(7, 0x1F, 16, 16, 16, 16), /* HDR, was 16, 17, 18, 20 */
+	/* Toggle RELOCK (in VCDL_CTRL0) to lock to data */
+	RXEQ_VAL_ALL(6, 6, 0x20), /* Set D5 High */
+	RXEQ_VAL_ALL(6, 6, 0), /* Set D5 Low */
+};
+
+/* There are 17 values from vendor, but IBC only accesses the first 16 */
+#define DDS_ROWS (16)
+#define RXEQ_ROWS ARRAY_SIZE(rxeq_init_vals)
+
+static int qib_sd_setvals(struct qib_devdata *dd)
+{
+	int idx, midx;
+	int min_idx;     /* Minimum index for this portion of table */
+	uint32_t dds_reg_map;
+	u64 __iomem *taddr, *iaddr;
+	uint64_t data;
+	uint64_t sdctl;
+
+	taddr = dd->kregbase + kr_serdes_maptable;
+	iaddr = dd->kregbase + kr_serdes_ddsrxeq0;
+
+	/*
+	 * Init the DDS section of the table.
+	 * Each "row" of the table provokes NUM_DDS_REG writes, to the
+	 * registers indicated in DDS_REG_MAP.
+	 */
+	sdctl = qib_read_kreg64(dd, kr_ibserdesctrl);
+	sdctl = (sdctl & ~(0x1f << 8)) | (NUM_DDS_REGS << 8);
+	sdctl = (sdctl & ~(0x1f << 13)) | (RXEQ_ROWS << 13);
+	qib_write_kreg(dd, kr_ibserdesctrl, sdctl);
+
+	/*
+	 * Iterate down table within loop for each register to store.
+	 */
+	dds_reg_map = DDS_REG_MAP;
+	for (idx = 0; idx < NUM_DDS_REGS; ++idx) {
+		data = ((dds_reg_map & 0xF) << 4) | TX_FAST_ELT;
+		writeq(data, iaddr + idx);
+		mmiowb();
+		qib_read_kreg32(dd, kr_scratch);
+		dds_reg_map >>= 4;
+		for (midx = 0; midx < DDS_ROWS; ++midx) {
+			u64 __iomem *daddr = taddr + ((midx << 4) + idx);
+			data = dds_init_vals[midx].reg_vals[idx];
+			writeq(data, daddr);
+			mmiowb();
+			qib_read_kreg32(dd, kr_scratch);
+		} /* End inner for (vals for this reg, each row) */
+	} /* end outer for (regs to be stored) */
+
+	/*
+	 * Init the RXEQ section of the table.
+	 * This runs in a different order, as the pattern of
+	 * register references is more complex, but there are only
+	 * four "data" values per register.
+	 */
+	min_idx = idx; /* RXEQ indices pick up where DDS left off */
+	taddr += 0x100; /* RXEQ data is in second half of table */
+	/* Iterate through RXEQ register addresses */
+	for (idx = 0; idx < RXEQ_ROWS; ++idx) {
+		int didx; /* "destination" */
+		int vidx;
+
+		/* didx is offset by min_idx to address RXEQ range of regs */
+		didx = idx + min_idx;
+		/* Store the next RXEQ register address */
+		writeq(rxeq_init_vals[idx].rdesc, iaddr + didx);
+		mmiowb();
+		qib_read_kreg32(dd, kr_scratch);
+		/* Iterate through RXEQ values */
+		for (vidx = 0; vidx < 4; vidx++) {
+			data = rxeq_init_vals[idx].rdata[vidx];
+			writeq(data, taddr + (vidx << 6) + idx);
+			mmiowb();
+			qib_read_kreg32(dd, kr_scratch);
+		}
+	} /* end outer for (Reg-writes for RXEQ) */
+	return 0;
+}
+
+#define CMUCTRL5 EPB_LOC(7, 0, 0x15)
+#define RXHSCTRL0(chan) EPB_LOC(chan, 6, 0)
+#define VCDL_DAC2(chan) EPB_LOC(chan, 6, 5)
+#define VCDL_CTRL0(chan) EPB_LOC(chan, 6, 6)
+#define VCDL_CTRL2(chan) EPB_LOC(chan, 6, 8)
+#define START_EQ2(chan) EPB_LOC(chan, 7, 0x28)
+
+/*
+ * Repeat a "store" across all channels of the IB SerDes.
+ * Although nominally it inherits the "read value" of the last
+ * channel it modified, the only really useful return is <0 for
+ * failure, >= 0 for success. The parameter 'loc' is assumed to
+ * be the location in some channel of the register to be modified
+ * The caller can specify use of the "gang write" option of EPB,
+ * in which case we use the specified channel data for any fields
+ * not explicitely written.
+ */
+static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
+			     int mask)
+{
+	int ret = -1;
+	int chnl;
+
+	if (loc & EPB_GLOBAL_WR) {
+		/*
+		 * Our caller has assured us that we can set all four
+		 * channels at once. Trust that. If mask is not 0xFF,
+		 * we will read the _specified_ channel for our starting
+		 * value.
+		 */
+		loc |= (1U << EPB_IB_QUAD0_CS_SHF);
+		chnl = (loc >> (4 + EPB_ADDR_SHF)) & 7;
+		if (mask != 0xFF) {
+			ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+						 loc & ~EPB_GLOBAL_WR, 0, 0);
+			if (ret < 0) {
+				int sloc = loc >> EPB_ADDR_SHF;
+
+				qib_dev_err(dd, "pre-read failed: elt %d,"
+					    " addr 0x%X, chnl %d\n",
+					    (sloc & 0xF),
+					    (sloc >> 9) & 0x3f, chnl);
+				return ret;
+			}
+			val = (ret & ~mask) | (val & mask);
+		}
+		loc &=  ~(7 << (4+EPB_ADDR_SHF));
+		ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, 0xFF);
+		if (ret < 0) {
+			int sloc = loc >> EPB_ADDR_SHF;
+
+			qib_dev_err(dd, "Global WR failed: elt %d,"
+				    " addr 0x%X, val %02X\n",
+				    (sloc & 0xF), (sloc >> 9) & 0x3f, val);
+		}
+		return ret;
+	}
+	/* Clear "channel" and set CS so we can simply iterate */
+	loc &=  ~(7 << (4+EPB_ADDR_SHF));
+	loc |= (1U << EPB_IB_QUAD0_CS_SHF);
+	for (chnl = 0; chnl < 4; ++chnl) {
+		int cloc = loc | (chnl << (4+EPB_ADDR_SHF));
+
+		ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, cloc, val, mask);
+		if (ret < 0) {
+			int sloc = loc >> EPB_ADDR_SHF;
+
+			qib_dev_err(dd, "Write failed: elt %d,"
+				    " addr 0x%X, chnl %d, val 0x%02X,"
+				    " mask 0x%02X\n",
+				    (sloc & 0xF), (sloc >> 9) & 0x3f, chnl,
+				    val & 0xFF, mask & 0xFF);
+			break;
+		}
+	}
+	return ret;
+}
+
+/*
+ * Set the Tx values normally modified by IBC in IB1.2 mode to default
+ * values, as gotten from first row of init table.
+ */
+static int set_dds_vals(struct qib_devdata *dd, struct dds_init *ddi)
+{
+	int ret;
+	int idx, reg, data;
+	uint32_t regmap;
+
+	regmap = DDS_REG_MAP;
+	for (idx = 0; idx < NUM_DDS_REGS; ++idx) {
+		reg = (regmap & 0xF);
+		regmap >>= 4;
+		data = ddi->reg_vals[idx];
+		/* Vendor says RMW not needed for these regs, use 0xFF mask */
+		ret = ibsd_mod_allchnls(dd, EPB_LOC(0, 9, reg), data, 0xFF);
+		if (ret < 0)
+			break;
+	}
+	return ret;
+}
+
+/*
+ * Set the Rx values normally modified by IBC in IB1.2 mode to default
+ * values, as gotten from selected column of init table.
+ */
+static int set_rxeq_vals(struct qib_devdata *dd, int vsel)
+{
+	int ret;
+	int ridx;
+	int cnt = ARRAY_SIZE(rxeq_init_vals);
+
+	for (ridx = 0; ridx < cnt; ++ridx) {
+		int elt, reg, val, loc;
+
+		elt = rxeq_init_vals[ridx].rdesc & 0xF;
+		reg = rxeq_init_vals[ridx].rdesc >> 4;
+		loc = EPB_LOC(0, elt, reg);
+		val = rxeq_init_vals[ridx].rdata[vsel];
+		/* mask of 0xFF, because hardware does full-byte store. */
+		ret = ibsd_mod_allchnls(dd, loc, val, 0xFF);
+		if (ret < 0)
+			break;
+	}
+	return ret;
+}
+
+/*
+ * Set the default values (row 0) for DDR Driver Demphasis.
+ * we do this initially and whenever we turn off IB-1.2
+ *
+ * The "default" values for Rx equalization are also stored to
+ * SerDes registers. Formerly (and still default), we used set 2.
+ * For experimenting with cables and link-partners, we allow changing
+ * that via a module parameter.
+ */
+static unsigned qib_rxeq_set = 2;
+module_param_named(rxeq_default_set, qib_rxeq_set, uint,
+		   S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(rxeq_default_set,
+		 "Which set [0..3] of Rx Equalization values is default");
+
+static int qib_internal_presets(struct qib_devdata *dd)
+{
+	int ret = 0;
+
+	ret = set_dds_vals(dd, dds_init_vals + DDS_3M);
+
+	if (ret < 0)
+		qib_dev_err(dd, "Failed to set default DDS values\n");
+	ret = set_rxeq_vals(dd, qib_rxeq_set & 3);
+	if (ret < 0)
+		qib_dev_err(dd, "Failed to set default RXEQ values\n");
+	return ret;
+}
+
+int qib_sd7220_presets(struct qib_devdata *dd)
+{
+	int ret = 0;
+
+	if (!dd->cspec->presets_needed)
+		return ret;
+	dd->cspec->presets_needed = 0;
+	/* Assert uC reset, so we don't clash with it. */
+	qib_ibsd_reset(dd, 1);
+	udelay(2);
+	qib_sd_trimdone_monitor(dd, "link-down");
+
+	ret = qib_internal_presets(dd);
+	return ret;
+}
+
+static int qib_sd_trimself(struct qib_devdata *dd, int val)
+{
+	int loc = CMUCTRL5 | (1U << EPB_IB_QUAD0_CS_SHF);
+
+	return qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, 0xFF);
+}
+
+static int qib_sd_early(struct qib_devdata *dd)
+{
+	int ret;
+
+	ret = ibsd_mod_allchnls(dd, RXHSCTRL0(0) | EPB_GLOBAL_WR, 0xD4, 0xFF);
+	if (ret < 0)
+		goto bail;
+	ret = ibsd_mod_allchnls(dd, START_EQ1(0) | EPB_GLOBAL_WR, 0x10, 0xFF);
+	if (ret < 0)
+		goto bail;
+	ret = ibsd_mod_allchnls(dd, START_EQ2(0) | EPB_GLOBAL_WR, 0x30, 0xFF);
+bail:
+	return ret;
+}
+
+#define BACTRL(chnl) EPB_LOC(chnl, 6, 0x0E)
+#define LDOUTCTRL1(chnl) EPB_LOC(chnl, 7, 6)
+#define RXHSSTATUS(chnl) EPB_LOC(chnl, 6, 0xF)
+
+static int qib_sd_dactrim(struct qib_devdata *dd)
+{
+	int ret;
+
+	ret = ibsd_mod_allchnls(dd, VCDL_DAC2(0) | EPB_GLOBAL_WR, 0x2D, 0xFF);
+	if (ret < 0)
+		goto bail;
+
+	/* more fine-tuning of what will be default */
+	ret = ibsd_mod_allchnls(dd, VCDL_CTRL2(0), 3, 0xF);
+	if (ret < 0)
+		goto bail;
+
+	ret = ibsd_mod_allchnls(dd, BACTRL(0) | EPB_GLOBAL_WR, 0x40, 0xFF);
+	if (ret < 0)
+		goto bail;
+
+	ret = ibsd_mod_allchnls(dd, LDOUTCTRL1(0) | EPB_GLOBAL_WR, 0x04, 0xFF);
+	if (ret < 0)
+		goto bail;
+
+	ret = ibsd_mod_allchnls(dd, RXHSSTATUS(0) | EPB_GLOBAL_WR, 0x04, 0xFF);
+	if (ret < 0)
+		goto bail;
+
+	/*
+	 * Delay for max possible number of steps, with slop.
+	 * Each step is about 4usec.
+	 */
+	udelay(415);
+
+	ret = ibsd_mod_allchnls(dd, LDOUTCTRL1(0) | EPB_GLOBAL_WR, 0x00, 0xFF);
+
+bail:
+	return ret;
+}
+
+#define RELOCK_FIRST_MS 3
+#define RXLSPPM(chan) EPB_LOC(chan, 0, 2)
+void toggle_7220_rclkrls(struct qib_devdata *dd)
+{
+	int loc = RXLSPPM(0) | EPB_GLOBAL_WR;
+	int ret;
+
+	ret = ibsd_mod_allchnls(dd, loc, 0, 0x80);
+	if (ret < 0)
+		qib_dev_err(dd, "RCLKRLS failed to clear D7\n");
+	else {
+		udelay(1);
+		ibsd_mod_allchnls(dd, loc, 0x80, 0x80);
+	}
+	/* And again for good measure */
+	udelay(1);
+	ret = ibsd_mod_allchnls(dd, loc, 0, 0x80);
+	if (ret < 0)
+		qib_dev_err(dd, "RCLKRLS failed to clear D7\n");
+	else {
+		udelay(1);
+		ibsd_mod_allchnls(dd, loc, 0x80, 0x80);
+	}
+	/* Now reset xgxs and IBC to complete the recovery */
+	dd->f_xgxs_reset(dd->pport);
+}
+
+/*
+ * Shut down the timer that polls for relock occasions, if needed
+ * this is "hooked" from qib_7220_quiet_serdes(), which is called
+ * just before qib_shutdown_device() in qib_driver.c shuts down all
+ * the other timers
+ */
+void shutdown_7220_relock_poll(struct qib_devdata *dd)
+{
+	struct qib_relock *irp = &dd->cspec->relock_st;
+
+	if (atomic_read(&irp->relock_timer_active)) {
+		del_timer_sync(&irp->relock_timer);
+		atomic_set(&irp->relock_timer_active, 0);
+	}
+}
+
+static unsigned qib_relock_by_timer = 1;
+module_param_named(relock_by_timer, qib_relock_by_timer, uint,
+		   S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(relock_by_timer, "Allow relock attempt if link not up");
+
+static void qib_run_relock(unsigned long opaque)
+{
+	struct qib_devdata *dd = (struct qib_devdata *)opaque;
+	struct qib_pportdata *ppd = dd->pport;
+	struct qib_relock *irp = &dd->cspec->relock_st;
+	int timeoff;
+
+	/*
+	 * Check link-training state for "stuck" state, when down.
+	 * if found, try relock and schedule another try at
+	 * exponentially growing delay, maxed at one second.
+	 * if not stuck, our work is done.
+	 */
+	if ((dd->flags & QIB_INITTED) && !(ppd->lflags &
+	    (QIBL_IB_AUTONEG_INPROG | QIBL_LINKINIT | QIBL_LINKARMED |
+	     QIBL_LINKACTIVE))) {
+		if (qib_relock_by_timer) {
+			if (!(ppd->lflags & QIBL_IB_LINK_DISABLED))
+				toggle_7220_rclkrls(dd);
+		}
+		/* re-set timer for next check */
+		timeoff = irp->relock_interval << 1;
+		if (timeoff > HZ)
+			timeoff = HZ;
+		irp->relock_interval = timeoff;
+	} else
+		timeoff = HZ;
+	mod_timer(&irp->relock_timer, jiffies + timeoff);
+}
+
+void set_7220_relock_poll(struct qib_devdata *dd, int ibup)
+{
+	struct qib_relock *irp = &dd->cspec->relock_st;
+
+	if (ibup > 0) {
+		/* We are now up, relax timer to 1 second interval */
+		if (atomic_read(&irp->relock_timer_active))
+			mod_timer(&irp->relock_timer, jiffies + HZ);
+	} else {
+		/* Transition to down, (re-)set timer to short interval. */
+		int timeout;
+
+		timeout = (HZ * ((ibup == -1) ? 1000 : RELOCK_FIRST_MS)) / 1000;
+		if (timeout == 0)
+			timeout = 1;
+		/* If timer has not yet been started, do so. */
+		if (atomic_inc_return(&irp->relock_timer_active) == 1) {
+			init_timer(&irp->relock_timer);
+			irp->relock_timer.function = qib_run_relock;
+			irp->relock_timer.data = (unsigned long) dd;
+			irp->relock_interval = timeout;
+			irp->relock_timer.expires = jiffies + timeout;
+			add_timer(&irp->relock_timer);
+		} else {
+			irp->relock_interval = timeout;
+			mod_timer(&irp->relock_timer, jiffies + timeout);
+			atomic_dec(&irp->relock_timer_active);
+		}
+	}
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 34/52] IB/qib: Add qib_sd7220_img.c
From: Ralph Campbell @ 2010-05-07  0:01 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_sd7220_img.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_sd7220_img.c | 1081 ++++++++++++++++++++++++++++
 1 files changed, 1081 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_sd7220_img.c

diff --git a/drivers/infiniband/hw/qib/qib_sd7220_img.c b/drivers/infiniband/hw/qib/qib_sd7220_img.c
new file mode 100644
index 0000000..a1118fb
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_sd7220_img.c
@@ -0,0 +1,1081 @@
+/*
+ * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file contains the memory image from the vendor, to be copied into
+ * the IB SERDES of the IBA7220 during initialization.
+ * The file also includes the two functions which use this image.
+ */
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "qib.h"
+#include "qib_7220.h"
+
+static unsigned char qib_sd7220_ib_img[] = {
+/*0000*/0x02, 0x0A, 0x29, 0x02, 0x0A, 0x87, 0xE5, 0xE6,
+	0x30, 0xE6, 0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F,
+/*0010*/0x00, 0xE5, 0xE2, 0x30, 0xE4, 0x04, 0x7E, 0x01,
+	0x80, 0x02, 0x7E, 0x00, 0xEE, 0x5F, 0x60, 0x08,
+/*0020*/0x53, 0xF9, 0xF7, 0xE4, 0xF5, 0xFE, 0x80, 0x08,
+	0x7F, 0x0A, 0x12, 0x17, 0x31, 0x12, 0x0E, 0xA2,
+/*0030*/0x75, 0xFC, 0x08, 0xE4, 0xF5, 0xFD, 0xE5, 0xE7,
+	0x20, 0xE7, 0x03, 0x43, 0xF9, 0x08, 0x22, 0x00,
+/*0040*/0x01, 0x20, 0x11, 0x00, 0x04, 0x20, 0x00, 0x75,
+	0x51, 0x01, 0xE4, 0xF5, 0x52, 0xF5, 0x53, 0xF5,
+/*0050*/0x52, 0xF5, 0x7E, 0x7F, 0x04, 0x02, 0x04, 0x38,
+	0xC2, 0x36, 0x05, 0x52, 0xE5, 0x52, 0xD3, 0x94,
+/*0060*/0x0C, 0x40, 0x05, 0x75, 0x52, 0x01, 0xD2, 0x36,
+	0x90, 0x07, 0x0C, 0x74, 0x07, 0xF0, 0xA3, 0x74,
+/*0070*/0xFF, 0xF0, 0xE4, 0xF5, 0x0C, 0xA3, 0xF0, 0x90,
+	0x07, 0x14, 0xF0, 0xA3, 0xF0, 0x75, 0x0B, 0x20,
+/*0080*/0xF5, 0x09, 0xE4, 0xF5, 0x08, 0xE5, 0x08, 0xD3,
+	0x94, 0x30, 0x40, 0x03, 0x02, 0x04, 0x04, 0x12,
+/*0090*/0x00, 0x06, 0x15, 0x0B, 0xE5, 0x08, 0x70, 0x04,
+	0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x09,
+/*00A0*/0x70, 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00,
+	0xEE, 0x5F, 0x60, 0x05, 0x12, 0x18, 0x71, 0xD2,
+/*00B0*/0x35, 0x53, 0xE1, 0xF7, 0xE5, 0x08, 0x45, 0x09,
+	0xFF, 0xE5, 0x0B, 0x25, 0xE0, 0x25, 0xE0, 0x24,
+/*00C0*/0x83, 0xF5, 0x82, 0xE4, 0x34, 0x07, 0xF5, 0x83,
+	0xEF, 0xF0, 0x85, 0xE2, 0x20, 0xE5, 0x52, 0xD3,
+/*00D0*/0x94, 0x01, 0x40, 0x0D, 0x12, 0x19, 0xF3, 0xE0,
+	0x54, 0xA0, 0x64, 0x40, 0x70, 0x03, 0x02, 0x03,
+/*00E0*/0xFB, 0x53, 0xF9, 0xF8, 0x90, 0x94, 0x70, 0xE4,
+	0xF0, 0xE0, 0xF5, 0x10, 0xAF, 0x09, 0x12, 0x1E,
+/*00F0*/0xB3, 0xAF, 0x08, 0xEF, 0x44, 0x08, 0xF5, 0x82,
+	0x75, 0x83, 0x80, 0xE0, 0xF5, 0x29, 0xEF, 0x44,
+/*0100*/0x07, 0x12, 0x1A, 0x3C, 0xF5, 0x22, 0x54, 0x40,
+	0xD3, 0x94, 0x00, 0x40, 0x1E, 0xE5, 0x29, 0x54,
+/*0110*/0xF0, 0x70, 0x21, 0x12, 0x19, 0xF3, 0xE0, 0x44,
+	0x80, 0xF0, 0xE5, 0x22, 0x54, 0x30, 0x65, 0x08,
+/*0120*/0x70, 0x09, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xBF,
+	0xF0, 0x80, 0x09, 0x12, 0x19, 0xF3, 0x74, 0x40,
+/*0130*/0xF0, 0x02, 0x03, 0xFB, 0x12, 0x1A, 0x12, 0x75,
+	0x83, 0xAE, 0x74, 0xFF, 0xF0, 0xAF, 0x08, 0x7E,
+/*0140*/0x00, 0xEF, 0x44, 0x07, 0xF5, 0x82, 0xE0, 0xFD,
+	0xE5, 0x0B, 0x25, 0xE0, 0x25, 0xE0, 0x24, 0x81,
+/*0150*/0xF5, 0x82, 0xE4, 0x34, 0x07, 0xF5, 0x83, 0xED,
+	0xF0, 0x90, 0x07, 0x0E, 0xE0, 0x04, 0xF0, 0xEF,
+/*0160*/0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0x98, 0xE0,
+	0xF5, 0x28, 0x12, 0x1A, 0x23, 0x40, 0x0C, 0x12,
+/*0170*/0x19, 0xF3, 0xE0, 0x44, 0x01, 0x12, 0x1A, 0x32,
+	0x02, 0x03, 0xF6, 0xAF, 0x08, 0x7E, 0x00, 0x74,
+/*0180*/0x80, 0xCD, 0xEF, 0xCD, 0x8D, 0x82, 0xF5, 0x83,
+	0xE0, 0x30, 0xE0, 0x0A, 0x12, 0x19, 0xF3, 0xE0,
+/*0190*/0x44, 0x20, 0xF0, 0x02, 0x03, 0xFB, 0x12, 0x19,
+	0xF3, 0xE0, 0x54, 0xDF, 0xF0, 0xEE, 0x44, 0xAE,
+/*01A0*/0x12, 0x1A, 0x43, 0x30, 0xE4, 0x03, 0x02, 0x03,
+	0xFB, 0x74, 0x9E, 0x12, 0x1A, 0x05, 0x20, 0xE0,
+/*01B0*/0x03, 0x02, 0x03, 0xFB, 0x8F, 0x82, 0x8E, 0x83,
+	0xE0, 0x20, 0xE0, 0x03, 0x02, 0x03, 0xFB, 0x12,
+/*01C0*/0x19, 0xF3, 0xE0, 0x44, 0x10, 0xF0, 0xE5, 0xE3,
+	0x20, 0xE7, 0x08, 0xE5, 0x08, 0x12, 0x1A, 0x3A,
+/*01D0*/0x44, 0x04, 0xF0, 0xAF, 0x08, 0x7E, 0x00, 0xEF,
+	0x12, 0x1A, 0x3A, 0x20, 0xE2, 0x34, 0x12, 0x19,
+/*01E0*/0xF3, 0xE0, 0x44, 0x08, 0xF0, 0xE5, 0xE4, 0x30,
+	0xE6, 0x04, 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00,
+/*01F0*/0xE5, 0x7E, 0xC3, 0x94, 0x04, 0x50, 0x04, 0x7C,
+	0x01, 0x80, 0x02, 0x7C, 0x00, 0xEC, 0x4D, 0x60,
+/*0200*/0x05, 0xC2, 0x35, 0x02, 0x03, 0xFB, 0xEE, 0x44,
+	0xD2, 0x12, 0x1A, 0x43, 0x44, 0x40, 0xF0, 0x02,
+/*0210*/0x03, 0xFB, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xF7,
+	0xF0, 0x12, 0x1A, 0x12, 0x75, 0x83, 0xD2, 0xE0,
+/*0220*/0x54, 0xBF, 0xF0, 0x90, 0x07, 0x14, 0xE0, 0x04,
+	0xF0, 0xE5, 0x7E, 0x70, 0x03, 0x75, 0x7E, 0x01,
+/*0230*/0xAF, 0x08, 0x7E, 0x00, 0x12, 0x1A, 0x23, 0x40,
+	0x12, 0x12, 0x19, 0xF3, 0xE0, 0x44, 0x01, 0x12,
+/*0240*/0x19, 0xF2, 0xE0, 0x54, 0x02, 0x12, 0x1A, 0x32,
+	0x02, 0x03, 0xFB, 0x12, 0x19, 0xF3, 0xE0, 0x44,
+/*0250*/0x02, 0x12, 0x19, 0xF2, 0xE0, 0x54, 0xFE, 0xF0,
+	0xC2, 0x35, 0xEE, 0x44, 0x8A, 0x8F, 0x82, 0xF5,
+/*0260*/0x83, 0xE0, 0xF5, 0x17, 0x54, 0x8F, 0x44, 0x40,
+	0xF0, 0x74, 0x90, 0xFC, 0xE5, 0x08, 0x44, 0x07,
+/*0270*/0xFD, 0xF5, 0x82, 0x8C, 0x83, 0xE0, 0x54, 0x3F,
+	0x90, 0x07, 0x02, 0xF0, 0xE0, 0x54, 0xC0, 0x8D,
+/*0280*/0x82, 0x8C, 0x83, 0xF0, 0x74, 0x92, 0x12, 0x1A,
+	0x05, 0x90, 0x07, 0x03, 0x12, 0x1A, 0x19, 0x74,
+/*0290*/0x82, 0x12, 0x1A, 0x05, 0x90, 0x07, 0x04, 0x12,
+	0x1A, 0x19, 0x74, 0xB4, 0x12, 0x1A, 0x05, 0x90,
+/*02A0*/0x07, 0x05, 0x12, 0x1A, 0x19, 0x74, 0x94, 0xFE,
+	0xE5, 0x08, 0x44, 0x06, 0x12, 0x1A, 0x0A, 0xF5,
+/*02B0*/0x10, 0x30, 0xE0, 0x04, 0xD2, 0x37, 0x80, 0x02,
+	0xC2, 0x37, 0xE5, 0x10, 0x54, 0x7F, 0x8F, 0x82,
+/*02C0*/0x8E, 0x83, 0xF0, 0x30, 0x44, 0x30, 0x12, 0x1A,
+	0x03, 0x54, 0x80, 0xD3, 0x94, 0x00, 0x40, 0x04,
+/*02D0*/0xD2, 0x39, 0x80, 0x02, 0xC2, 0x39, 0x8F, 0x82,
+	0x8E, 0x83, 0xE0, 0x44, 0x80, 0xF0, 0x12, 0x1A,
+/*02E0*/0x03, 0x54, 0x40, 0xD3, 0x94, 0x00, 0x40, 0x04,
+	0xD2, 0x3A, 0x80, 0x02, 0xC2, 0x3A, 0x8F, 0x82,
+/*02F0*/0x8E, 0x83, 0xE0, 0x44, 0x40, 0xF0, 0x74, 0x92,
+	0xFE, 0xE5, 0x08, 0x44, 0x06, 0x12, 0x1A, 0x0A,
+/*0300*/0x30, 0xE7, 0x04, 0xD2, 0x38, 0x80, 0x02, 0xC2,
+	0x38, 0x8F, 0x82, 0x8E, 0x83, 0xE0, 0x54, 0x7F,
+/*0310*/0xF0, 0x12, 0x1E, 0x46, 0xE4, 0xF5, 0x0A, 0x20,
+	0x03, 0x02, 0x80, 0x03, 0x30, 0x43, 0x03, 0x12,
+/*0320*/0x19, 0x95, 0x20, 0x02, 0x02, 0x80, 0x03, 0x30,
+	0x42, 0x03, 0x12, 0x0C, 0x8F, 0x30, 0x30, 0x06,
+/*0330*/0x12, 0x19, 0x95, 0x12, 0x0C, 0x8F, 0x12, 0x0D,
+	0x47, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xFB, 0xF0,
+/*0340*/0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40, 0x46, 0x43,
+	0xE1, 0x08, 0x12, 0x19, 0xF3, 0xE0, 0x44, 0x04,
+/*0350*/0xF0, 0xE5, 0xE4, 0x20, 0xE7, 0x2A, 0x12, 0x1A,
+	0x12, 0x75, 0x83, 0xD2, 0xE0, 0x54, 0x08, 0xD3,
+/*0360*/0x94, 0x00, 0x40, 0x04, 0x7F, 0x01, 0x80, 0x02,
+	0x7F, 0x00, 0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40,
+/*0370*/0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF,
+	0x5E, 0x60, 0x05, 0x12, 0x1D, 0xD7, 0x80, 0x17,
+/*0380*/0x12, 0x1A, 0x12, 0x75, 0x83, 0xD2, 0xE0, 0x44,
+	0x08, 0xF0, 0x02, 0x03, 0xFB, 0x12, 0x1A, 0x12,
+/*0390*/0x75, 0x83, 0xD2, 0xE0, 0x54, 0xF7, 0xF0, 0x12,
+	0x1E, 0x46, 0x7F, 0x08, 0x12, 0x17, 0x31, 0x74,
+/*03A0*/0x8E, 0xFE, 0x12, 0x1A, 0x12, 0x8E, 0x83, 0xE0,
+	0xF5, 0x10, 0x54, 0xFE, 0xF0, 0xE5, 0x10, 0x44,
+/*03B0*/0x01, 0xFF, 0xE5, 0x08, 0xFD, 0xED, 0x44, 0x07,
+	0xF5, 0x82, 0xEF, 0xF0, 0xE5, 0x10, 0x54, 0xFE,
+/*03C0*/0xFF, 0xED, 0x44, 0x07, 0xF5, 0x82, 0xEF, 0x12,
+	0x1A, 0x11, 0x75, 0x83, 0x86, 0xE0, 0x44, 0x10,
+/*03D0*/0x12, 0x1A, 0x11, 0xE0, 0x44, 0x10, 0xF0, 0x12,
+	0x19, 0xF3, 0xE0, 0x54, 0xFD, 0x44, 0x01, 0xFF,
+/*03E0*/0x12, 0x19, 0xF3, 0xEF, 0x12, 0x1A, 0x32, 0x30,
+	0x32, 0x0C, 0xE5, 0x08, 0x44, 0x08, 0xF5, 0x82,
+/*03F0*/0x75, 0x83, 0x82, 0x74, 0x05, 0xF0, 0xAF, 0x0B,
+	0x12, 0x18, 0xD7, 0x74, 0x10, 0x25, 0x08, 0xF5,
+/*0400*/0x08, 0x02, 0x00, 0x85, 0x05, 0x09, 0xE5, 0x09,
+	0xD3, 0x94, 0x07, 0x50, 0x03, 0x02, 0x00, 0x82,
+/*0410*/0xE5, 0x7E, 0xD3, 0x94, 0x00, 0x40, 0x04, 0x7F,
+	0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x7E, 0xC3,
+/*0420*/0x94, 0xFA, 0x50, 0x04, 0x7E, 0x01, 0x80, 0x02,
+	0x7E, 0x00, 0xEE, 0x5F, 0x60, 0x02, 0x05, 0x7E,
+/*0430*/0x30, 0x35, 0x0B, 0x43, 0xE1, 0x01, 0x7F, 0x09,
+	0x12, 0x17, 0x31, 0x02, 0x00, 0x58, 0x53, 0xE1,
+/*0440*/0xFE, 0x02, 0x00, 0x58, 0x8E, 0x6A, 0x8F, 0x6B,
+	0x8C, 0x6C, 0x8D, 0x6D, 0x75, 0x6E, 0x01, 0x75,
+/*0450*/0x6F, 0x01, 0x75, 0x70, 0x01, 0xE4, 0xF5, 0x73,
+	0xF5, 0x74, 0xF5, 0x75, 0x90, 0x07, 0x2F, 0xF0,
+/*0460*/0xF5, 0x3C, 0xF5, 0x3E, 0xF5, 0x46, 0xF5, 0x47,
+	0xF5, 0x3D, 0xF5, 0x3F, 0xF5, 0x6F, 0xE5, 0x6F,
+/*0470*/0x70, 0x0F, 0xE5, 0x6B, 0x45, 0x6A, 0x12, 0x07,
+	0x2A, 0x75, 0x83, 0x80, 0x74, 0x3A, 0xF0, 0x80,
+/*0480*/0x09, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x80, 0x74,
+	0x1A, 0xF0, 0xE4, 0xF5, 0x6E, 0xC3, 0x74, 0x3F,
+/*0490*/0x95, 0x6E, 0xFF, 0x12, 0x08, 0x65, 0x75, 0x83,
+	0x82, 0xEF, 0xF0, 0x12, 0x1A, 0x4D, 0x12, 0x08,
+/*04A0*/0xC6, 0xE5, 0x33, 0xF0, 0x12, 0x08, 0xFA, 0x12,
+	0x08, 0xB1, 0x40, 0xE1, 0xE5, 0x6F, 0x70, 0x0B,
+/*04B0*/0x12, 0x07, 0x2A, 0x75, 0x83, 0x80, 0x74, 0x36,
+	0xF0, 0x80, 0x09, 0x12, 0x07, 0x2A, 0x75, 0x83,
+/*04C0*/0x80, 0x74, 0x16, 0xF0, 0x75, 0x6E, 0x01, 0x12,
+	0x07, 0x2A, 0x75, 0x83, 0xB4, 0xE5, 0x6E, 0xF0,
+/*04D0*/0x12, 0x1A, 0x4D, 0x74, 0x3F, 0x25, 0x6E, 0xF5,
+	0x82, 0xE4, 0x34, 0x00, 0xF5, 0x83, 0xE5, 0x33,
+/*04E0*/0xF0, 0x74, 0xBF, 0x25, 0x6E, 0xF5, 0x82, 0xE4,
+	0x34, 0x00, 0x12, 0x08, 0xB1, 0x40, 0xD8, 0xE4,
+/*04F0*/0xF5, 0x70, 0xF5, 0x46, 0xF5, 0x47, 0xF5, 0x6E,
+	0x12, 0x08, 0xFA, 0xF5, 0x83, 0xE0, 0xFE, 0x12,
+/*0500*/0x08, 0xC6, 0xE0, 0x7C, 0x00, 0x24, 0x00, 0xFF,
+	0xEC, 0x3E, 0xFE, 0xAD, 0x3B, 0xD3, 0xEF, 0x9D,
+/*0510*/0xEE, 0x9C, 0x50, 0x04, 0x7B, 0x01, 0x80, 0x02,
+	0x7B, 0x00, 0xE5, 0x70, 0x70, 0x04, 0x7A, 0x01,
+/*0520*/0x80, 0x02, 0x7A, 0x00, 0xEB, 0x5A, 0x60, 0x06,
+	0x85, 0x6E, 0x46, 0x75, 0x70, 0x01, 0xD3, 0xEF,
+/*0530*/0x9D, 0xEE, 0x9C, 0x50, 0x04, 0x7F, 0x01, 0x80,
+	0x02, 0x7F, 0x00, 0xE5, 0x70, 0xB4, 0x01, 0x04,
+/*0540*/0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF, 0x5E,
+	0x60, 0x03, 0x85, 0x6E, 0x47, 0x05, 0x6E, 0xE5,
+/*0550*/0x6E, 0x64, 0x7F, 0x70, 0xA3, 0xE5, 0x46, 0x60,
+	0x05, 0xE5, 0x47, 0xB4, 0x7E, 0x03, 0x85, 0x46,
+/*0560*/0x47, 0xE5, 0x6F, 0x70, 0x08, 0x85, 0x46, 0x76,
+	0x85, 0x47, 0x77, 0x80, 0x0E, 0xC3, 0x74, 0x7F,
+/*0570*/0x95, 0x46, 0xF5, 0x78, 0xC3, 0x74, 0x7F, 0x95,
+	0x47, 0xF5, 0x79, 0xE5, 0x6F, 0x70, 0x37, 0xE5,
+/*0580*/0x46, 0x65, 0x47, 0x70, 0x0C, 0x75, 0x73, 0x01,
+	0x75, 0x74, 0x01, 0xF5, 0x3C, 0xF5, 0x3D, 0x80,
+/*0590*/0x35, 0xE4, 0xF5, 0x4E, 0xC3, 0xE5, 0x47, 0x95,
+	0x46, 0xF5, 0x3C, 0xC3, 0x13, 0xF5, 0x71, 0x25,
+/*05A0*/0x46, 0xF5, 0x72, 0xC3, 0x94, 0x3F, 0x40, 0x05,
+	0xE4, 0xF5, 0x3D, 0x80, 0x40, 0xC3, 0x74, 0x3F,
+/*05B0*/0x95, 0x72, 0xF5, 0x3D, 0x80, 0x37, 0xE5, 0x46,
+	0x65, 0x47, 0x70, 0x0F, 0x75, 0x73, 0x01, 0x75,
+/*05C0*/0x75, 0x01, 0xF5, 0x3E, 0xF5, 0x3F, 0x75, 0x4E,
+	0x01, 0x80, 0x22, 0xE4, 0xF5, 0x4E, 0xC3, 0xE5,
+/*05D0*/0x47, 0x95, 0x46, 0xF5, 0x3E, 0xC3, 0x13, 0xF5,
+	0x71, 0x25, 0x46, 0xF5, 0x72, 0xD3, 0x94, 0x3F,
+/*05E0*/0x50, 0x05, 0xE4, 0xF5, 0x3F, 0x80, 0x06, 0xE5,
+	0x72, 0x24, 0xC1, 0xF5, 0x3F, 0x05, 0x6F, 0xE5,
+/*05F0*/0x6F, 0xC3, 0x94, 0x02, 0x50, 0x03, 0x02, 0x04,
+	0x6E, 0xE5, 0x6D, 0x45, 0x6C, 0x70, 0x02, 0x80,
+/*0600*/0x04, 0xE5, 0x74, 0x45, 0x75, 0x90, 0x07, 0x2F,
+	0xF0, 0x7F, 0x01, 0xE5, 0x3E, 0x60, 0x04, 0xE5,
+/*0610*/0x3C, 0x70, 0x14, 0xE4, 0xF5, 0x3C, 0xF5, 0x3D,
+	0xF5, 0x3E, 0xF5, 0x3F, 0x12, 0x08, 0xD2, 0x70,
+/*0620*/0x04, 0xF0, 0x02, 0x06, 0xA4, 0x80, 0x7A, 0xE5,
+	0x3C, 0xC3, 0x95, 0x3E, 0x40, 0x07, 0xE5, 0x3C,
+/*0630*/0x95, 0x3E, 0xFF, 0x80, 0x06, 0xC3, 0xE5, 0x3E,
+	0x95, 0x3C, 0xFF, 0xE5, 0x76, 0xD3, 0x95, 0x79,
+/*0640*/0x40, 0x05, 0x85, 0x76, 0x7A, 0x80, 0x03, 0x85,
+	0x79, 0x7A, 0xE5, 0x77, 0xC3, 0x95, 0x78, 0x50,
+/*0650*/0x05, 0x85, 0x77, 0x7B, 0x80, 0x03, 0x85, 0x78,
+	0x7B, 0xE5, 0x7B, 0xD3, 0x95, 0x7A, 0x40, 0x30,
+/*0660*/0xE5, 0x7B, 0x95, 0x7A, 0xF5, 0x3C, 0xF5, 0x3E,
+	0xC3, 0xE5, 0x7B, 0x95, 0x7A, 0x90, 0x07, 0x19,
+/*0670*/0xF0, 0xE5, 0x3C, 0xC3, 0x13, 0xF5, 0x71, 0x25,
+	0x7A, 0xF5, 0x72, 0xC3, 0x94, 0x3F, 0x40, 0x05,
+/*0680*/0xE4, 0xF5, 0x3D, 0x80, 0x1F, 0xC3, 0x74, 0x3F,
+	0x95, 0x72, 0xF5, 0x3D, 0xF5, 0x3F, 0x80, 0x14,
+/*0690*/0xE4, 0xF5, 0x3C, 0xF5, 0x3E, 0x90, 0x07, 0x19,
+	0xF0, 0x12, 0x08, 0xD2, 0x70, 0x03, 0xF0, 0x80,
+/*06A0*/0x03, 0x74, 0x01, 0xF0, 0x12, 0x08, 0x65, 0x75,
+	0x83, 0xD0, 0xE0, 0x54, 0x0F, 0xFE, 0xAD, 0x3C,
+/*06B0*/0x70, 0x02, 0x7E, 0x07, 0xBE, 0x0F, 0x02, 0x7E,
+	0x80, 0xEE, 0xFB, 0xEF, 0xD3, 0x9B, 0x74, 0x80,
+/*06C0*/0xF8, 0x98, 0x40, 0x1F, 0xE4, 0xF5, 0x3C, 0xF5,
+	0x3E, 0x12, 0x08, 0xD2, 0x70, 0x03, 0xF0, 0x80,
+/*06D0*/0x12, 0x74, 0x01, 0xF0, 0xE5, 0x08, 0xFB, 0xEB,
+	0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0xD2, 0xE0,
+/*06E0*/0x44, 0x10, 0xF0, 0xE5, 0x08, 0xFB, 0xEB, 0x44,
+	0x09, 0xF5, 0x82, 0x75, 0x83, 0x9E, 0xED, 0xF0,
+/*06F0*/0xEB, 0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0xCA,
+	0xED, 0xF0, 0x12, 0x08, 0x65, 0x75, 0x83, 0xCC,
+/*0700*/0xEF, 0xF0, 0x22, 0xE5, 0x08, 0x44, 0x07, 0xF5,
+	0x82, 0x75, 0x83, 0xBC, 0xE0, 0x54, 0xF0, 0xF0,
+/*0710*/0xE5, 0x08, 0x44, 0x07, 0xF5, 0x82, 0x75, 0x83,
+	0xBE, 0xE0, 0x54, 0xF0, 0xF0, 0xE5, 0x08, 0x44,
+/*0720*/0x07, 0xF5, 0x82, 0x75, 0x83, 0xC0, 0xE0, 0x54,
+	0xF0, 0xF0, 0xE5, 0x08, 0x44, 0x07, 0xF5, 0x82,
+/*0730*/0x22, 0xF0, 0x90, 0x07, 0x28, 0xE0, 0xFE, 0xA3,
+	0xE0, 0xF5, 0x82, 0x8E, 0x83, 0x22, 0x85, 0x42,
+/*0740*/0x42, 0x85, 0x41, 0x41, 0x85, 0x40, 0x40, 0x74,
+	0xC0, 0x2F, 0xF5, 0x82, 0x74, 0x02, 0x3E, 0xF5,
+/*0750*/0x83, 0xE5, 0x42, 0xF0, 0x74, 0xE0, 0x2F, 0xF5,
+	0x82, 0x74, 0x02, 0x3E, 0xF5, 0x83, 0x22, 0xE5,
+/*0760*/0x42, 0x29, 0xFD, 0xE4, 0x33, 0xFC, 0xE5, 0x3C,
+	0xC3, 0x9D, 0xEC, 0x64, 0x80, 0xF8, 0x74, 0x80,
+/*0770*/0x98, 0x22, 0xF5, 0x83, 0xE0, 0x90, 0x07, 0x22,
+	0x54, 0x1F, 0xFD, 0xE0, 0xFA, 0xA3, 0xE0, 0xF5,
+/*0780*/0x82, 0x8A, 0x83, 0xED, 0xF0, 0x22, 0x90, 0x07,
+	0x22, 0xE0, 0xFC, 0xA3, 0xE0, 0xF5, 0x82, 0x8C,
+/*0790*/0x83, 0x22, 0x90, 0x07, 0x24, 0xFF, 0xED, 0x44,
+	0x07, 0xCF, 0xF0, 0xA3, 0xEF, 0xF0, 0x22, 0x85,
+/*07A0*/0x38, 0x38, 0x85, 0x39, 0x39, 0x85, 0x3A, 0x3A,
+	0x74, 0xC0, 0x2F, 0xF5, 0x82, 0x74, 0x02, 0x3E,
+/*07B0*/0xF5, 0x83, 0x22, 0x90, 0x07, 0x26, 0xFF, 0xED,
+	0x44, 0x07, 0xCF, 0xF0, 0xA3, 0xEF, 0xF0, 0x22,
+/*07C0*/0xF0, 0x74, 0xA0, 0x2F, 0xF5, 0x82, 0x74, 0x02,
+	0x3E, 0xF5, 0x83, 0x22, 0x74, 0xC0, 0x25, 0x11,
+/*07D0*/0xF5, 0x82, 0xE4, 0x34, 0x01, 0xF5, 0x83, 0x22,
+	0x74, 0x00, 0x25, 0x11, 0xF5, 0x82, 0xE4, 0x34,
+/*07E0*/0x02, 0xF5, 0x83, 0x22, 0x74, 0x60, 0x25, 0x11,
+	0xF5, 0x82, 0xE4, 0x34, 0x03, 0xF5, 0x83, 0x22,
+/*07F0*/0x74, 0x80, 0x25, 0x11, 0xF5, 0x82, 0xE4, 0x34,
+	0x03, 0xF5, 0x83, 0x22, 0x74, 0xE0, 0x25, 0x11,
+/*0800*/0xF5, 0x82, 0xE4, 0x34, 0x03, 0xF5, 0x83, 0x22,
+	0x74, 0x40, 0x25, 0x11, 0xF5, 0x82, 0xE4, 0x34,
+/*0810*/0x06, 0xF5, 0x83, 0x22, 0x74, 0x80, 0x2F, 0xF5,
+	0x82, 0x74, 0x02, 0x3E, 0xF5, 0x83, 0x22, 0xAF,
+/*0820*/0x08, 0x7E, 0x00, 0xEF, 0x44, 0x07, 0xF5, 0x82,
+	0x22, 0xF5, 0x83, 0xE5, 0x82, 0x44, 0x07, 0xF5,
+/*0830*/0x82, 0xE5, 0x40, 0xF0, 0x22, 0x74, 0x40, 0x25,
+	0x11, 0xF5, 0x82, 0xE4, 0x34, 0x02, 0xF5, 0x83,
+/*0840*/0x22, 0x74, 0xC0, 0x25, 0x11, 0xF5, 0x82, 0xE4,
+	0x34, 0x03, 0xF5, 0x83, 0x22, 0x74, 0x00, 0x25,
+/*0850*/0x11, 0xF5, 0x82, 0xE4, 0x34, 0x06, 0xF5, 0x83,
+	0x22, 0x74, 0x20, 0x25, 0x11, 0xF5, 0x82, 0xE4,
+/*0860*/0x34, 0x06, 0xF5, 0x83, 0x22, 0xE5, 0x08, 0xFD,
+	0xED, 0x44, 0x07, 0xF5, 0x82, 0x22, 0xE5, 0x41,
+/*0870*/0xF0, 0xE5, 0x65, 0x64, 0x01, 0x45, 0x64, 0x22,
+	0x7E, 0x00, 0xFB, 0x7A, 0x00, 0xFD, 0x7C, 0x00,
+/*0880*/0x22, 0x74, 0x20, 0x25, 0x11, 0xF5, 0x82, 0xE4,
+	0x34, 0x02, 0x22, 0x74, 0xA0, 0x25, 0x11, 0xF5,
+/*0890*/0x82, 0xE4, 0x34, 0x03, 0x22, 0x85, 0x3E, 0x42,
+	0x85, 0x3F, 0x41, 0x8F, 0x40, 0x22, 0x85, 0x3C,
+/*08A0*/0x42, 0x85, 0x3D, 0x41, 0x8F, 0x40, 0x22, 0x75,
+	0x45, 0x3F, 0x90, 0x07, 0x20, 0xE4, 0xF0, 0xA3,
+/*08B0*/0x22, 0xF5, 0x83, 0xE5, 0x32, 0xF0, 0x05, 0x6E,
+	0xE5, 0x6E, 0xC3, 0x94, 0x40, 0x22, 0xF0, 0xE5,
+/*08C0*/0x08, 0x44, 0x06, 0xF5, 0x82, 0x22, 0x74, 0x00,
+	0x25, 0x6E, 0xF5, 0x82, 0xE4, 0x34, 0x00, 0xF5,
+/*08D0*/0x83, 0x22, 0xE5, 0x6D, 0x45, 0x6C, 0x90, 0x07,
+	0x2F, 0x22, 0xE4, 0xF9, 0xE5, 0x3C, 0xD3, 0x95,
+/*08E0*/0x3E, 0x22, 0x74, 0x80, 0x2E, 0xF5, 0x82, 0xE4,
+	0x34, 0x02, 0xF5, 0x83, 0xE0, 0x22, 0x74, 0xA0,
+/*08F0*/0x2E, 0xF5, 0x82, 0xE4, 0x34, 0x02, 0xF5, 0x83,
+	0xE0, 0x22, 0x74, 0x80, 0x25, 0x6E, 0xF5, 0x82,
+/*0900*/0xE4, 0x34, 0x00, 0x22, 0x25, 0x42, 0xFD, 0xE4,
+	0x33, 0xFC, 0x22, 0x85, 0x42, 0x42, 0x85, 0x41,
+/*0910*/0x41, 0x85, 0x40, 0x40, 0x22, 0xED, 0x4C, 0x60,
+	0x03, 0x02, 0x09, 0xE5, 0xEF, 0x4E, 0x70, 0x37,
+/*0920*/0x90, 0x07, 0x26, 0x12, 0x07, 0x89, 0xE0, 0xFD,
+	0x12, 0x07, 0xCC, 0xED, 0xF0, 0x90, 0x07, 0x28,
+/*0930*/0x12, 0x07, 0x89, 0xE0, 0xFD, 0x12, 0x07, 0xD8,
+	0xED, 0xF0, 0x12, 0x07, 0x86, 0xE0, 0x54, 0x1F,
+/*0940*/0xFD, 0x12, 0x08, 0x81, 0xF5, 0x83, 0xED, 0xF0,
+	0x90, 0x07, 0x24, 0x12, 0x07, 0x89, 0xE0, 0x54,
+/*0950*/0x1F, 0xFD, 0x12, 0x08, 0x35, 0xED, 0xF0, 0xEF,
+	0x64, 0x04, 0x4E, 0x70, 0x37, 0x90, 0x07, 0x26,
+/*0960*/0x12, 0x07, 0x89, 0xE0, 0xFD, 0x12, 0x07, 0xE4,
+	0xED, 0xF0, 0x90, 0x07, 0x28, 0x12, 0x07, 0x89,
+/*0970*/0xE0, 0xFD, 0x12, 0x07, 0xF0, 0xED, 0xF0, 0x12,
+	0x07, 0x86, 0xE0, 0x54, 0x1F, 0xFD, 0x12, 0x08,
+/*0980*/0x8B, 0xF5, 0x83, 0xED, 0xF0, 0x90, 0x07, 0x24,
+	0x12, 0x07, 0x89, 0xE0, 0x54, 0x1F, 0xFD, 0x12,
+/*0990*/0x08, 0x41, 0xED, 0xF0, 0xEF, 0x64, 0x01, 0x4E,
+	0x70, 0x04, 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00,
+/*09A0*/0xEF, 0x64, 0x02, 0x4E, 0x70, 0x04, 0x7F, 0x01,
+	0x80, 0x02, 0x7F, 0x00, 0xEF, 0x4D, 0x60, 0x78,
+/*09B0*/0x90, 0x07, 0x26, 0x12, 0x07, 0x35, 0xE0, 0xFF,
+	0x12, 0x07, 0xFC, 0xEF, 0x12, 0x07, 0x31, 0xE0,
+/*09C0*/0xFF, 0x12, 0x08, 0x08, 0xEF, 0xF0, 0x90, 0x07,
+	0x22, 0x12, 0x07, 0x35, 0xE0, 0x54, 0x1F, 0xFF,
+/*09D0*/0x12, 0x08, 0x4D, 0xEF, 0xF0, 0x90, 0x07, 0x24,
+	0x12, 0x07, 0x35, 0xE0, 0x54, 0x1F, 0xFF, 0x12,
+/*09E0*/0x08, 0x59, 0xEF, 0xF0, 0x22, 0x12, 0x07, 0xCC,
+	0xE4, 0xF0, 0x12, 0x07, 0xD8, 0xE4, 0xF0, 0x12,
+/*09F0*/0x08, 0x81, 0xF5, 0x83, 0xE4, 0xF0, 0x12, 0x08,
+	0x35, 0x74, 0x14, 0xF0, 0x12, 0x07, 0xE4, 0xE4,
+/*0A00*/0xF0, 0x12, 0x07, 0xF0, 0xE4, 0xF0, 0x12, 0x08,
+	0x8B, 0xF5, 0x83, 0xE4, 0xF0, 0x12, 0x08, 0x41,
+/*0A10*/0x74, 0x14, 0xF0, 0x12, 0x07, 0xFC, 0xE4, 0xF0,
+	0x12, 0x08, 0x08, 0xE4, 0xF0, 0x12, 0x08, 0x4D,
+/*0A20*/0xE4, 0xF0, 0x12, 0x08, 0x59, 0x74, 0x14, 0xF0,
+	0x22, 0x53, 0xF9, 0xF7, 0x75, 0xFC, 0x10, 0xE4,
+/*0A30*/0xF5, 0xFD, 0x75, 0xFE, 0x30, 0xF5, 0xFF, 0xE5,
+	0xE7, 0x20, 0xE7, 0x03, 0x43, 0xF9, 0x08, 0xE5,
+/*0A40*/0xE6, 0x20, 0xE7, 0x0B, 0x78, 0xFF, 0xE4, 0xF6,
+	0xD8, 0xFD, 0x53, 0xE6, 0xFE, 0x80, 0x09, 0x78,
+/*0A50*/0x08, 0xE4, 0xF6, 0xD8, 0xFD, 0x53, 0xE6, 0xFE,
+	0x75, 0x81, 0x80, 0xE4, 0xF5, 0xA8, 0xD2, 0xA8,
+/*0A60*/0xC2, 0xA9, 0xD2, 0xAF, 0xE5, 0xE2, 0x20, 0xE5,
+	0x05, 0x20, 0xE6, 0x02, 0x80, 0x03, 0x43, 0xE1,
+/*0A70*/0x02, 0xE5, 0xE2, 0x20, 0xE0, 0x0E, 0x90, 0x00,
+	0x00, 0x7F, 0x00, 0x7E, 0x08, 0xE4, 0xF0, 0xA3,
+/*0A80*/0xDF, 0xFC, 0xDE, 0xFA, 0x02, 0x0A, 0xDB, 0x43,
+	0xFA, 0x01, 0xC0, 0xE0, 0xC0, 0xF0, 0xC0, 0x83,
+/*0A90*/0xC0, 0x82, 0xC0, 0xD0, 0x12, 0x1C, 0xE7, 0xD0,
+	0xD0, 0xD0, 0x82, 0xD0, 0x83, 0xD0, 0xF0, 0xD0,
+/*0AA0*/0xE0, 0x53, 0xFA, 0xFE, 0x32, 0x02, 0x1B, 0x55,
+	0xE4, 0x93, 0xA3, 0xF8, 0xE4, 0x93, 0xA3, 0xF6,
+/*0AB0*/0x08, 0xDF, 0xF9, 0x80, 0x29, 0xE4, 0x93, 0xA3,
+	0xF8, 0x54, 0x07, 0x24, 0x0C, 0xC8, 0xC3, 0x33,
+/*0AC0*/0xC4, 0x54, 0x0F, 0x44, 0x20, 0xC8, 0x83, 0x40,
+	0x04, 0xF4, 0x56, 0x80, 0x01, 0x46, 0xF6, 0xDF,
+/*0AD0*/0xE4, 0x80, 0x0B, 0x01, 0x02, 0x04, 0x08, 0x10,
+	0x20, 0x40, 0x80, 0x90, 0x00, 0x3F, 0xE4, 0x7E,
+/*0AE0*/0x01, 0x93, 0x60, 0xC1, 0xA3, 0xFF, 0x54, 0x3F,
+	0x30, 0xE5, 0x09, 0x54, 0x1F, 0xFE, 0xE4, 0x93,
+/*0AF0*/0xA3, 0x60, 0x01, 0x0E, 0xCF, 0x54, 0xC0, 0x25,
+	0xE0, 0x60, 0xAD, 0x40, 0xB8, 0x80, 0xFE, 0x8C,
+/*0B00*/0x64, 0x8D, 0x65, 0x8A, 0x66, 0x8B, 0x67, 0xE4,
+	0xF5, 0x69, 0xEF, 0x4E, 0x70, 0x03, 0x02, 0x1D,
+/*0B10*/0x55, 0xE4, 0xF5, 0x68, 0xE5, 0x67, 0x45, 0x66,
+	0x70, 0x32, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x90,
+/*0B20*/0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2, 0xE4,
+	0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0xE4, 0x12,
+/*0B30*/0x08, 0x70, 0x70, 0x29, 0x12, 0x07, 0x2A, 0x75,
+	0x83, 0x92, 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83,
+/*0B40*/0xC6, 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC8,
+	0xE4, 0xF0, 0x80, 0x11, 0x90, 0x07, 0x26, 0x12,
+/*0B50*/0x07, 0x35, 0xE4, 0x12, 0x08, 0x70, 0x70, 0x05,
+	0x12, 0x07, 0x32, 0xE4, 0xF0, 0x12, 0x1D, 0x55,
+/*0B60*/0x12, 0x1E, 0xBF, 0xE5, 0x67, 0x45, 0x66, 0x70,
+	0x33, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x90, 0xE5,
+/*0B70*/0x41, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2, 0xE5,
+	0x41, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0x12,
+/*0B80*/0x08, 0x6E, 0x70, 0x29, 0x12, 0x07, 0x2A, 0x75,
+	0x83, 0x92, 0xE5, 0x40, 0x12, 0x07, 0x29, 0x75,
+/*0B90*/0x83, 0xC6, 0xE5, 0x40, 0x12, 0x07, 0x29, 0x75,
+	0x83, 0xC8, 0x80, 0x0E, 0x90, 0x07, 0x26, 0x12,
+/*0BA0*/0x07, 0x35, 0x12, 0x08, 0x6E, 0x70, 0x06, 0x12,
+	0x07, 0x32, 0xE5, 0x40, 0xF0, 0xAF, 0x69, 0x7E,
+/*0BB0*/0x00, 0xAD, 0x67, 0xAC, 0x66, 0x12, 0x04, 0x44,
+	0x12, 0x07, 0x2A, 0x75, 0x83, 0xCA, 0xE0, 0xD3,
+/*0BC0*/0x94, 0x00, 0x50, 0x0C, 0x05, 0x68, 0xE5, 0x68,
+	0xC3, 0x94, 0x05, 0x50, 0x03, 0x02, 0x0B, 0x14,
+/*0BD0*/0x22, 0x8C, 0x60, 0x8D, 0x61, 0x12, 0x08, 0xDA,
+	0x74, 0x20, 0x40, 0x0D, 0x2F, 0xF5, 0x82, 0x74,
+/*0BE0*/0x03, 0x3E, 0xF5, 0x83, 0xE5, 0x3E, 0xF0, 0x80,
+	0x0B, 0x2F, 0xF5, 0x82, 0x74, 0x03, 0x3E, 0xF5,
+/*0BF0*/0x83, 0xE5, 0x3C, 0xF0, 0xE5, 0x3C, 0xD3, 0x95,
+	0x3E, 0x40, 0x3C, 0xE5, 0x61, 0x45, 0x60, 0x70,
+/*0C00*/0x10, 0xE9, 0x12, 0x09, 0x04, 0xE5, 0x3E, 0x12,
+	0x07, 0x68, 0x40, 0x3B, 0x12, 0x08, 0x95, 0x80,
+/*0C10*/0x18, 0xE5, 0x3E, 0xC3, 0x95, 0x38, 0x40, 0x1D,
+	0x85, 0x3E, 0x38, 0xE5, 0x3E, 0x60, 0x05, 0x85,
+/*0C20*/0x3F, 0x39, 0x80, 0x03, 0x85, 0x39, 0x39, 0x8F,
+	0x3A, 0x12, 0x08, 0x14, 0xE5, 0x3E, 0x12, 0x07,
+/*0C30*/0xC0, 0xE5, 0x3F, 0xF0, 0x22, 0x80, 0x43, 0xE5,
+	0x61, 0x45, 0x60, 0x70, 0x19, 0x12, 0x07, 0x5F,
+/*0C40*/0x40, 0x05, 0x12, 0x08, 0x9E, 0x80, 0x27, 0x12,
+	0x09, 0x0B, 0x12, 0x08, 0x14, 0xE5, 0x42, 0x12,
+/*0C50*/0x07, 0xC0, 0xE5, 0x41, 0xF0, 0x22, 0xE5, 0x3C,
+	0xC3, 0x95, 0x38, 0x40, 0x1D, 0x85, 0x3C, 0x38,
+/*0C60*/0xE5, 0x3C, 0x60, 0x05, 0x85, 0x3D, 0x39, 0x80,
+	0x03, 0x85, 0x39, 0x39, 0x8F, 0x3A, 0x12, 0x08,
+/*0C70*/0x14, 0xE5, 0x3C, 0x12, 0x07, 0xC0, 0xE5, 0x3D,
+	0xF0, 0x22, 0x85, 0x38, 0x38, 0x85, 0x39, 0x39,
+/*0C80*/0x85, 0x3A, 0x3A, 0x12, 0x08, 0x14, 0xE5, 0x38,
+	0x12, 0x07, 0xC0, 0xE5, 0x39, 0xF0, 0x22, 0x7F,
+/*0C90*/0x06, 0x12, 0x17, 0x31, 0x12, 0x1D, 0x23, 0x12,
+	0x0E, 0x04, 0x12, 0x0E, 0x33, 0xE0, 0x44, 0x0A,
+/*0CA0*/0xF0, 0x74, 0x8E, 0xFE, 0x12, 0x0E, 0x04, 0x12,
+	0x0E, 0x0B, 0xEF, 0xF0, 0xE5, 0x28, 0x30, 0xE5,
+/*0CB0*/0x03, 0xD3, 0x80, 0x01, 0xC3, 0x40, 0x05, 0x75,
+	0x14, 0x20, 0x80, 0x03, 0x75, 0x14, 0x08, 0x12,
+/*0CC0*/0x0E, 0x04, 0x75, 0x83, 0x8A, 0xE5, 0x14, 0xF0,
+	0xB4, 0xFF, 0x05, 0x75, 0x12, 0x80, 0x80, 0x06,
+/*0CD0*/0xE5, 0x14, 0xC3, 0x13, 0xF5, 0x12, 0xE4, 0xF5,
+	0x16, 0xF5, 0x7F, 0x12, 0x19, 0x36, 0x12, 0x13,
+/*0CE0*/0xA3, 0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x50, 0x09,
+	0x05, 0x16, 0xE5, 0x16, 0xC3, 0x94, 0x14, 0x40,
+/*0CF0*/0xEA, 0xE5, 0xE4, 0x20, 0xE7, 0x28, 0x12, 0x0E,
+	0x04, 0x75, 0x83, 0xD2, 0xE0, 0x54, 0x08, 0xD3,
+/*0D00*/0x94, 0x00, 0x40, 0x04, 0x7F, 0x01, 0x80, 0x02,
+	0x7F, 0x00, 0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40,
+/*0D10*/0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF,
+	0x5E, 0x60, 0x03, 0x12, 0x1D, 0xD7, 0xE5, 0x7F,
+/*0D20*/0xC3, 0x94, 0x11, 0x40, 0x14, 0x12, 0x0E, 0x04,
+	0x75, 0x83, 0xD2, 0xE0, 0x44, 0x80, 0xF0, 0xE5,
+/*0D30*/0xE4, 0x20, 0xE7, 0x0F, 0x12, 0x1D, 0xD7, 0x80,
+	0x0A, 0x12, 0x0E, 0x04, 0x75, 0x83, 0xD2, 0xE0,
+/*0D40*/0x54, 0x7F, 0xF0, 0x12, 0x1D, 0x23, 0x22, 0x74,
+	0x8A, 0x85, 0x08, 0x82, 0xF5, 0x83, 0xE5, 0x17,
+/*0D50*/0xF0, 0x12, 0x0E, 0x3A, 0xE4, 0xF0, 0x90, 0x07,
+	0x02, 0xE0, 0x12, 0x0E, 0x17, 0x75, 0x83, 0x90,
+/*0D60*/0xEF, 0xF0, 0x74, 0x92, 0xFE, 0xE5, 0x08, 0x44,
+	0x07, 0xFF, 0xF5, 0x82, 0x8E, 0x83, 0xE0, 0x54,
+/*0D70*/0xC0, 0xFD, 0x90, 0x07, 0x03, 0xE0, 0x54, 0x3F,
+	0x4D, 0x8F, 0x82, 0x8E, 0x83, 0xF0, 0x90, 0x07,
+/*0D80*/0x04, 0xE0, 0x12, 0x0E, 0x17, 0x75, 0x83, 0x82,
+	0xEF, 0xF0, 0x90, 0x07, 0x05, 0xE0, 0xFF, 0xED,
+/*0D90*/0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0xB4, 0xEF,
+	0x12, 0x0E, 0x03, 0x75, 0x83, 0x80, 0xE0, 0x54,
+/*0DA0*/0xBF, 0xF0, 0x30, 0x37, 0x0A, 0x12, 0x0E, 0x91,
+	0x75, 0x83, 0x94, 0xE0, 0x44, 0x80, 0xF0, 0x30,
+/*0DB0*/0x38, 0x0A, 0x12, 0x0E, 0x91, 0x75, 0x83, 0x92,
+	0xE0, 0x44, 0x80, 0xF0, 0xE5, 0x28, 0x30, 0xE4,
+/*0DC0*/0x1A, 0x20, 0x39, 0x0A, 0x12, 0x0E, 0x04, 0x75,
+	0x83, 0x88, 0xE0, 0x54, 0x7F, 0xF0, 0x20, 0x3A,
+/*0DD0*/0x0A, 0x12, 0x0E, 0x04, 0x75, 0x83, 0x88, 0xE0,
+	0x54, 0xBF, 0xF0, 0x74, 0x8C, 0xFE, 0x12, 0x0E,
+/*0DE0*/0x04, 0x8E, 0x83, 0xE0, 0x54, 0x0F, 0x12, 0x0E,
+	0x03, 0x75, 0x83, 0x86, 0xE0, 0x54, 0xBF, 0xF0,
+/*0DF0*/0xE5, 0x08, 0x44, 0x06, 0x12, 0x0D, 0xFD, 0x75,
+	0x83, 0x8A, 0xE4, 0xF0, 0x22, 0xF5, 0x82, 0x75,
+/*0E00*/0x83, 0x82, 0xE4, 0xF0, 0xE5, 0x08, 0x44, 0x07,
+	0xF5, 0x82, 0x22, 0x8E, 0x83, 0xE0, 0xF5, 0x10,
+/*0E10*/0x54, 0xFE, 0xF0, 0xE5, 0x10, 0x44, 0x01, 0xFF,
+	0xE5, 0x08, 0xFD, 0xED, 0x44, 0x07, 0xF5, 0x82,
+/*0E20*/0x22, 0xE5, 0x15, 0xC4, 0x54, 0x07, 0xFF, 0xE5,
+	0x08, 0xFD, 0xED, 0x44, 0x08, 0xF5, 0x82, 0x75,
+/*0E30*/0x83, 0x82, 0x22, 0x75, 0x83, 0x80, 0xE0, 0x44,
+	0x40, 0xF0, 0xE5, 0x08, 0x44, 0x08, 0xF5, 0x82,
+/*0E40*/0x75, 0x83, 0x8A, 0x22, 0xE5, 0x16, 0x25, 0xE0,
+	0x25, 0xE0, 0x24, 0xAF, 0xF5, 0x82, 0xE4, 0x34,
+/*0E50*/0x1A, 0xF5, 0x83, 0xE4, 0x93, 0xF5, 0x0D, 0x22,
+	0x43, 0xE1, 0x10, 0x43, 0xE1, 0x80, 0x53, 0xE1,
+/*0E60*/0xFD, 0x85, 0xE1, 0x10, 0x22, 0xE5, 0x16, 0x25,
+	0xE0, 0x25, 0xE0, 0x24, 0xB2, 0xF5, 0x82, 0xE4,
+/*0E70*/0x34, 0x1A, 0xF5, 0x83, 0xE4, 0x93, 0x22, 0x85,
+	0x55, 0x82, 0x85, 0x54, 0x83, 0xE5, 0x15, 0xF0,
+/*0E80*/0x22, 0xE5, 0xE2, 0x54, 0x20, 0xD3, 0x94, 0x00,
+	0x22, 0xE5, 0xE2, 0x54, 0x40, 0xD3, 0x94, 0x00,
+/*0E90*/0x22, 0xE5, 0x08, 0x44, 0x06, 0xF5, 0x82, 0x22,
+	0xFD, 0xE5, 0x08, 0xFB, 0xEB, 0x44, 0x07, 0xF5,
+/*0EA0*/0x82, 0x22, 0x53, 0xF9, 0xF7, 0x75, 0xFE, 0x30,
+	0x22, 0xEF, 0x4E, 0x70, 0x26, 0x12, 0x07, 0xCC,
+/*0EB0*/0xE0, 0xFD, 0x90, 0x07, 0x26, 0x12, 0x07, 0x7B,
+	0x12, 0x07, 0xD8, 0xE0, 0xFD, 0x90, 0x07, 0x28,
+/*0EC0*/0x12, 0x07, 0x7B, 0x12, 0x08, 0x81, 0x12, 0x07,
+	0x72, 0x12, 0x08, 0x35, 0xE0, 0x90, 0x07, 0x24,
+/*0ED0*/0x12, 0x07, 0x78, 0xEF, 0x64, 0x04, 0x4E, 0x70,
+	0x29, 0x12, 0x07, 0xE4, 0xE0, 0xFD, 0x90, 0x07,
+/*0EE0*/0x26, 0x12, 0x07, 0x7B, 0x12, 0x07, 0xF0, 0xE0,
+	0xFD, 0x90, 0x07, 0x28, 0x12, 0x07, 0x7B, 0x12,
+/*0EF0*/0x08, 0x8B, 0x12, 0x07, 0x72, 0x12, 0x08, 0x41,
+	0xE0, 0x54, 0x1F, 0xFD, 0x90, 0x07, 0x24, 0x12,
+/*0F00*/0x07, 0x7B, 0xEF, 0x64, 0x01, 0x4E, 0x70, 0x04,
+	0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00, 0xEF, 0x64,
+/*0F10*/0x02, 0x4E, 0x70, 0x04, 0x7F, 0x01, 0x80, 0x02,
+	0x7F, 0x00, 0xEF, 0x4D, 0x60, 0x35, 0x12, 0x07,
+/*0F20*/0xFC, 0xE0, 0xFF, 0x90, 0x07, 0x26, 0x12, 0x07,
+	0x89, 0xEF, 0xF0, 0x12, 0x08, 0x08, 0xE0, 0xFF,
+/*0F30*/0x90, 0x07, 0x28, 0x12, 0x07, 0x89, 0xEF, 0xF0,
+	0x12, 0x08, 0x4D, 0xE0, 0x54, 0x1F, 0xFF, 0x12,
+/*0F40*/0x07, 0x86, 0xEF, 0xF0, 0x12, 0x08, 0x59, 0xE0,
+	0x54, 0x1F, 0xFF, 0x90, 0x07, 0x24, 0x12, 0x07,
+/*0F50*/0x89, 0xEF, 0xF0, 0x22, 0xE4, 0xF5, 0x53, 0x12,
+	0x0E, 0x81, 0x40, 0x04, 0x7F, 0x01, 0x80, 0x02,
+/*0F60*/0x7F, 0x00, 0x12, 0x0E, 0x89, 0x40, 0x04, 0x7E,
+	0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F, 0x70,
+/*0F70*/0x03, 0x02, 0x0F, 0xF6, 0x85, 0xE1, 0x10, 0x43,
+	0xE1, 0x02, 0x53, 0xE1, 0x0F, 0x85, 0xE1, 0x10,
+/*0F80*/0xE4, 0xF5, 0x51, 0xE5, 0xE3, 0x54, 0x3F, 0xF5,
+	0x52, 0x12, 0x0E, 0x89, 0x40, 0x1D, 0xAD, 0x52,
+/*0F90*/0xAF, 0x51, 0x12, 0x11, 0x18, 0xEF, 0x60, 0x08,
+	0x85, 0xE1, 0x10, 0x43, 0xE1, 0x40, 0x80, 0x0B,
+/*0FA0*/0x53, 0xE1, 0xBF, 0x12, 0x0E, 0x58, 0x12, 0x00,
+	0x06, 0x80, 0xFB, 0xE5, 0xE3, 0x54, 0x3F, 0xF5,
+/*0FB0*/0x51, 0xE5, 0xE4, 0x54, 0x3F, 0xF5, 0x52, 0x12,
+	0x0E, 0x81, 0x40, 0x1D, 0xAD, 0x52, 0xAF, 0x51,
+/*0FC0*/0x12, 0x11, 0x18, 0xEF, 0x60, 0x08, 0x85, 0xE1,
+	0x10, 0x43, 0xE1, 0x20, 0x80, 0x0B, 0x53, 0xE1,
+/*0FD0*/0xDF, 0x12, 0x0E, 0x58, 0x12, 0x00, 0x06, 0x80,
+	0xFB, 0x12, 0x0E, 0x81, 0x40, 0x04, 0x7F, 0x01,
+/*0FE0*/0x80, 0x02, 0x7F, 0x00, 0x12, 0x0E, 0x89, 0x40,
+	0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE,
+/*0FF0*/0x4F, 0x60, 0x03, 0x12, 0x0E, 0x5B, 0x22, 0x12,
+	0x0E, 0x21, 0xEF, 0xF0, 0x12, 0x10, 0x91, 0x22,
+/*1000*/0x02, 0x11, 0x00, 0x02, 0x10, 0x40, 0x02, 0x10,
+	0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1010*/0x01, 0x20, 0x01, 0x20, 0xE4, 0xF5, 0x57, 0x12,
+	0x16, 0xBD, 0x12, 0x16, 0x44, 0xE4, 0x12, 0x10,
+/*1020*/0x56, 0x12, 0x14, 0xB7, 0x90, 0x07, 0x26, 0x12,
+	0x07, 0x35, 0xE4, 0x12, 0x07, 0x31, 0xE4, 0xF0,
+/*1030*/0x12, 0x10, 0x56, 0x12, 0x14, 0xB7, 0x90, 0x07,
+	0x26, 0x12, 0x07, 0x35, 0xE5, 0x41, 0x12, 0x07,
+/*1040*/0x31, 0xE5, 0x40, 0xF0, 0xAF, 0x57, 0x7E, 0x00,
+	0xAD, 0x56, 0x7C, 0x00, 0x12, 0x04, 0x44, 0xAF,
+/*1050*/0x56, 0x7E, 0x00, 0x02, 0x11, 0xEE, 0xFF, 0x90,
+	0x07, 0x20, 0xA3, 0xE0, 0xFD, 0xE4, 0xF5, 0x56,
+/*1060*/0xF5, 0x40, 0xFE, 0xFC, 0xAB, 0x56, 0xFA, 0x12,
+	0x11, 0x51, 0x7F, 0x0F, 0x7D, 0x18, 0xE4, 0xF5,
+/*1070*/0x56, 0xF5, 0x40, 0xFE, 0xFC, 0xAB, 0x56, 0xFA,
+	0x12, 0x15, 0x41, 0xAF, 0x56, 0x7E, 0x00, 0x12,
+/*1080*/0x1A, 0xFF, 0xE4, 0xFF, 0xF5, 0x56, 0x7D, 0x1F,
+	0xF5, 0x40, 0xFE, 0xFC, 0xAB, 0x56, 0xFA, 0x22,
+/*1090*/0x22, 0xE4, 0xF5, 0x55, 0xE5, 0x08, 0xFD, 0x74,
+	0xA0, 0xF5, 0x56, 0xED, 0x44, 0x07, 0xF5, 0x57,
+/*10A0*/0xE5, 0x28, 0x30, 0xE5, 0x03, 0xD3, 0x80, 0x01,
+	0xC3, 0x40, 0x05, 0x7F, 0x28, 0xEF, 0x80, 0x04,
+/*10B0*/0x7F, 0x14, 0xEF, 0xC3, 0x13, 0xF5, 0x54, 0xE4,
+	0xF9, 0x12, 0x0E, 0x18, 0x75, 0x83, 0x8E, 0xE0,
+/*10C0*/0xF5, 0x10, 0xCE, 0xEF, 0xCE, 0xEE, 0xD3, 0x94,
+	0x00, 0x40, 0x26, 0xE5, 0x10, 0x54, 0xFE, 0x12,
+/*10D0*/0x0E, 0x98, 0x75, 0x83, 0x8E, 0xED, 0xF0, 0xE5,
+	0x10, 0x44, 0x01, 0xFD, 0xEB, 0x44, 0x07, 0xF5,
+/*10E0*/0x82, 0xED, 0xF0, 0x85, 0x57, 0x82, 0x85, 0x56,
+	0x83, 0xE0, 0x30, 0xE3, 0x01, 0x09, 0x1E, 0x80,
+/*10F0*/0xD4, 0xC2, 0x34, 0xE9, 0xC3, 0x95, 0x54, 0x40,
+	0x02, 0xD2, 0x34, 0x22, 0x02, 0x00, 0x06, 0x22,
+/*1100*/0x30, 0x30, 0x11, 0x90, 0x10, 0x00, 0xE4, 0x93,
+	0xF5, 0x10, 0x90, 0x10, 0x10, 0xE4, 0x93, 0xF5,
+/*1110*/0x10, 0x12, 0x10, 0x90, 0x12, 0x11, 0x50, 0x22,
+	0xE4, 0xFC, 0xC3, 0xED, 0x9F, 0xFA, 0xEF, 0xF5,
+/*1120*/0x83, 0x75, 0x82, 0x00, 0x79, 0xFF, 0xE4, 0x93,
+	0xCC, 0x6C, 0xCC, 0xA3, 0xD9, 0xF8, 0xDA, 0xF6,
+/*1130*/0xE5, 0xE2, 0x30, 0xE4, 0x02, 0x8C, 0xE5, 0xED,
+	0x24, 0xFF, 0xFF, 0xEF, 0x75, 0x82, 0xFF, 0xF5,
+/*1140*/0x83, 0xE4, 0x93, 0x6C, 0x70, 0x03, 0x7F, 0x01,
+	0x22, 0x7F, 0x00, 0x22, 0x22, 0x11, 0x00, 0x00,
+/*1150*/0x22, 0x8E, 0x58, 0x8F, 0x59, 0x8C, 0x5A, 0x8D,
+	0x5B, 0x8A, 0x5C, 0x8B, 0x5D, 0x75, 0x5E, 0x01,
+/*1160*/0xE4, 0xF5, 0x5F, 0xF5, 0x60, 0xF5, 0x62, 0x12,
+	0x07, 0x2A, 0x75, 0x83, 0xD0, 0xE0, 0xFF, 0xC4,
+/*1170*/0x54, 0x0F, 0xF5, 0x61, 0x12, 0x1E, 0xA5, 0x85,
+	0x59, 0x5E, 0xD3, 0xE5, 0x5E, 0x95, 0x5B, 0xE5,
+/*1180*/0x5A, 0x12, 0x07, 0x6B, 0x50, 0x4B, 0x12, 0x07,
+	0x03, 0x75, 0x83, 0xBC, 0xE0, 0x45, 0x5E, 0x12,
+/*1190*/0x07, 0x29, 0x75, 0x83, 0xBE, 0xE0, 0x45, 0x5E,
+	0x12, 0x07, 0x29, 0x75, 0x83, 0xC0, 0xE0, 0x45,
+/*11A0*/0x5E, 0xF0, 0xAF, 0x5F, 0xE5, 0x60, 0x12, 0x08,
+	0x78, 0x12, 0x0A, 0xFF, 0xAF, 0x62, 0x7E, 0x00,
+/*11B0*/0xAD, 0x5D, 0xAC, 0x5C, 0x12, 0x04, 0x44, 0xE5,
+	0x61, 0xAF, 0x5E, 0x7E, 0x00, 0xB4, 0x03, 0x05,
+/*11C0*/0x12, 0x1E, 0x21, 0x80, 0x07, 0xAD, 0x5D, 0xAC,
+	0x5C, 0x12, 0x13, 0x17, 0x05, 0x5E, 0x02, 0x11,
+/*11D0*/0x7A, 0x12, 0x07, 0x03, 0x75, 0x83, 0xBC, 0xE0,
+	0x45, 0x40, 0x12, 0x07, 0x29, 0x75, 0x83, 0xBE,
+/*11E0*/0xE0, 0x45, 0x40, 0x12, 0x07, 0x29, 0x75, 0x83,
+	0xC0, 0xE0, 0x45, 0x40, 0xF0, 0x22, 0x8E, 0x58,
+/*11F0*/0x8F, 0x59, 0x75, 0x5A, 0x01, 0x79, 0x01, 0x75,
+	0x5B, 0x01, 0xE4, 0xFB, 0x12, 0x07, 0x2A, 0x75,
+/*1200*/0x83, 0xAE, 0xE0, 0x54, 0x1A, 0xFF, 0x12, 0x08,
+	0x65, 0xE0, 0xC4, 0x13, 0x54, 0x07, 0xFE, 0xEF,
+/*1210*/0x70, 0x0C, 0xEE, 0x65, 0x35, 0x70, 0x07, 0x90,
+	0x07, 0x2F, 0xE0, 0xB4, 0x01, 0x0D, 0xAF, 0x35,
+/*1220*/0x7E, 0x00, 0x12, 0x0E, 0xA9, 0xCF, 0xEB, 0xCF,
+	0x02, 0x1E, 0x60, 0xE5, 0x59, 0x64, 0x02, 0x45,
+/*1230*/0x58, 0x70, 0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F,
+	0x00, 0xE5, 0x59, 0x45, 0x58, 0x70, 0x04, 0x7E,
+/*1240*/0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F, 0x60,
+	0x23, 0x85, 0x41, 0x49, 0x85, 0x40, 0x4B, 0xE5,
+/*1250*/0x59, 0x45, 0x58, 0x70, 0x2C, 0xAF, 0x5A, 0xFE,
+	0xCD, 0xE9, 0xCD, 0xFC, 0xAB, 0x59, 0xAA, 0x58,
+/*1260*/0x12, 0x0A, 0xFF, 0xAF, 0x5B, 0x7E, 0x00, 0x12,
+	0x1E, 0x60, 0x80, 0x15, 0xAF, 0x5B, 0x7E, 0x00,
+/*1270*/0x12, 0x1E, 0x60, 0x90, 0x07, 0x26, 0x12, 0x07,
+	0x35, 0xE5, 0x49, 0x12, 0x07, 0x31, 0xE5, 0x4B,
+/*1280*/0xF0, 0xE4, 0xFD, 0xAF, 0x35, 0xFE, 0xFC, 0x12,
+	0x09, 0x15, 0x22, 0x8C, 0x64, 0x8D, 0x65, 0x12,
+/*1290*/0x08, 0xDA, 0x40, 0x3C, 0xE5, 0x65, 0x45, 0x64,
+	0x70, 0x10, 0x12, 0x09, 0x04, 0xC3, 0xE5, 0x3E,
+/*12A0*/0x12, 0x07, 0x69, 0x40, 0x3B, 0x12, 0x08, 0x95,
+	0x80, 0x18, 0xE5, 0x3E, 0xC3, 0x95, 0x38, 0x40,
+/*12B0*/0x1D, 0x85, 0x3E, 0x38, 0xE5, 0x3E, 0x60, 0x05,
+	0x85, 0x3F, 0x39, 0x80, 0x03, 0x85, 0x39, 0x39,
+/*12C0*/0x8F, 0x3A, 0x12, 0x07, 0xA8, 0xE5, 0x3E, 0x12,
+	0x07, 0x53, 0xE5, 0x3F, 0xF0, 0x22, 0x80, 0x3B,
+/*12D0*/0xE5, 0x65, 0x45, 0x64, 0x70, 0x11, 0x12, 0x07,
+	0x5F, 0x40, 0x05, 0x12, 0x08, 0x9E, 0x80, 0x1F,
+/*12E0*/0x12, 0x07, 0x3E, 0xE5, 0x41, 0xF0, 0x22, 0xE5,
+	0x3C, 0xC3, 0x95, 0x38, 0x40, 0x1D, 0x85, 0x3C,
+/*12F0*/0x38, 0xE5, 0x3C, 0x60, 0x05, 0x85, 0x3D, 0x39,
+	0x80, 0x03, 0x85, 0x39, 0x39, 0x8F, 0x3A, 0x12,
+/*1300*/0x07, 0xA8, 0xE5, 0x3C, 0x12, 0x07, 0x53, 0xE5,
+	0x3D, 0xF0, 0x22, 0x12, 0x07, 0x9F, 0xE5, 0x38,
+/*1310*/0x12, 0x07, 0x53, 0xE5, 0x39, 0xF0, 0x22, 0x8C,
+	0x63, 0x8D, 0x64, 0x12, 0x08, 0xDA, 0x40, 0x3C,
+/*1320*/0xE5, 0x64, 0x45, 0x63, 0x70, 0x10, 0x12, 0x09,
+	0x04, 0xC3, 0xE5, 0x3E, 0x12, 0x07, 0x69, 0x40,
+/*1330*/0x3B, 0x12, 0x08, 0x95, 0x80, 0x18, 0xE5, 0x3E,
+	0xC3, 0x95, 0x38, 0x40, 0x1D, 0x85, 0x3E, 0x38,
+/*1340*/0xE5, 0x3E, 0x60, 0x05, 0x85, 0x3F, 0x39, 0x80,
+	0x03, 0x85, 0x39, 0x39, 0x8F, 0x3A, 0x12, 0x07,
+/*1350*/0xA8, 0xE5, 0x3E, 0x12, 0x07, 0x53, 0xE5, 0x3F,
+	0xF0, 0x22, 0x80, 0x3B, 0xE5, 0x64, 0x45, 0x63,
+/*1360*/0x70, 0x11, 0x12, 0x07, 0x5F, 0x40, 0x05, 0x12,
+	0x08, 0x9E, 0x80, 0x1F, 0x12, 0x07, 0x3E, 0xE5,
+/*1370*/0x41, 0xF0, 0x22, 0xE5, 0x3C, 0xC3, 0x95, 0x38,
+	0x40, 0x1D, 0x85, 0x3C, 0x38, 0xE5, 0x3C, 0x60,
+/*1380*/0x05, 0x85, 0x3D, 0x39, 0x80, 0x03, 0x85, 0x39,
+	0x39, 0x8F, 0x3A, 0x12, 0x07, 0xA8, 0xE5, 0x3C,
+/*1390*/0x12, 0x07, 0x53, 0xE5, 0x3D, 0xF0, 0x22, 0x12,
+	0x07, 0x9F, 0xE5, 0x38, 0x12, 0x07, 0x53, 0xE5,
+/*13A0*/0x39, 0xF0, 0x22, 0xE5, 0x0D, 0xFE, 0xE5, 0x08,
+	0x8E, 0x54, 0x44, 0x05, 0xF5, 0x55, 0x75, 0x15,
+/*13B0*/0x0F, 0xF5, 0x82, 0x12, 0x0E, 0x7A, 0x12, 0x17,
+	0xA3, 0x20, 0x31, 0x05, 0x75, 0x15, 0x03, 0x80,
+/*13C0*/0x03, 0x75, 0x15, 0x0B, 0xE5, 0x0A, 0xC3, 0x94,
+	0x01, 0x50, 0x38, 0x12, 0x14, 0x20, 0x20, 0x31,
+/*13D0*/0x06, 0x05, 0x15, 0x05, 0x15, 0x80, 0x04, 0x15,
+	0x15, 0x15, 0x15, 0xE5, 0x0A, 0xC3, 0x94, 0x01,
+/*13E0*/0x50, 0x21, 0x12, 0x14, 0x20, 0x20, 0x31, 0x04,
+	0x05, 0x15, 0x80, 0x02, 0x15, 0x15, 0xE5, 0x0A,
+/*13F0*/0xC3, 0x94, 0x01, 0x50, 0x0E, 0x12, 0x0E, 0x77,
+	0x12, 0x17, 0xA3, 0x20, 0x31, 0x05, 0x05, 0x15,
+/*1400*/0x12, 0x0E, 0x77, 0xE5, 0x15, 0xB4, 0x08, 0x04,
+	0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x15,
+/*1410*/0xB4, 0x07, 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E,
+	0x00, 0xEE, 0x4F, 0x60, 0x02, 0x05, 0x7F, 0x22,
+/*1420*/0x85, 0x55, 0x82, 0x85, 0x54, 0x83, 0xE5, 0x15,
+	0xF0, 0x12, 0x17, 0xA3, 0x22, 0x12, 0x07, 0x2A,
+/*1430*/0x75, 0x83, 0xAE, 0x74, 0xFF, 0x12, 0x07, 0x29,
+	0xE0, 0x54, 0x1A, 0xF5, 0x34, 0xE0, 0xC4, 0x13,
+/*1440*/0x54, 0x07, 0xF5, 0x35, 0x24, 0xFE, 0x60, 0x24,
+	0x24, 0xFE, 0x60, 0x3C, 0x24, 0x04, 0x70, 0x63,
+/*1450*/0x75, 0x31, 0x2D, 0xE5, 0x08, 0xFD, 0x74, 0xB6,
+	0x12, 0x07, 0x92, 0x74, 0xBC, 0x90, 0x07, 0x22,
+/*1460*/0x12, 0x07, 0x95, 0x74, 0x90, 0x12, 0x07, 0xB3,
+	0x74, 0x92, 0x80, 0x3C, 0x75, 0x31, 0x3A, 0xE5,
+/*1470*/0x08, 0xFD, 0x74, 0xBA, 0x12, 0x07, 0x92, 0x74,
+	0xC0, 0x90, 0x07, 0x22, 0x12, 0x07, 0xB6, 0x74,
+/*1480*/0xC4, 0x12, 0x07, 0xB3, 0x74, 0xC8, 0x80, 0x20,
+	0x75, 0x31, 0x35, 0xE5, 0x08, 0xFD, 0x74, 0xB8,
+/*1490*/0x12, 0x07, 0x92, 0x74, 0xBE, 0xFF, 0xED, 0x44,
+	0x07, 0x90, 0x07, 0x22, 0xCF, 0xF0, 0xA3, 0xEF,
+/*14A0*/0xF0, 0x74, 0xC2, 0x12, 0x07, 0xB3, 0x74, 0xC6,
+	0xFF, 0xED, 0x44, 0x07, 0xA3, 0xCF, 0xF0, 0xA3,
+/*14B0*/0xEF, 0xF0, 0x22, 0x75, 0x34, 0x01, 0x22, 0x8E,
+	0x58, 0x8F, 0x59, 0x8C, 0x5A, 0x8D, 0x5B, 0x8A,
+/*14C0*/0x5C, 0x8B, 0x5D, 0x75, 0x5E, 0x01, 0xE4, 0xF5,
+	0x5F, 0x12, 0x1E, 0xA5, 0x85, 0x59, 0x5E, 0xD3,
+/*14D0*/0xE5, 0x5E, 0x95, 0x5B, 0xE5, 0x5A, 0x12, 0x07,
+	0x6B, 0x50, 0x57, 0xE5, 0x5D, 0x45, 0x5C, 0x70,
+/*14E0*/0x30, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x92, 0xE5,
+	0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC6, 0xE5,
+/*14F0*/0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC8, 0xE5,
+	0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0x90, 0xE5,
+/*1500*/0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2, 0xE5,
+	0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0x80,
+/*1510*/0x03, 0x12, 0x07, 0x32, 0xE5, 0x5E, 0xF0, 0xAF,
+	0x5F, 0x7E, 0x00, 0xAD, 0x5D, 0xAC, 0x5C, 0x12,
+/*1520*/0x04, 0x44, 0xAF, 0x5E, 0x7E, 0x00, 0xAD, 0x5D,
+	0xAC, 0x5C, 0x12, 0x0B, 0xD1, 0x05, 0x5E, 0x02,
+/*1530*/0x14, 0xCF, 0xAB, 0x5D, 0xAA, 0x5C, 0xAD, 0x5B,
+	0xAC, 0x5A, 0xAF, 0x59, 0xAE, 0x58, 0x02, 0x1B,
+/*1540*/0xFB, 0x8C, 0x5C, 0x8D, 0x5D, 0x8A, 0x5E, 0x8B,
+	0x5F, 0x75, 0x60, 0x01, 0xE4, 0xF5, 0x61, 0xF5,
+/*1550*/0x62, 0xF5, 0x63, 0x12, 0x1E, 0xA5, 0x8F, 0x60,
+	0xD3, 0xE5, 0x60, 0x95, 0x5D, 0xE5, 0x5C, 0x12,
+/*1560*/0x07, 0x6B, 0x50, 0x61, 0xE5, 0x5F, 0x45, 0x5E,
+	0x70, 0x27, 0x12, 0x07, 0x2A, 0x75, 0x83, 0xB6,
+/*1570*/0xE5, 0x60, 0x12, 0x07, 0x29, 0x75, 0x83, 0xB8,
+	0xE5, 0x60, 0x12, 0x07, 0x29, 0x75, 0x83, 0xBA,
+/*1580*/0xE5, 0x60, 0xF0, 0xAF, 0x61, 0x7E, 0x00, 0xE5,
+	0x62, 0x12, 0x08, 0x7A, 0x12, 0x0A, 0xFF, 0x80,
+/*1590*/0x19, 0x90, 0x07, 0x24, 0x12, 0x07, 0x35, 0xE5,
+	0x60, 0x12, 0x07, 0x29, 0x75, 0x83, 0x8E, 0xE4,
+/*15A0*/0x12, 0x07, 0x29, 0x74, 0x01, 0x12, 0x07, 0x29,
+	0xE4, 0xF0, 0xAF, 0x63, 0x7E, 0x00, 0xAD, 0x5F,
+/*15B0*/0xAC, 0x5E, 0x12, 0x04, 0x44, 0xAF, 0x60, 0x7E,
+	0x00, 0xAD, 0x5F, 0xAC, 0x5E, 0x12, 0x12, 0x8B,
+/*15C0*/0x05, 0x60, 0x02, 0x15, 0x58, 0x22, 0x90, 0x11,
+	0x4D, 0xE4, 0x93, 0x90, 0x07, 0x2E, 0xF0, 0x12,
+/*15D0*/0x08, 0x1F, 0x75, 0x83, 0xAE, 0xE0, 0x54, 0x1A,
+	0xF5, 0x34, 0x70, 0x67, 0xEF, 0x44, 0x07, 0xF5,
+/*15E0*/0x82, 0x75, 0x83, 0xCE, 0xE0, 0xFF, 0x13, 0x13,
+	0x13, 0x54, 0x07, 0xF5, 0x36, 0x54, 0x0F, 0xD3,
+/*15F0*/0x94, 0x00, 0x40, 0x06, 0x12, 0x14, 0x2D, 0x12,
+	0x1B, 0xA9, 0xE5, 0x36, 0x54, 0x0F, 0x24, 0xFE,
+/*1600*/0x60, 0x0C, 0x14, 0x60, 0x0C, 0x14, 0x60, 0x19,
+	0x24, 0x03, 0x70, 0x37, 0x80, 0x10, 0x02, 0x1E,
+/*1610*/0x91, 0x12, 0x1E, 0x91, 0x12, 0x07, 0x2A, 0x75,
+	0x83, 0xCE, 0xE0, 0x54, 0xEF, 0xF0, 0x02, 0x1D,
+/*1620*/0xAE, 0x12, 0x10, 0x14, 0xE4, 0xF5, 0x55, 0x12,
+	0x1D, 0x85, 0x05, 0x55, 0xE5, 0x55, 0xC3, 0x94,
+/*1630*/0x05, 0x40, 0xF4, 0x12, 0x07, 0x2A, 0x75, 0x83,
+	0xCE, 0xE0, 0x54, 0xC7, 0x12, 0x07, 0x29, 0xE0,
+/*1640*/0x44, 0x08, 0xF0, 0x22, 0xE4, 0xF5, 0x58, 0xF5,
+	0x59, 0xAF, 0x08, 0xEF, 0x44, 0x07, 0xF5, 0x82,
+/*1650*/0x75, 0x83, 0xD0, 0xE0, 0xFD, 0xC4, 0x54, 0x0F,
+	0xF5, 0x5A, 0xEF, 0x44, 0x07, 0xF5, 0x82, 0x75,
+/*1660*/0x83, 0x80, 0x74, 0x01, 0xF0, 0x12, 0x08, 0x21,
+	0x75, 0x83, 0x82, 0xE5, 0x45, 0xF0, 0xEF, 0x44,
+/*1670*/0x07, 0xF5, 0x82, 0x75, 0x83, 0x8A, 0x74, 0xFF,
+	0xF0, 0x12, 0x1A, 0x4D, 0x12, 0x07, 0x2A, 0x75,
+/*1680*/0x83, 0xBC, 0xE0, 0x54, 0xEF, 0x12, 0x07, 0x29,
+	0x75, 0x83, 0xBE, 0xE0, 0x54, 0xEF, 0x12, 0x07,
+/*1690*/0x29, 0x75, 0x83, 0xC0, 0xE0, 0x54, 0xEF, 0x12,
+	0x07, 0x29, 0x75, 0x83, 0xBC, 0xE0, 0x44, 0x10,
+/*16A0*/0x12, 0x07, 0x29, 0x75, 0x83, 0xBE, 0xE0, 0x44,
+	0x10, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC0, 0xE0,
+/*16B0*/0x44, 0x10, 0xF0, 0xAF, 0x58, 0xE5, 0x59, 0x12,
+	0x08, 0x78, 0x02, 0x0A, 0xFF, 0xE4, 0xF5, 0x58,
+/*16C0*/0x7D, 0x01, 0xF5, 0x59, 0xAF, 0x35, 0xFE, 0xFC,
+	0x12, 0x09, 0x15, 0x12, 0x07, 0x2A, 0x75, 0x83,
+/*16D0*/0xB6, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+	0xB8, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+/*16E0*/0xBA, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+	0xBC, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+/*16F0*/0xBE, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+	0xC0, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+/*1700*/0x90, 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2,
+	0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0xE4,
+/*1710*/0x12, 0x07, 0x29, 0x75, 0x83, 0x92, 0xE4, 0x12,
+	0x07, 0x29, 0x75, 0x83, 0xC6, 0xE4, 0x12, 0x07,
+/*1720*/0x29, 0x75, 0x83, 0xC8, 0xE4, 0xF0, 0xAF, 0x58,
+	0xFE, 0xE5, 0x59, 0x12, 0x08, 0x7A, 0x02, 0x0A,
+/*1730*/0xFF, 0xE5, 0xE2, 0x30, 0xE4, 0x6C, 0xE5, 0xE7,
+	0x54, 0xC0, 0x64, 0x40, 0x70, 0x64, 0xE5, 0x09,
+/*1740*/0xC4, 0x54, 0x30, 0xFE, 0xE5, 0x08, 0x25, 0xE0,
+	0x25, 0xE0, 0x54, 0xC0, 0x4E, 0xFE, 0xEF, 0x54,
+/*1750*/0x3F, 0x4E, 0xFD, 0xE5, 0x2B, 0xAE, 0x2A, 0x78,
+	0x02, 0xC3, 0x33, 0xCE, 0x33, 0xCE, 0xD8, 0xF9,
+/*1760*/0xF5, 0x82, 0x8E, 0x83, 0xED, 0xF0, 0xE5, 0x2B,
+	0xAE, 0x2A, 0x78, 0x02, 0xC3, 0x33, 0xCE, 0x33,
+/*1770*/0xCE, 0xD8, 0xF9, 0xFF, 0xF5, 0x82, 0x8E, 0x83,
+	0xA3, 0xE5, 0xFE, 0xF0, 0x8F, 0x82, 0x8E, 0x83,
+/*1780*/0xA3, 0xA3, 0xE5, 0xFD, 0xF0, 0x8F, 0x82, 0x8E,
+	0x83, 0xA3, 0xA3, 0xA3, 0xE5, 0xFC, 0xF0, 0xC3,
+/*1790*/0xE5, 0x2B, 0x94, 0xFA, 0xE5, 0x2A, 0x94, 0x00,
+	0x50, 0x08, 0x05, 0x2B, 0xE5, 0x2B, 0x70, 0x02,
+/*17A0*/0x05, 0x2A, 0x22, 0xE4, 0xFF, 0xE4, 0xF5, 0x58,
+	0xF5, 0x56, 0xF5, 0x57, 0x74, 0x82, 0xFC, 0x12,
+/*17B0*/0x0E, 0x04, 0x8C, 0x83, 0xE0, 0xF5, 0x10, 0x54,
+	0x7F, 0xF0, 0xE5, 0x10, 0x44, 0x80, 0x12, 0x0E,
+/*17C0*/0x98, 0xED, 0xF0, 0x7E, 0x0A, 0x12, 0x0E, 0x04,
+	0x75, 0x83, 0xA0, 0xE0, 0x20, 0xE0, 0x26, 0xDE,
+/*17D0*/0xF4, 0x05, 0x57, 0xE5, 0x57, 0x70, 0x02, 0x05,
+	0x56, 0xE5, 0x14, 0x24, 0x01, 0xFD, 0xE4, 0x33,
+/*17E0*/0xFC, 0xD3, 0xE5, 0x57, 0x9D, 0xE5, 0x56, 0x9C,
+	0x40, 0xD9, 0xE5, 0x0A, 0x94, 0x20, 0x50, 0x02,
+/*17F0*/0x05, 0x0A, 0x43, 0xE1, 0x08, 0xC2, 0x31, 0x12,
+	0x0E, 0x04, 0x75, 0x83, 0xA6, 0xE0, 0x55, 0x12,
+/*1800*/0x65, 0x12, 0x70, 0x03, 0xD2, 0x31, 0x22, 0xC2,
+	0x31, 0x22, 0x90, 0x07, 0x26, 0xE0, 0xFA, 0xA3,
+/*1810*/0xE0, 0xF5, 0x82, 0x8A, 0x83, 0xE0, 0xF5, 0x41,
+	0xE5, 0x39, 0xC3, 0x95, 0x41, 0x40, 0x26, 0xE5,
+/*1820*/0x39, 0x95, 0x41, 0xC3, 0x9F, 0xEE, 0x12, 0x07,
+	0x6B, 0x40, 0x04, 0x7C, 0x01, 0x80, 0x02, 0x7C,
+/*1830*/0x00, 0xE5, 0x41, 0x64, 0x3F, 0x60, 0x04, 0x7B,
+	0x01, 0x80, 0x02, 0x7B, 0x00, 0xEC, 0x5B, 0x60,
+/*1840*/0x29, 0x05, 0x41, 0x80, 0x28, 0xC3, 0xE5, 0x41,
+	0x95, 0x39, 0xC3, 0x9F, 0xEE, 0x12, 0x07, 0x6B,
+/*1850*/0x40, 0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00,
+	0xE5, 0x41, 0x60, 0x04, 0x7E, 0x01, 0x80, 0x02,
+/*1860*/0x7E, 0x00, 0xEF, 0x5E, 0x60, 0x04, 0x15, 0x41,
+	0x80, 0x03, 0x85, 0x39, 0x41, 0x85, 0x3A, 0x40,
+/*1870*/0x22, 0xE5, 0xE2, 0x30, 0xE4, 0x60, 0xE5, 0xE1,
+	0x30, 0xE2, 0x5B, 0xE5, 0x09, 0x70, 0x04, 0x7F,
+/*1880*/0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x08, 0x70,
+	0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE,
+/*1890*/0x5F, 0x60, 0x43, 0x53, 0xF9, 0xF8, 0xE5, 0xE2,
+	0x30, 0xE4, 0x3B, 0xE5, 0xE1, 0x30, 0xE2, 0x2E,
+/*18A0*/0x43, 0xFA, 0x02, 0x53, 0xFA, 0xFB, 0xE4, 0xF5,
+	0x10, 0x90, 0x94, 0x70, 0xE5, 0x10, 0xF0, 0xE5,
+/*18B0*/0xE1, 0x30, 0xE2, 0xE7, 0x90, 0x94, 0x70, 0xE0,
+	0x65, 0x10, 0x60, 0x03, 0x43, 0xFA, 0x04, 0x05,
+/*18C0*/0x10, 0x90, 0x94, 0x70, 0xE5, 0x10, 0xF0, 0x70,
+	0xE6, 0x12, 0x00, 0x06, 0x80, 0xE1, 0x53, 0xFA,
+/*18D0*/0xFD, 0x53, 0xFA, 0xFB, 0x80, 0xC0, 0x22, 0x8F,
+	0x54, 0x12, 0x00, 0x06, 0xE5, 0xE1, 0x30, 0xE0,
+/*18E0*/0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5,
+	0x7E, 0xD3, 0x94, 0x05, 0x40, 0x04, 0x7E, 0x01,
+/*18F0*/0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F, 0x60, 0x3D,
+	0x85, 0x54, 0x11, 0xE5, 0xE2, 0x20, 0xE1, 0x32,
+/*1900*/0x74, 0xCE, 0x12, 0x1A, 0x05, 0x30, 0xE7, 0x04,
+	0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00, 0x8F, 0x82,
+/*1910*/0x8E, 0x83, 0xE0, 0x30, 0xE6, 0x04, 0x7F, 0x01,
+	0x80, 0x02, 0x7F, 0x00, 0xEF, 0x5D, 0x70, 0x15,
+/*1920*/0x12, 0x15, 0xC6, 0x74, 0xCE, 0x12, 0x1A, 0x05,
+	0x30, 0xE6, 0x07, 0xE0, 0x44, 0x80, 0xF0, 0x43,
+/*1930*/0xF9, 0x80, 0x12, 0x18, 0x71, 0x22, 0x12, 0x0E,
+	0x44, 0xE5, 0x16, 0x25, 0xE0, 0x25, 0xE0, 0x24,
+/*1940*/0xB0, 0xF5, 0x82, 0xE4, 0x34, 0x1A, 0xF5, 0x83,
+	0xE4, 0x93, 0xF5, 0x0F, 0xE5, 0x16, 0x25, 0xE0,
+/*1950*/0x25, 0xE0, 0x24, 0xB1, 0xF5, 0x82, 0xE4, 0x34,
+	0x1A, 0xF5, 0x83, 0xE4, 0x93, 0xF5, 0x0E, 0x12,
+/*1960*/0x0E, 0x65, 0xF5, 0x10, 0xE5, 0x0F, 0x54, 0xF0,
+	0x12, 0x0E, 0x17, 0x75, 0x83, 0x8C, 0xEF, 0xF0,
+/*1970*/0xE5, 0x0F, 0x30, 0xE0, 0x0C, 0x12, 0x0E, 0x04,
+	0x75, 0x83, 0x86, 0xE0, 0x44, 0x40, 0xF0, 0x80,
+/*1980*/0x0A, 0x12, 0x0E, 0x04, 0x75, 0x83, 0x86, 0xE0,
+	0x54, 0xBF, 0xF0, 0x12, 0x0E, 0x91, 0x75, 0x83,
+/*1990*/0x82, 0xE5, 0x0E, 0xF0, 0x22, 0x7F, 0x05, 0x12,
+	0x17, 0x31, 0x12, 0x0E, 0x04, 0x12, 0x0E, 0x33,
+/*19A0*/0x74, 0x02, 0xF0, 0x74, 0x8E, 0xFE, 0x12, 0x0E,
+	0x04, 0x12, 0x0E, 0x0B, 0xEF, 0xF0, 0x75, 0x15,
+/*19B0*/0x70, 0x12, 0x0F, 0xF7, 0x20, 0x34, 0x05, 0x75,
+	0x15, 0x10, 0x80, 0x03, 0x75, 0x15, 0x50, 0x12,
+/*19C0*/0x0F, 0xF7, 0x20, 0x34, 0x04, 0x74, 0x10, 0x80,
+	0x02, 0x74, 0xF0, 0x25, 0x15, 0xF5, 0x15, 0x12,
+/*19D0*/0x0E, 0x21, 0xEF, 0xF0, 0x12, 0x10, 0x91, 0x20,
+	0x34, 0x17, 0xE5, 0x15, 0x64, 0x30, 0x60, 0x0C,
+/*19E0*/0x74, 0x10, 0x25, 0x15, 0xF5, 0x15, 0xB4, 0x80,
+	0x03, 0xE4, 0xF5, 0x15, 0x12, 0x0E, 0x21, 0xEF,
+/*19F0*/0xF0, 0x22, 0xF0, 0xE5, 0x0B, 0x25, 0xE0, 0x25,
+	0xE0, 0x24, 0x82, 0xF5, 0x82, 0xE4, 0x34, 0x07,
+/*1A00*/0xF5, 0x83, 0x22, 0x74, 0x88, 0xFE, 0xE5, 0x08,
+	0x44, 0x07, 0xFF, 0xF5, 0x82, 0x8E, 0x83, 0xE0,
+/*1A10*/0x22, 0xF0, 0xE5, 0x08, 0x44, 0x07, 0xF5, 0x82,
+	0x22, 0xF0, 0xE0, 0x54, 0xC0, 0x8F, 0x82, 0x8E,
+/*1A20*/0x83, 0xF0, 0x22, 0xEF, 0x44, 0x07, 0xF5, 0x82,
+	0x75, 0x83, 0x86, 0xE0, 0x54, 0x10, 0xD3, 0x94,
+/*1A30*/0x00, 0x22, 0xF0, 0x90, 0x07, 0x15, 0xE0, 0x04,
+	0xF0, 0x22, 0x44, 0x06, 0xF5, 0x82, 0x75, 0x83,
+/*1A40*/0x9E, 0xE0, 0x22, 0xFE, 0xEF, 0x44, 0x07, 0xF5,
+	0x82, 0x8E, 0x83, 0xE0, 0x22, 0xE4, 0x90, 0x07,
+/*1A50*/0x2A, 0xF0, 0xA3, 0xF0, 0x12, 0x07, 0x2A, 0x75,
+	0x83, 0x82, 0xE0, 0x54, 0x7F, 0x12, 0x07, 0x29,
+/*1A60*/0xE0, 0x44, 0x80, 0xF0, 0x12, 0x10, 0xFC, 0x12,
+	0x08, 0x1F, 0x75, 0x83, 0xA0, 0xE0, 0x20, 0xE0,
+/*1A70*/0x1A, 0x90, 0x07, 0x2B, 0xE0, 0x04, 0xF0, 0x70,
+	0x06, 0x90, 0x07, 0x2A, 0xE0, 0x04, 0xF0, 0x90,
+/*1A80*/0x07, 0x2A, 0xE0, 0xB4, 0x10, 0xE1, 0xA3, 0xE0,
+	0xB4, 0x00, 0xDC, 0xEE, 0x44, 0xA6, 0xFC, 0xEF,
+/*1A90*/0x44, 0x07, 0xF5, 0x82, 0x8C, 0x83, 0xE0, 0xF5,
+	0x32, 0xEE, 0x44, 0xA8, 0xFE, 0xEF, 0x44, 0x07,
+/*1AA0*/0xF5, 0x82, 0x8E, 0x83, 0xE0, 0xF5, 0x33, 0x22,
+	0x01, 0x20, 0x11, 0x00, 0x04, 0x20, 0x00, 0x90,
+/*1AB0*/0x00, 0x20, 0x0F, 0x92, 0x00, 0x21, 0x0F, 0x94,
+	0x00, 0x22, 0x0F, 0x96, 0x00, 0x23, 0x0F, 0x98,
+/*1AC0*/0x00, 0x24, 0x0F, 0x9A, 0x00, 0x25, 0x0F, 0x9C,
+	0x00, 0x26, 0x0F, 0x9E, 0x00, 0x27, 0x0F, 0xA0,
+/*1AD0*/0x01, 0x20, 0x01, 0xA2, 0x01, 0x21, 0x01, 0xA4,
+	0x01, 0x22, 0x01, 0xA6, 0x01, 0x23, 0x01, 0xA8,
+/*1AE0*/0x01, 0x24, 0x01, 0xAA, 0x01, 0x25, 0x01, 0xAC,
+	0x01, 0x26, 0x01, 0xAE, 0x01, 0x27, 0x01, 0xB0,
+/*1AF0*/0x01, 0x28, 0x01, 0xB4, 0x00, 0x28, 0x0F, 0xB6,
+	0x40, 0x28, 0x0F, 0xB8, 0x61, 0x28, 0x01, 0xCB,
+/*1B00*/0xEF, 0xCB, 0xCA, 0xEE, 0xCA, 0x7F, 0x01, 0xE4,
+	0xFD, 0xEB, 0x4A, 0x70, 0x24, 0xE5, 0x08, 0xF5,
+/*1B10*/0x82, 0x74, 0xB6, 0x12, 0x08, 0x29, 0xE5, 0x08,
+	0xF5, 0x82, 0x74, 0xB8, 0x12, 0x08, 0x29, 0xE5,
+/*1B20*/0x08, 0xF5, 0x82, 0x74, 0xBA, 0x12, 0x08, 0x29,
+	0x7E, 0x00, 0x7C, 0x00, 0x12, 0x0A, 0xFF, 0x80,
+/*1B30*/0x12, 0x90, 0x07, 0x26, 0x12, 0x07, 0x35, 0xE5,
+	0x41, 0xF0, 0x90, 0x07, 0x24, 0x12, 0x07, 0x35,
+/*1B40*/0xE5, 0x40, 0xF0, 0x12, 0x07, 0x2A, 0x75, 0x83,
+	0x8E, 0xE4, 0x12, 0x07, 0x29, 0x74, 0x01, 0x12,
+/*1B50*/0x07, 0x29, 0xE4, 0xF0, 0x22, 0xE4, 0xF5, 0x26,
+	0xF5, 0x27, 0x53, 0xE1, 0xFE, 0xF5, 0x2A, 0x75,
+/*1B60*/0x2B, 0x01, 0xF5, 0x08, 0x7F, 0x01, 0x12, 0x17,
+	0x31, 0x30, 0x30, 0x1C, 0x90, 0x1A, 0xA9, 0xE4,
+/*1B70*/0x93, 0xF5, 0x10, 0x90, 0x1F, 0xF9, 0xE4, 0x93,
+	0xF5, 0x10, 0x90, 0x00, 0x41, 0xE4, 0x93, 0xF5,
+/*1B80*/0x10, 0x90, 0x1E, 0xCA, 0xE4, 0x93, 0xF5, 0x10,
+	0x7F, 0x02, 0x12, 0x17, 0x31, 0x12, 0x0F, 0x54,
+/*1B90*/0x7F, 0x03, 0x12, 0x17, 0x31, 0x12, 0x00, 0x06,
+	0xE5, 0xE2, 0x30, 0xE7, 0x09, 0x12, 0x10, 0x00,
+/*1BA0*/0x30, 0x30, 0x03, 0x12, 0x11, 0x00, 0x02, 0x00,
+	0x47, 0x12, 0x08, 0x1F, 0x75, 0x83, 0xD0, 0xE0,
+/*1BB0*/0xC4, 0x54, 0x0F, 0xFD, 0x75, 0x43, 0x01, 0x75,
+	0x44, 0xFF, 0x12, 0x08, 0xAA, 0x74, 0x04, 0xF0,
+/*1BC0*/0x75, 0x3B, 0x01, 0xED, 0x14, 0x60, 0x0C, 0x14,
+	0x60, 0x0B, 0x14, 0x60, 0x0F, 0x24, 0x03, 0x70,
+/*1BD0*/0x0B, 0x80, 0x09, 0x80, 0x00, 0x12, 0x08, 0xA7,
+	0x04, 0xF0, 0x80, 0x06, 0x12, 0x08, 0xA7, 0x74,
+/*1BE0*/0x04, 0xF0, 0xEE, 0x44, 0x82, 0xFE, 0xEF, 0x44,
+	0x07, 0xF5, 0x82, 0x8E, 0x83, 0xE5, 0x45, 0x12,
+/*1BF0*/0x08, 0xBE, 0x75, 0x83, 0x82, 0xE5, 0x31, 0xF0,
+	0x02, 0x11, 0x4C, 0x8E, 0x60, 0x8F, 0x61, 0x12,
+/*1C00*/0x1E, 0xA5, 0xE4, 0xFF, 0xCE, 0xED, 0xCE, 0xEE,
+	0xD3, 0x95, 0x61, 0xE5, 0x60, 0x12, 0x07, 0x6B,
+/*1C10*/0x40, 0x39, 0x74, 0x20, 0x2E, 0xF5, 0x82, 0xE4,
+	0x34, 0x03, 0xF5, 0x83, 0xE0, 0x70, 0x03, 0xFF,
+/*1C20*/0x80, 0x26, 0x12, 0x08, 0xE2, 0xFD, 0xC3, 0x9F,
+	0x40, 0x1E, 0xCF, 0xED, 0xCF, 0xEB, 0x4A, 0x70,
+/*1C30*/0x0B, 0x8D, 0x42, 0x12, 0x08, 0xEE, 0xF5, 0x41,
+	0x8E, 0x40, 0x80, 0x0C, 0x12, 0x08, 0xE2, 0xF5,
+/*1C40*/0x38, 0x12, 0x08, 0xEE, 0xF5, 0x39, 0x8E, 0x3A,
+	0x1E, 0x80, 0xBC, 0x22, 0x75, 0x58, 0x01, 0xE5,
+/*1C50*/0x35, 0x70, 0x0C, 0x12, 0x07, 0xCC, 0xE0, 0xF5,
+	0x4A, 0x12, 0x07, 0xD8, 0xE0, 0xF5, 0x4C, 0xE5,
+/*1C60*/0x35, 0xB4, 0x04, 0x0C, 0x12, 0x07, 0xE4, 0xE0,
+	0xF5, 0x4A, 0x12, 0x07, 0xF0, 0xE0, 0xF5, 0x4C,
+/*1C70*/0xE5, 0x35, 0xB4, 0x01, 0x04, 0x7F, 0x01, 0x80,
+	0x02, 0x7F, 0x00, 0xE5, 0x35, 0xB4, 0x02, 0x04,
+/*1C80*/0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F,
+	0x60, 0x0C, 0x12, 0x07, 0xFC, 0xE0, 0xF5, 0x4A,
+/*1C90*/0x12, 0x08, 0x08, 0xE0, 0xF5, 0x4C, 0x85, 0x41,
+	0x49, 0x85, 0x40, 0x4B, 0x22, 0x75, 0x5B, 0x01,
+/*1CA0*/0x90, 0x07, 0x24, 0x12, 0x07, 0x35, 0xE0, 0x54,
+	0x1F, 0xFF, 0xD3, 0x94, 0x02, 0x50, 0x04, 0x8F,
+/*1CB0*/0x58, 0x80, 0x05, 0xEF, 0x24, 0xFE, 0xF5, 0x58,
+	0xEF, 0xC3, 0x94, 0x18, 0x40, 0x05, 0x75, 0x59,
+/*1CC0*/0x18, 0x80, 0x04, 0xEF, 0x04, 0xF5, 0x59, 0x85,
+	0x43, 0x5A, 0xAF, 0x58, 0x7E, 0x00, 0xAD, 0x59,
+/*1CD0*/0x7C, 0x00, 0xAB, 0x5B, 0x7A, 0x00, 0x12, 0x15,
+	0x41, 0xAF, 0x5A, 0x7E, 0x00, 0x12, 0x18, 0x0A,
+/*1CE0*/0xAF, 0x5B, 0x7E, 0x00, 0x02, 0x1A, 0xFF, 0xE5,
+	0xE2, 0x30, 0xE7, 0x0E, 0x12, 0x10, 0x03, 0xC2,
+/*1CF0*/0x30, 0x30, 0x30, 0x03, 0x12, 0x10, 0xFF, 0x20,
+	0x33, 0x28, 0xE5, 0xE7, 0x30, 0xE7, 0x05, 0x12,
+/*1D00*/0x0E, 0xA2, 0x80, 0x0D, 0xE5, 0xFE, 0xC3, 0x94,
+	0x20, 0x50, 0x06, 0x12, 0x0E, 0xA2, 0x43, 0xF9,
+/*1D10*/0x08, 0xE5, 0xF2, 0x30, 0xE7, 0x03, 0x53, 0xF9,
+	0x7F, 0xE5, 0xF1, 0x54, 0x70, 0xD3, 0x94, 0x00,
+/*1D20*/0x50, 0xD8, 0x22, 0x12, 0x0E, 0x04, 0x75, 0x83,
+	0x80, 0xE4, 0xF0, 0xE5, 0x08, 0x44, 0x07, 0x12,
+/*1D30*/0x0D, 0xFD, 0x75, 0x83, 0x84, 0x12, 0x0E, 0x02,
+	0x75, 0x83, 0x86, 0x12, 0x0E, 0x02, 0x75, 0x83,
+/*1D40*/0x8C, 0xE0, 0x54, 0xF3, 0x12, 0x0E, 0x03, 0x75,
+	0x83, 0x8E, 0x12, 0x0E, 0x02, 0x75, 0x83, 0x94,
+/*1D50*/0xE0, 0x54, 0xFB, 0xF0, 0x22, 0x12, 0x07, 0x2A,
+	0x75, 0x83, 0x8E, 0xE4, 0x12, 0x07, 0x29, 0x74,
+/*1D60*/0x01, 0x12, 0x07, 0x29, 0xE4, 0x12, 0x08, 0xBE,
+	0x75, 0x83, 0x8C, 0xE0, 0x44, 0x20, 0x12, 0x08,
+/*1D70*/0xBE, 0xE0, 0x54, 0xDF, 0xF0, 0x74, 0x84, 0x85,
+	0x08, 0x82, 0xF5, 0x83, 0xE0, 0x54, 0x7F, 0xF0,
+/*1D80*/0xE0, 0x44, 0x80, 0xF0, 0x22, 0x75, 0x56, 0x01,
+	0xE4, 0xFD, 0xF5, 0x57, 0xAF, 0x35, 0xFE, 0xFC,
+/*1D90*/0x12, 0x09, 0x15, 0x12, 0x1C, 0x9D, 0x12, 0x1E,
+	0x7A, 0x12, 0x1C, 0x4C, 0xAF, 0x57, 0x7E, 0x00,
+/*1DA0*/0xAD, 0x56, 0x7C, 0x00, 0x12, 0x04, 0x44, 0xAF,
+	0x56, 0x7E, 0x00, 0x02, 0x11, 0xEE, 0x75, 0x56,
+/*1DB0*/0x01, 0xE4, 0xFD, 0xF5, 0x57, 0xAF, 0x35, 0xFE,
+	0xFC, 0x12, 0x09, 0x15, 0x12, 0x1C, 0x9D, 0x12,
+/*1DC0*/0x1E, 0x7A, 0x12, 0x1C, 0x4C, 0xAF, 0x57, 0x7E,
+	0x00, 0xAD, 0x56, 0x7C, 0x00, 0x12, 0x04, 0x44,
+/*1DD0*/0xAF, 0x56, 0x7E, 0x00, 0x02, 0x11, 0xEE, 0xE4,
+	0xF5, 0x16, 0x12, 0x0E, 0x44, 0xFE, 0xE5, 0x08,
+/*1DE0*/0x44, 0x05, 0xFF, 0x12, 0x0E, 0x65, 0x8F, 0x82,
+	0x8E, 0x83, 0xF0, 0x05, 0x16, 0xE5, 0x16, 0xC3,
+/*1DF0*/0x94, 0x14, 0x40, 0xE6, 0xE5, 0x08, 0x12, 0x0E,
+	0x2B, 0xE4, 0xF0, 0x22, 0xE4, 0xF5, 0x58, 0xF5,
+/*1E00*/0x59, 0xF5, 0x5A, 0xFF, 0xFE, 0xAD, 0x58, 0xFC,
+	0x12, 0x09, 0x15, 0x7F, 0x04, 0x7E, 0x00, 0xAD,
+/*1E10*/0x58, 0x7C, 0x00, 0x12, 0x09, 0x15, 0x7F, 0x02,
+	0x7E, 0x00, 0xAD, 0x58, 0x7C, 0x00, 0x02, 0x09,
+/*1E20*/0x15, 0xE5, 0x3C, 0x25, 0x3E, 0xFC, 0xE5, 0x42,
+	0x24, 0x00, 0xFB, 0xE4, 0x33, 0xFA, 0xEC, 0xC3,
+/*1E30*/0x9B, 0xEA, 0x12, 0x07, 0x6B, 0x40, 0x0B, 0x8C,
+	0x42, 0xE5, 0x3D, 0x25, 0x3F, 0xF5, 0x41, 0x8F,
+/*1E40*/0x40, 0x22, 0x12, 0x09, 0x0B, 0x22, 0x74, 0x84,
+	0xF5, 0x18, 0x85, 0x08, 0x19, 0x85, 0x19, 0x82,
+/*1E50*/0x85, 0x18, 0x83, 0xE0, 0x54, 0x7F, 0xF0, 0xE0,
+	0x44, 0x80, 0xF0, 0xE0, 0x44, 0x80, 0xF0, 0x22,
+/*1E60*/0xEF, 0x4E, 0x70, 0x0B, 0x12, 0x07, 0x2A, 0x75,
+	0x83, 0xD2, 0xE0, 0x54, 0xDF, 0xF0, 0x22, 0x12,
+/*1E70*/0x07, 0x2A, 0x75, 0x83, 0xD2, 0xE0, 0x44, 0x20,
+	0xF0, 0x22, 0x75, 0x58, 0x01, 0x90, 0x07, 0x26,
+/*1E80*/0x12, 0x07, 0x35, 0xE0, 0x54, 0x3F, 0xF5, 0x41,
+	0x12, 0x07, 0x32, 0xE0, 0x54, 0x3F, 0xF5, 0x40,
+/*1E90*/0x22, 0x75, 0x56, 0x02, 0xE4, 0xF5, 0x57, 0x12,
+	0x1D, 0xFC, 0xAF, 0x57, 0x7E, 0x00, 0xAD, 0x56,
+/*1EA0*/0x7C, 0x00, 0x02, 0x04, 0x44, 0xE4, 0xF5, 0x42,
+	0xF5, 0x41, 0xF5, 0x40, 0xF5, 0x38, 0xF5, 0x39,
+/*1EB0*/0xF5, 0x3A, 0x22, 0xEF, 0x54, 0x07, 0xFF, 0xE5,
+	0xF9, 0x54, 0xF8, 0x4F, 0xF5, 0xF9, 0x22, 0x7F,
+/*1EC0*/0x01, 0xE4, 0xFE, 0x0F, 0x0E, 0xBE, 0xFF, 0xFB,
+	0x22, 0x01, 0x20, 0x00, 0x01, 0x04, 0x20, 0x00,
+/*1ED0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1EE0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1EF0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F00*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F10*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F20*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F30*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F40*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F50*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F60*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F70*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F80*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F90*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FA0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FB0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FC0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FD0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FE0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FF0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x01, 0x20, 0x11, 0x00, 0x04, 0x20, 0x00, 0x81
+};
+
+int qib_sd7220_ib_load(struct qib_devdata *dd)
+{
+	return qib_sd7220_prog_ld(dd, IB_7220_SERDES, qib_sd7220_ib_img,
+		sizeof(qib_sd7220_ib_img), 0);
+}
+
+int qib_sd7220_ib_vfy(struct qib_devdata *dd)
+{
+	return qib_sd7220_prog_vfy(dd, IB_7220_SERDES, qib_sd7220_ib_img,
+		sizeof(qib_sd7220_ib_img), 0);
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 35/52] IB/qib: Add qib_sdma.c
From: Ralph Campbell @ 2010-05-07  0:01 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_sdma.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_sdma.c |  973 ++++++++++++++++++++++++++++++++++
 1 files changed, 973 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_sdma.c

diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
new file mode 100644
index 0000000..b845688
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -0,0 +1,973 @@
+/*
+ * Copyright (c) 2007, 2008, 2009, 2010 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/netdevice.h>
+
+#include "qib.h"
+#include "qib_common.h"
+
+/* default pio off, sdma on */
+static ushort sdma_descq_cnt = 256;
+module_param_named(sdma_descq_cnt, sdma_descq_cnt, ushort, S_IRUGO);
+MODULE_PARM_DESC(sdma_descq_cnt, "Number of SDMA descq entries");
+
+/*
+ * Bits defined in the send DMA descriptor.
+ */
+#define SDMA_DESC_LAST          (1ULL << 11)
+#define SDMA_DESC_FIRST         (1ULL << 12)
+#define SDMA_DESC_DMA_HEAD      (1ULL << 13)
+#define SDMA_DESC_USE_LARGE_BUF (1ULL << 14)
+#define SDMA_DESC_INTR          (1ULL << 15)
+#define SDMA_DESC_COUNT_LSB     16
+#define SDMA_DESC_GEN_LSB       30
+
+char *qib_sdma_state_names[] = {
+	[qib_sdma_state_s00_hw_down]          = "s00_HwDown",
+	[qib_sdma_state_s10_hw_start_up_wait] = "s10_HwStartUpWait",
+	[qib_sdma_state_s20_idle]             = "s20_Idle",
+	[qib_sdma_state_s30_sw_clean_up_wait] = "s30_SwCleanUpWait",
+	[qib_sdma_state_s40_hw_clean_up_wait] = "s40_HwCleanUpWait",
+	[qib_sdma_state_s50_hw_halt_wait]     = "s50_HwHaltWait",
+	[qib_sdma_state_s99_running]          = "s99_Running",
+};
+
+char *qib_sdma_event_names[] = {
+	[qib_sdma_event_e00_go_hw_down]   = "e00_GoHwDown",
+	[qib_sdma_event_e10_go_hw_start]  = "e10_GoHwStart",
+	[qib_sdma_event_e20_hw_started]   = "e20_HwStarted",
+	[qib_sdma_event_e30_go_running]   = "e30_GoRunning",
+	[qib_sdma_event_e40_sw_cleaned]   = "e40_SwCleaned",
+	[qib_sdma_event_e50_hw_cleaned]   = "e50_HwCleaned",
+	[qib_sdma_event_e60_hw_halted]    = "e60_HwHalted",
+	[qib_sdma_event_e70_go_idle]      = "e70_GoIdle",
+	[qib_sdma_event_e7220_err_halted] = "e7220_ErrHalted",
+	[qib_sdma_event_e7322_err_halted] = "e7322_ErrHalted",
+	[qib_sdma_event_e90_timer_tick]   = "e90_TimerTick",
+};
+
+/* declare all statics here rather than keep sorting */
+static int alloc_sdma(struct qib_pportdata *);
+static void sdma_complete(struct kref *);
+static void sdma_finalput(struct qib_sdma_state *);
+static void sdma_get(struct qib_sdma_state *);
+static void sdma_put(struct qib_sdma_state *);
+static void sdma_set_state(struct qib_pportdata *, enum qib_sdma_states);
+static void sdma_start_sw_clean_up(struct qib_pportdata *);
+static void sdma_sw_clean_up_task(unsigned long);
+static void unmap_desc(struct qib_pportdata *, unsigned);
+
+static void sdma_get(struct qib_sdma_state *ss)
+{
+	kref_get(&ss->kref);
+}
+
+static void sdma_complete(struct kref *kref)
+{
+	struct qib_sdma_state *ss =
+		container_of(kref, struct qib_sdma_state, kref);
+
+	complete(&ss->comp);
+}
+
+static void sdma_put(struct qib_sdma_state *ss)
+{
+	kref_put(&ss->kref, sdma_complete);
+}
+
+static void sdma_finalput(struct qib_sdma_state *ss)
+{
+	sdma_put(ss);
+	wait_for_completion(&ss->comp);
+}
+
+/*
+ * Complete all the sdma requests on the active list, in the correct
+ * order, and with appropriate processing.   Called when cleaning up
+ * after sdma shutdown, and when new sdma requests are submitted for
+ * a link that is down.   This matches what is done for requests
+ * that complete normally, it's just the full list.
+ *
+ * Must be called with sdma_lock held
+ */
+static void clear_sdma_activelist(struct qib_pportdata *ppd)
+{
+	struct qib_sdma_txreq *txp, *txp_next;
+
+	list_for_each_entry_safe(txp, txp_next, &ppd->sdma_activelist, list) {
+		list_del_init(&txp->list);
+		if (txp->flags & QIB_SDMA_TXREQ_F_FREEDESC) {
+			unsigned idx;
+
+			idx = txp->start_idx;
+			while (idx != txp->next_descq_idx) {
+				unmap_desc(ppd, idx);
+				if (++idx == ppd->sdma_descq_cnt)
+					idx = 0;
+			}
+		}
+		if (txp->callback)
+			(*txp->callback)(txp, QIB_SDMA_TXREQ_S_ABORTED);
+	}
+}
+
+static void sdma_sw_clean_up_task(unsigned long opaque)
+{
+	struct qib_pportdata *ppd = (struct qib_pportdata *) opaque;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+	/*
+	 * At this point, the following should always be true:
+	 * - We are halted, so no more descriptors are getting retired.
+	 * - We are not running, so no one is submitting new work.
+	 * - Only we can send the e40_sw_cleaned, so we can't start
+	 *   running again until we say so.  So, the active list and
+	 *   descq are ours to play with.
+	 */
+
+	/* Process all retired requests. */
+	qib_sdma_make_progress(ppd);
+
+	clear_sdma_activelist(ppd);
+
+	/*
+	 * Resync count of added and removed.  It is VERY important that
+	 * sdma_descq_removed NEVER decrement - user_sdma depends on it.
+	 */
+	ppd->sdma_descq_removed = ppd->sdma_descq_added;
+
+	/*
+	 * Reset our notion of head and tail.
+	 * Note that the HW registers will be reset when switching states
+	 * due to calling __qib_sdma_process_event() below.
+	 */
+	ppd->sdma_descq_tail = 0;
+	ppd->sdma_descq_head = 0;
+	ppd->sdma_head_dma[0] = 0;
+	ppd->sdma_generation = 0;
+
+	__qib_sdma_process_event(ppd, qib_sdma_event_e40_sw_cleaned);
+
+	spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+}
+
+/*
+ * This is called when changing to state qib_sdma_state_s10_hw_start_up_wait
+ * as a result of send buffer errors or send DMA descriptor errors.
+ * We want to disarm the buffers in these cases.
+ */
+static void sdma_hw_start_up(struct qib_pportdata *ppd)
+{
+	struct qib_sdma_state *ss = &ppd->sdma_state;
+	unsigned bufno;
+
+	for (bufno = ss->first_sendbuf; bufno < ss->last_sendbuf; ++bufno)
+		ppd->dd->f_sendctrl(ppd, QIB_SENDCTRL_DISARM_BUF(bufno));
+
+	ppd->dd->f_sdma_hw_start_up(ppd);
+}
+
+static void sdma_sw_tear_down(struct qib_pportdata *ppd)
+{
+	struct qib_sdma_state *ss = &ppd->sdma_state;
+
+	/* Releasing this reference means the state machine has stopped. */
+	sdma_put(ss);
+}
+
+static void sdma_start_sw_clean_up(struct qib_pportdata *ppd)
+{
+	tasklet_hi_schedule(&ppd->sdma_sw_clean_up_task);
+}
+
+static void sdma_set_state(struct qib_pportdata *ppd,
+	enum qib_sdma_states next_state)
+{
+	struct qib_sdma_state *ss = &ppd->sdma_state;
+	struct sdma_set_state_action *action = ss->set_state_action;
+	unsigned op = 0;
+
+	/* debugging bookkeeping */
+	ss->previous_state = ss->current_state;
+	ss->previous_op = ss->current_op;
+
+	ss->current_state = next_state;
+
+	if (action[next_state].op_enable)
+		op |= QIB_SDMA_SENDCTRL_OP_ENABLE;
+
+	if (action[next_state].op_intenable)
+		op |= QIB_SDMA_SENDCTRL_OP_INTENABLE;
+
+	if (action[next_state].op_halt)
+		op |= QIB_SDMA_SENDCTRL_OP_HALT;
+
+	if (action[next_state].op_drain)
+		op |= QIB_SDMA_SENDCTRL_OP_DRAIN;
+
+	if (action[next_state].go_s99_running_tofalse)
+		ss->go_s99_running = 0;
+
+	if (action[next_state].go_s99_running_totrue)
+		ss->go_s99_running = 1;
+
+	ss->current_op = op;
+
+	ppd->dd->f_sdma_sendctrl(ppd, ss->current_op);
+}
+
+static void unmap_desc(struct qib_pportdata *ppd, unsigned head)
+{
+	__le64 *descqp = &ppd->sdma_descq[head].qw[0];
+	u64 desc[2];
+	dma_addr_t addr;
+	size_t len;
+
+	desc[0] = le64_to_cpu(descqp[0]);
+	desc[1] = le64_to_cpu(descqp[1]);
+
+	addr = (desc[1] << 32) | (desc[0] >> 32);
+	len = (desc[0] >> 14) & (0x7ffULL << 2);
+	dma_unmap_single(&ppd->dd->pcidev->dev, addr, len, DMA_TO_DEVICE);
+}
+
+static int alloc_sdma(struct qib_pportdata *ppd)
+{
+	ppd->sdma_descq_cnt = sdma_descq_cnt;
+	if (!ppd->sdma_descq_cnt)
+		ppd->sdma_descq_cnt = 256;
+
+	/* Allocate memory for SendDMA descriptor FIFO */
+	ppd->sdma_descq = dma_alloc_coherent(&ppd->dd->pcidev->dev,
+		ppd->sdma_descq_cnt * sizeof(u64[2]), &ppd->sdma_descq_phys,
+		GFP_KERNEL);
+
+	if (!ppd->sdma_descq) {
+		qib_dev_err(ppd->dd, "failed to allocate SendDMA descriptor "
+			    "FIFO memory\n");
+		goto bail;
+	}
+
+	/* Allocate memory for DMA of head register to memory */
+	ppd->sdma_head_dma = dma_alloc_coherent(&ppd->dd->pcidev->dev,
+		PAGE_SIZE, &ppd->sdma_head_phys, GFP_KERNEL);
+	if (!ppd->sdma_head_dma) {
+		qib_dev_err(ppd->dd, "failed to allocate SendDMA "
+			    "head memory\n");
+		goto cleanup_descq;
+	}
+	ppd->sdma_head_dma[0] = 0;
+	return 0;
+
+cleanup_descq:
+	dma_free_coherent(&ppd->dd->pcidev->dev,
+		ppd->sdma_descq_cnt * sizeof(u64[2]), (void *)ppd->sdma_descq,
+		ppd->sdma_descq_phys);
+	ppd->sdma_descq = NULL;
+	ppd->sdma_descq_phys = 0;
+bail:
+	ppd->sdma_descq_cnt = 0;
+	return -ENOMEM;
+}
+
+static void free_sdma(struct qib_pportdata *ppd)
+{
+	struct qib_devdata *dd = ppd->dd;
+
+	if (ppd->sdma_head_dma) {
+		dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+				  (void *)ppd->sdma_head_dma,
+				  ppd->sdma_head_phys);
+		ppd->sdma_head_dma = NULL;
+		ppd->sdma_head_phys = 0;
+	}
+
+	if (ppd->sdma_descq) {
+		dma_free_coherent(&dd->pcidev->dev,
+				  ppd->sdma_descq_cnt * sizeof(u64[2]),
+				  ppd->sdma_descq, ppd->sdma_descq_phys);
+		ppd->sdma_descq = NULL;
+		ppd->sdma_descq_phys = 0;
+	}
+}
+
+static inline void make_sdma_desc(struct qib_pportdata *ppd,
+				  u64 *sdmadesc, u64 addr, u64 dwlen,
+				  u64 dwoffset)
+{
+
+	WARN_ON(addr & 3);
+	/* SDmaPhyAddr[47:32] */
+	sdmadesc[1] = addr >> 32;
+	/* SDmaPhyAddr[31:0] */
+	sdmadesc[0] = (addr & 0xfffffffcULL) << 32;
+	/* SDmaGeneration[1:0] */
+	sdmadesc[0] |= (ppd->sdma_generation & 3ULL) <<
+		SDMA_DESC_GEN_LSB;
+	/* SDmaDwordCount[10:0] */
+	sdmadesc[0] |= (dwlen & 0x7ffULL) << SDMA_DESC_COUNT_LSB;
+	/* SDmaBufOffset[12:2] */
+	sdmadesc[0] |= dwoffset & 0x7ffULL;
+}
+
+/* sdma_lock must be held */
+int qib_sdma_make_progress(struct qib_pportdata *ppd)
+{
+	struct list_head *lp = NULL;
+	struct qib_sdma_txreq *txp = NULL;
+	struct qib_devdata *dd = ppd->dd;
+	int progress = 0;
+	u16 hwhead;
+	u16 idx = 0;
+
+	hwhead = dd->f_sdma_gethead(ppd);
+
+	/* The reason for some of the complexity of this code is that
+	 * not all descriptors have corresponding txps.  So, we have to
+	 * be able to skip over descs until we wander into the range of
+	 * the next txp on the list.
+	 */
+
+	if (!list_empty(&ppd->sdma_activelist)) {
+		lp = ppd->sdma_activelist.next;
+		txp = list_entry(lp, struct qib_sdma_txreq, list);
+		idx = txp->start_idx;
+	}
+
+	while (ppd->sdma_descq_head != hwhead) {
+		/* if desc is part of this txp, unmap if needed */
+		if (txp && (txp->flags & QIB_SDMA_TXREQ_F_FREEDESC) &&
+		    (idx == ppd->sdma_descq_head)) {
+			unmap_desc(ppd, ppd->sdma_descq_head);
+			if (++idx == ppd->sdma_descq_cnt)
+				idx = 0;
+		}
+
+		/* increment dequed desc count */
+		ppd->sdma_descq_removed++;
+
+		/* advance head, wrap if needed */
+		if (++ppd->sdma_descq_head == ppd->sdma_descq_cnt)
+			ppd->sdma_descq_head = 0;
+
+		/* if now past this txp's descs, do the callback */
+		if (txp && txp->next_descq_idx == ppd->sdma_descq_head) {
+			/* remove from active list */
+			list_del_init(&txp->list);
+			if (txp->callback)
+				(*txp->callback)(txp, QIB_SDMA_TXREQ_S_OK);
+			/* see if there is another txp */
+			if (list_empty(&ppd->sdma_activelist))
+				txp = NULL;
+			else {
+				lp = ppd->sdma_activelist.next;
+				txp = list_entry(lp, struct qib_sdma_txreq,
+					list);
+				idx = txp->start_idx;
+			}
+		}
+		progress = 1;
+	}
+	if (progress)
+		qib_verbs_sdma_desc_avail(ppd, qib_sdma_descq_freecnt(ppd));
+	return progress;
+}
+
+/*
+ * This is called from interrupt context.
+ */
+void qib_sdma_intr(struct qib_pportdata *ppd)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+	__qib_sdma_intr(ppd);
+
+	spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+}
+
+void __qib_sdma_intr(struct qib_pportdata *ppd)
+{
+	if (__qib_sdma_running(ppd))
+		qib_sdma_make_progress(ppd);
+}
+
+int qib_setup_sdma(struct qib_pportdata *ppd)
+{
+	struct qib_devdata *dd = ppd->dd;
+	unsigned long flags;
+	int ret = 0;
+
+	ret = alloc_sdma(ppd);
+	if (ret)
+		goto bail;
+
+	/* set consistent sdma state */
+	ppd->dd->f_sdma_init_early(ppd);
+	spin_lock_irqsave(&ppd->sdma_lock, flags);
+	sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+	spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
+	/* set up reference counting */
+	kref_init(&ppd->sdma_state.kref);
+	init_completion(&ppd->sdma_state.comp);
+
+	ppd->sdma_generation = 0;
+	ppd->sdma_descq_head = 0;
+	ppd->sdma_descq_removed = 0;
+	ppd->sdma_descq_added = 0;
+
+	INIT_LIST_HEAD(&ppd->sdma_activelist);
+
+	tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
+		(unsigned long)ppd);
+
+	ret = dd->f_init_sdma_regs(ppd);
+	if (ret)
+		goto bail_alloc;
+
+	qib_sdma_process_event(ppd, qib_sdma_event_e10_go_hw_start);
+
+	return 0;
+
+bail_alloc:
+	qib_teardown_sdma(ppd);
+bail:
+	return ret;
+}
+
+void qib_teardown_sdma(struct qib_pportdata *ppd)
+{
+	qib_sdma_process_event(ppd, qib_sdma_event_e00_go_hw_down);
+
+	/*
+	 * This waits for the state machine to exit so it is not
+	 * necessary to kill the sdma_sw_clean_up_task to make sure
+	 * it is not running.
+	 */
+	sdma_finalput(&ppd->sdma_state);
+
+	free_sdma(ppd);
+}
+
+int qib_sdma_running(struct qib_pportdata *ppd)
+{
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&ppd->sdma_lock, flags);
+	ret = __qib_sdma_running(ppd);
+	spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
+	return ret;
+}
+
+/*
+ * Complete a request when sdma not running; likely only request
+ * but to simplify the code, always queue it, then process the full
+ * activelist.  We process the entire list to ensure that this particular
+ * request does get it's callback, but in the correct order.
+ * Must be called with sdma_lock held
+ */
+static void complete_sdma_err_req(struct qib_pportdata *ppd,
+				  struct qib_verbs_txreq *tx)
+{
+	atomic_inc(&tx->qp->s_dma_busy);
+	/* no sdma descriptors, so no unmap_desc */
+	tx->txreq.start_idx = 0;
+	tx->txreq.next_descq_idx = 0;
+	list_add_tail(&tx->txreq.list, &ppd->sdma_activelist);
+	clear_sdma_activelist(ppd);
+}
+
+/*
+ * This function queues one IB packet onto the send DMA queue per call.
+ * The caller is responsible for checking:
+ * 1) The number of send DMA descriptor entries is less than the size of
+ *    the descriptor queue.
+ * 2) The IB SGE addresses and lengths are 32-bit aligned
+ *    (except possibly the last SGE's length)
+ * 3) The SGE addresses are suitable for passing to dma_map_single().
+ */
+int qib_sdma_verbs_send(struct qib_pportdata *ppd,
+			struct qib_sge_state *ss, u32 dwords,
+			struct qib_verbs_txreq *tx)
+{
+	unsigned long flags;
+	struct qib_sge *sge;
+	struct qib_qp *qp;
+	int ret = 0;
+	u16 tail;
+	__le64 *descqp;
+	u64 sdmadesc[2];
+	u32 dwoffset;
+	dma_addr_t addr;
+
+	spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+retry:
+	if (unlikely(!__qib_sdma_running(ppd))) {
+		complete_sdma_err_req(ppd, tx);
+		goto unlock;
+	}
+
+	if (tx->txreq.sg_count > qib_sdma_descq_freecnt(ppd)) {
+		if (qib_sdma_make_progress(ppd))
+			goto retry;
+		if (ppd->dd->flags & QIB_HAS_SDMA_TIMEOUT)
+			ppd->dd->f_sdma_set_desc_cnt(ppd,
+					ppd->sdma_descq_cnt / 2);
+		goto busy;
+	}
+
+	dwoffset = tx->hdr_dwords;
+	make_sdma_desc(ppd, sdmadesc, (u64) tx->txreq.addr, dwoffset, 0);
+
+	sdmadesc[0] |= SDMA_DESC_FIRST;
+	if (tx->txreq.flags & QIB_SDMA_TXREQ_F_USELARGEBUF)
+		sdmadesc[0] |= SDMA_DESC_USE_LARGE_BUF;
+
+	/* write to the descq */
+	tail = ppd->sdma_descq_tail;
+	descqp = &ppd->sdma_descq[tail].qw[0];
+	*descqp++ = cpu_to_le64(sdmadesc[0]);
+	*descqp++ = cpu_to_le64(sdmadesc[1]);
+
+	/* increment the tail */
+	if (++tail == ppd->sdma_descq_cnt) {
+		tail = 0;
+		descqp = &ppd->sdma_descq[0].qw[0];
+		++ppd->sdma_generation;
+	}
+
+	tx->txreq.start_idx = tail;
+
+	sge = &ss->sge;
+	while (dwords) {
+		u32 dw;
+		u32 len;
+
+		len = dwords << 2;
+		if (len > sge->length)
+			len = sge->length;
+		if (len > sge->sge_length)
+			len = sge->sge_length;
+		BUG_ON(len == 0);
+		dw = (len + 3) >> 2;
+		addr = dma_map_single(&ppd->dd->pcidev->dev, sge->vaddr,
+				      dw << 2, DMA_TO_DEVICE);
+		if (dma_mapping_error(&ppd->dd->pcidev->dev, addr))
+			goto unmap;
+		sdmadesc[0] = 0;
+		make_sdma_desc(ppd, sdmadesc, (u64) addr, dw, dwoffset);
+		/* SDmaUseLargeBuf has to be set in every descriptor */
+		if (tx->txreq.flags & QIB_SDMA_TXREQ_F_USELARGEBUF)
+			sdmadesc[0] |= SDMA_DESC_USE_LARGE_BUF;
+		/* write to the descq */
+		*descqp++ = cpu_to_le64(sdmadesc[0]);
+		*descqp++ = cpu_to_le64(sdmadesc[1]);
+
+		/* increment the tail */
+		if (++tail == ppd->sdma_descq_cnt) {
+			tail = 0;
+			descqp = &ppd->sdma_descq[0].qw[0];
+			++ppd->sdma_generation;
+		}
+		sge->vaddr += len;
+		sge->length -= len;
+		sge->sge_length -= len;
+		if (sge->sge_length == 0) {
+			if (--ss->num_sge)
+				*sge = *ss->sg_list++;
+		} else if (sge->length == 0 && sge->mr->lkey) {
+			if (++sge->n >= QIB_SEGSZ) {
+				if (++sge->m >= sge->mr->mapsz)
+					break;
+				sge->n = 0;
+			}
+			sge->vaddr =
+				sge->mr->map[sge->m]->segs[sge->n].vaddr;
+			sge->length =
+				sge->mr->map[sge->m]->segs[sge->n].length;
+		}
+
+		dwoffset += dw;
+		dwords -= dw;
+	}
+
+	if (!tail)
+		descqp = &ppd->sdma_descq[ppd->sdma_descq_cnt].qw[0];
+	descqp -= 2;
+	descqp[0] |= cpu_to_le64(SDMA_DESC_LAST);
+	if (tx->txreq.flags & QIB_SDMA_TXREQ_F_HEADTOHOST)
+		descqp[0] |= cpu_to_le64(SDMA_DESC_DMA_HEAD);
+	if (tx->txreq.flags & QIB_SDMA_TXREQ_F_INTREQ)
+		descqp[0] |= cpu_to_le64(SDMA_DESC_INTR);
+
+	atomic_inc(&tx->qp->s_dma_busy);
+	tx->txreq.next_descq_idx = tail;
+	ppd->dd->f_sdma_update_tail(ppd, tail);
+	ppd->sdma_descq_added += tx->txreq.sg_count;
+	list_add_tail(&tx->txreq.list, &ppd->sdma_activelist);
+	goto unlock;
+
+unmap:
+	for (;;) {
+		if (!tail)
+			tail = ppd->sdma_descq_cnt - 1;
+		else
+			tail--;
+		if (tail == ppd->sdma_descq_tail)
+			break;
+		unmap_desc(ppd, tail);
+	}
+	qp = tx->qp;
+	qib_put_txreq(tx);
+	spin_lock(&qp->s_lock);
+	if (qp->ibqp.qp_type == IB_QPT_RC) {
+		/* XXX what about error sending RDMA read responses? */
+		if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)
+			qib_error_qp(qp, IB_WC_GENERAL_ERR);
+	} else if (qp->s_wqe)
+		qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
+	spin_unlock(&qp->s_lock);
+	/* return zero to process the next send work request */
+	goto unlock;
+
+busy:
+	qp = tx->qp;
+	spin_lock(&qp->s_lock);
+	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
+		struct qib_ibdev *dev;
+
+		/*
+		 * If we couldn't queue the DMA request, save the info
+		 * and try again later rather than destroying the
+		 * buffer and undoing the side effects of the copy.
+		 */
+		tx->ss = ss;
+		tx->dwords = dwords;
+		qp->s_tx = tx;
+		dev = &ppd->dd->verbs_dev;
+		spin_lock(&dev->pending_lock);
+		if (list_empty(&qp->iowait)) {
+			struct qib_ibport *ibp;
+
+			ibp = &ppd->ibport_data;
+			ibp->n_dmawait++;
+			qp->s_flags |= QIB_S_WAIT_DMA_DESC;
+			list_add_tail(&qp->iowait, &dev->dmawait);
+		}
+		spin_unlock(&dev->pending_lock);
+		qp->s_flags &= ~QIB_S_BUSY;
+		spin_unlock(&qp->s_lock);
+		ret = -EBUSY;
+	} else {
+		spin_unlock(&qp->s_lock);
+		qib_put_txreq(tx);
+	}
+unlock:
+	spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+	return ret;
+}
+
+void qib_sdma_process_event(struct qib_pportdata *ppd,
+	enum qib_sdma_events event)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+	__qib_sdma_process_event(ppd, event);
+
+	if (ppd->sdma_state.current_state == qib_sdma_state_s99_running)
+		qib_verbs_sdma_desc_avail(ppd, qib_sdma_descq_freecnt(ppd));
+
+	spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+}
+
+void __qib_sdma_process_event(struct qib_pportdata *ppd,
+	enum qib_sdma_events event)
+{
+	struct qib_sdma_state *ss = &ppd->sdma_state;
+
+	switch (ss->current_state) {
+	case qib_sdma_state_s00_hw_down:
+		switch (event) {
+		case qib_sdma_event_e00_go_hw_down:
+			break;
+		case qib_sdma_event_e30_go_running:
+			/*
+			 * If down, but running requested (usually result
+			 * of link up, then we need to start up.
+			 * This can happen when hw down is requested while
+			 * bringing the link up with traffic active on
+			 * 7220, e.g. */
+			ss->go_s99_running = 1;
+			/* fall through and start dma engine */
+		case qib_sdma_event_e10_go_hw_start:
+			/* This reference means the state machine is started */
+			sdma_get(&ppd->sdma_state);
+			sdma_set_state(ppd,
+				       qib_sdma_state_s10_hw_start_up_wait);
+			break;
+		case qib_sdma_event_e20_hw_started:
+			break;
+		case qib_sdma_event_e40_sw_cleaned:
+			sdma_sw_tear_down(ppd);
+			break;
+		case qib_sdma_event_e50_hw_cleaned:
+			break;
+		case qib_sdma_event_e60_hw_halted:
+			break;
+		case qib_sdma_event_e70_go_idle:
+			break;
+		case qib_sdma_event_e7220_err_halted:
+			break;
+		case qib_sdma_event_e7322_err_halted:
+			break;
+		case qib_sdma_event_e90_timer_tick:
+			break;
+		}
+		break;
+
+	case qib_sdma_state_s10_hw_start_up_wait:
+		switch (event) {
+		case qib_sdma_event_e00_go_hw_down:
+			sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+			sdma_sw_tear_down(ppd);
+			break;
+		case qib_sdma_event_e10_go_hw_start:
+			break;
+		case qib_sdma_event_e20_hw_started:
+			sdma_set_state(ppd, ss->go_s99_running ?
+				       qib_sdma_state_s99_running :
+				       qib_sdma_state_s20_idle);
+			break;
+		case qib_sdma_event_e30_go_running:
+			ss->go_s99_running = 1;
+			break;
+		case qib_sdma_event_e40_sw_cleaned:
+			break;
+		case qib_sdma_event_e50_hw_cleaned:
+			break;
+		case qib_sdma_event_e60_hw_halted:
+			break;
+		case qib_sdma_event_e70_go_idle:
+			ss->go_s99_running = 0;
+			break;
+		case qib_sdma_event_e7220_err_halted:
+			break;
+		case qib_sdma_event_e7322_err_halted:
+			break;
+		case qib_sdma_event_e90_timer_tick:
+			break;
+		}
+		break;
+
+	case qib_sdma_state_s20_idle:
+		switch (event) {
+		case qib_sdma_event_e00_go_hw_down:
+			sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+			sdma_sw_tear_down(ppd);
+			break;
+		case qib_sdma_event_e10_go_hw_start:
+			break;
+		case qib_sdma_event_e20_hw_started:
+			break;
+		case qib_sdma_event_e30_go_running:
+			sdma_set_state(ppd, qib_sdma_state_s99_running);
+			ss->go_s99_running = 1;
+			break;
+		case qib_sdma_event_e40_sw_cleaned:
+			break;
+		case qib_sdma_event_e50_hw_cleaned:
+			break;
+		case qib_sdma_event_e60_hw_halted:
+			break;
+		case qib_sdma_event_e70_go_idle:
+			break;
+		case qib_sdma_event_e7220_err_halted:
+			break;
+		case qib_sdma_event_e7322_err_halted:
+			break;
+		case qib_sdma_event_e90_timer_tick:
+			break;
+		}
+		break;
+
+	case qib_sdma_state_s30_sw_clean_up_wait:
+		switch (event) {
+		case qib_sdma_event_e00_go_hw_down:
+			sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+			break;
+		case qib_sdma_event_e10_go_hw_start:
+			break;
+		case qib_sdma_event_e20_hw_started:
+			break;
+		case qib_sdma_event_e30_go_running:
+			ss->go_s99_running = 1;
+			break;
+		case qib_sdma_event_e40_sw_cleaned:
+			sdma_set_state(ppd,
+				       qib_sdma_state_s10_hw_start_up_wait);
+			sdma_hw_start_up(ppd);
+			break;
+		case qib_sdma_event_e50_hw_cleaned:
+			break;
+		case qib_sdma_event_e60_hw_halted:
+			break;
+		case qib_sdma_event_e70_go_idle:
+			ss->go_s99_running = 0;
+			break;
+		case qib_sdma_event_e7220_err_halted:
+			break;
+		case qib_sdma_event_e7322_err_halted:
+			break;
+		case qib_sdma_event_e90_timer_tick:
+			break;
+		}
+		break;
+
+	case qib_sdma_state_s40_hw_clean_up_wait:
+		switch (event) {
+		case qib_sdma_event_e00_go_hw_down:
+			sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+			sdma_start_sw_clean_up(ppd);
+			break;
+		case qib_sdma_event_e10_go_hw_start:
+			break;
+		case qib_sdma_event_e20_hw_started:
+			break;
+		case qib_sdma_event_e30_go_running:
+			ss->go_s99_running = 1;
+			break;
+		case qib_sdma_event_e40_sw_cleaned:
+			break;
+		case qib_sdma_event_e50_hw_cleaned:
+			sdma_set_state(ppd,
+				       qib_sdma_state_s30_sw_clean_up_wait);
+			sdma_start_sw_clean_up(ppd);
+			break;
+		case qib_sdma_event_e60_hw_halted:
+			break;
+		case qib_sdma_event_e70_go_idle:
+			ss->go_s99_running = 0;
+			break;
+		case qib_sdma_event_e7220_err_halted:
+			break;
+		case qib_sdma_event_e7322_err_halted:
+			break;
+		case qib_sdma_event_e90_timer_tick:
+			break;
+		}
+		break;
+
+	case qib_sdma_state_s50_hw_halt_wait:
+		switch (event) {
+		case qib_sdma_event_e00_go_hw_down:
+			sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+			sdma_start_sw_clean_up(ppd);
+			break;
+		case qib_sdma_event_e10_go_hw_start:
+			break;
+		case qib_sdma_event_e20_hw_started:
+			break;
+		case qib_sdma_event_e30_go_running:
+			ss->go_s99_running = 1;
+			break;
+		case qib_sdma_event_e40_sw_cleaned:
+			break;
+		case qib_sdma_event_e50_hw_cleaned:
+			break;
+		case qib_sdma_event_e60_hw_halted:
+			sdma_set_state(ppd,
+				       qib_sdma_state_s40_hw_clean_up_wait);
+			ppd->dd->f_sdma_hw_clean_up(ppd);
+			break;
+		case qib_sdma_event_e70_go_idle:
+			ss->go_s99_running = 0;
+			break;
+		case qib_sdma_event_e7220_err_halted:
+			break;
+		case qib_sdma_event_e7322_err_halted:
+			break;
+		case qib_sdma_event_e90_timer_tick:
+			break;
+		}
+		break;
+
+	case qib_sdma_state_s99_running:
+		switch (event) {
+		case qib_sdma_event_e00_go_hw_down:
+			sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+			sdma_start_sw_clean_up(ppd);
+			break;
+		case qib_sdma_event_e10_go_hw_start:
+			break;
+		case qib_sdma_event_e20_hw_started:
+			break;
+		case qib_sdma_event_e30_go_running:
+			break;
+		case qib_sdma_event_e40_sw_cleaned:
+			break;
+		case qib_sdma_event_e50_hw_cleaned:
+			break;
+		case qib_sdma_event_e60_hw_halted:
+			sdma_set_state(ppd,
+				       qib_sdma_state_s30_sw_clean_up_wait);
+			sdma_start_sw_clean_up(ppd);
+			break;
+		case qib_sdma_event_e70_go_idle:
+			sdma_set_state(ppd, qib_sdma_state_s50_hw_halt_wait);
+			ss->go_s99_running = 0;
+			break;
+		case qib_sdma_event_e7220_err_halted:
+			sdma_set_state(ppd,
+				       qib_sdma_state_s30_sw_clean_up_wait);
+			sdma_start_sw_clean_up(ppd);
+			break;
+		case qib_sdma_event_e7322_err_halted:
+			sdma_set_state(ppd, qib_sdma_state_s50_hw_halt_wait);
+			break;
+		case qib_sdma_event_e90_timer_tick:
+			break;
+		}
+		break;
+	}
+
+	ss->last_event = event;
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 36/52] IB/qib: Add qib_srq.c
From: Ralph Campbell @ 2010-05-07  0:02 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_srq.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_srq.c |  374 +++++++++++++++++++++++++++++++++++
 1 files changed, 374 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_srq.c

diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c
new file mode 100644
index 0000000..d79ae33
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_srq.c
@@ -0,0 +1,374 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+
+#include "qib_verbs.h"
+
+/**
+ * qib_post_srq_receive - post a receive on a shared receive queue
+ * @ibsrq: the SRQ to post the receive on
+ * @wr: the list of work requests to post
+ * @bad_wr: A pointer to the first WR to cause a problem is put here
+ *
+ * This may be called from interrupt context.
+ */
+int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+			 struct ib_recv_wr **bad_wr)
+{
+	struct qib_srq *srq = to_isrq(ibsrq);
+	struct qib_rwq *wq;
+	unsigned long flags;
+	int ret;
+
+	for (; wr; wr = wr->next) {
+		struct qib_rwqe *wqe;
+		u32 next;
+		int i;
+
+		if ((unsigned) wr->num_sge > srq->rq.max_sge) {
+			*bad_wr = wr;
+			ret = -EINVAL;
+			goto bail;
+		}
+
+		spin_lock_irqsave(&srq->rq.lock, flags);
+		wq = srq->rq.wq;
+		next = wq->head + 1;
+		if (next >= srq->rq.size)
+			next = 0;
+		if (next == wq->tail) {
+			spin_unlock_irqrestore(&srq->rq.lock, flags);
+			*bad_wr = wr;
+			ret = -ENOMEM;
+			goto bail;
+		}
+
+		wqe = get_rwqe_ptr(&srq->rq, wq->head);
+		wqe->wr_id = wr->wr_id;
+		wqe->num_sge = wr->num_sge;
+		for (i = 0; i < wr->num_sge; i++)
+			wqe->sg_list[i] = wr->sg_list[i];
+		/* Make sure queue entry is written before the head index. */
+		smp_wmb();
+		wq->head = next;
+		spin_unlock_irqrestore(&srq->rq.lock, flags);
+	}
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+/**
+ * qib_create_srq - create a shared receive queue
+ * @ibpd: the protection domain of the SRQ to create
+ * @srq_init_attr: the attributes of the SRQ
+ * @udata: data from libibverbs when creating a user SRQ
+ */
+struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
+			      struct ib_srq_init_attr *srq_init_attr,
+			      struct ib_udata *udata)
+{
+	struct qib_ibdev *dev = to_idev(ibpd->device);
+	struct qib_srq *srq;
+	u32 sz;
+	struct ib_srq *ret;
+
+	if (srq_init_attr->attr.max_sge == 0 ||
+	    srq_init_attr->attr.max_sge > ib_qib_max_srq_sges ||
+	    srq_init_attr->attr.max_wr == 0 ||
+	    srq_init_attr->attr.max_wr > ib_qib_max_srq_wrs) {
+		ret = ERR_PTR(-EINVAL);
+		goto done;
+	}
+
+	srq = kmalloc(sizeof(*srq), GFP_KERNEL);
+	if (!srq) {
+		ret = ERR_PTR(-ENOMEM);
+		goto done;
+	}
+
+	/*
+	 * Need to use vmalloc() if we want to support large #s of entries.
+	 */
+	srq->rq.size = srq_init_attr->attr.max_wr + 1;
+	srq->rq.max_sge = srq_init_attr->attr.max_sge;
+	sz = sizeof(struct ib_sge) * srq->rq.max_sge +
+		sizeof(struct qib_rwqe);
+	srq->rq.wq = vmalloc_user(sizeof(struct qib_rwq) + srq->rq.size * sz);
+	if (!srq->rq.wq) {
+		ret = ERR_PTR(-ENOMEM);
+		goto bail_srq;
+	}
+
+	/*
+	 * Return the address of the RWQ as the offset to mmap.
+	 * See qib_mmap() for details.
+	 */
+	if (udata && udata->outlen >= sizeof(__u64)) {
+		int err;
+		u32 s = sizeof(struct qib_rwq) + srq->rq.size * sz;
+
+		srq->ip =
+		    qib_create_mmap_info(dev, s, ibpd->uobject->context,
+					 srq->rq.wq);
+		if (!srq->ip) {
+			ret = ERR_PTR(-ENOMEM);
+			goto bail_wq;
+		}
+
+		err = ib_copy_to_udata(udata, &srq->ip->offset,
+				       sizeof(srq->ip->offset));
+		if (err) {
+			ret = ERR_PTR(err);
+			goto bail_ip;
+		}
+	} else
+		srq->ip = NULL;
+
+	/*
+	 * ib_create_srq() will initialize srq->ibsrq.
+	 */
+	spin_lock_init(&srq->rq.lock);
+	srq->rq.wq->head = 0;
+	srq->rq.wq->tail = 0;
+	srq->limit = srq_init_attr->attr.srq_limit;
+
+	spin_lock(&dev->n_srqs_lock);
+	if (dev->n_srqs_allocated == ib_qib_max_srqs) {
+		spin_unlock(&dev->n_srqs_lock);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail_ip;
+	}
+
+	dev->n_srqs_allocated++;
+	spin_unlock(&dev->n_srqs_lock);
+
+	if (srq->ip) {
+		spin_lock_irq(&dev->pending_lock);
+		list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
+		spin_unlock_irq(&dev->pending_lock);
+	}
+
+	ret = &srq->ibsrq;
+	goto done;
+
+bail_ip:
+	kfree(srq->ip);
+bail_wq:
+	vfree(srq->rq.wq);
+bail_srq:
+	kfree(srq);
+done:
+	return ret;
+}
+
+/**
+ * qib_modify_srq - modify a shared receive queue
+ * @ibsrq: the SRQ to modify
+ * @attr: the new attributes of the SRQ
+ * @attr_mask: indicates which attributes to modify
+ * @udata: user data for libibverbs.so
+ */
+int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+		   enum ib_srq_attr_mask attr_mask,
+		   struct ib_udata *udata)
+{
+	struct qib_srq *srq = to_isrq(ibsrq);
+	struct qib_rwq *wq;
+	int ret = 0;
+
+	if (attr_mask & IB_SRQ_MAX_WR) {
+		struct qib_rwq *owq;
+		struct qib_rwqe *p;
+		u32 sz, size, n, head, tail;
+
+		/* Check that the requested sizes are below the limits. */
+		if ((attr->max_wr > ib_qib_max_srq_wrs) ||
+		    ((attr_mask & IB_SRQ_LIMIT) ?
+		     attr->srq_limit : srq->limit) > attr->max_wr) {
+			ret = -EINVAL;
+			goto bail;
+		}
+
+		sz = sizeof(struct qib_rwqe) +
+			srq->rq.max_sge * sizeof(struct ib_sge);
+		size = attr->max_wr + 1;
+		wq = vmalloc_user(sizeof(struct qib_rwq) + size * sz);
+		if (!wq) {
+			ret = -ENOMEM;
+			goto bail;
+		}
+
+		/* Check that we can write the offset to mmap. */
+		if (udata && udata->inlen >= sizeof(__u64)) {
+			__u64 offset_addr;
+			__u64 offset = 0;
+
+			ret = ib_copy_from_udata(&offset_addr, udata,
+						 sizeof(offset_addr));
+			if (ret)
+				goto bail_free;
+			udata->outbuf =
+				(void __user *) (unsigned long) offset_addr;
+			ret = ib_copy_to_udata(udata, &offset,
+					       sizeof(offset));
+			if (ret)
+				goto bail_free;
+		}
+
+		spin_lock_irq(&srq->rq.lock);
+		/*
+		 * validate head and tail pointer values and compute
+		 * the number of remaining WQEs.
+		 */
+		owq = srq->rq.wq;
+		head = owq->head;
+		tail = owq->tail;
+		if (head >= srq->rq.size || tail >= srq->rq.size) {
+			ret = -EINVAL;
+			goto bail_unlock;
+		}
+		n = head;
+		if (n < tail)
+			n += srq->rq.size - tail;
+		else
+			n -= tail;
+		if (size <= n) {
+			ret = -EINVAL;
+			goto bail_unlock;
+		}
+		n = 0;
+		p = wq->wq;
+		while (tail != head) {
+			struct qib_rwqe *wqe;
+			int i;
+
+			wqe = get_rwqe_ptr(&srq->rq, tail);
+			p->wr_id = wqe->wr_id;
+			p->num_sge = wqe->num_sge;
+			for (i = 0; i < wqe->num_sge; i++)
+				p->sg_list[i] = wqe->sg_list[i];
+			n++;
+			p = (struct qib_rwqe *)((char *) p + sz);
+			if (++tail >= srq->rq.size)
+				tail = 0;
+		}
+		srq->rq.wq = wq;
+		srq->rq.size = size;
+		wq->head = n;
+		wq->tail = 0;
+		if (attr_mask & IB_SRQ_LIMIT)
+			srq->limit = attr->srq_limit;
+		spin_unlock_irq(&srq->rq.lock);
+
+		vfree(owq);
+
+		if (srq->ip) {
+			struct qib_mmap_info *ip = srq->ip;
+			struct qib_ibdev *dev = to_idev(srq->ibsrq.device);
+			u32 s = sizeof(struct qib_rwq) + size * sz;
+
+			qib_update_mmap_info(dev, ip, s, wq);
+
+			/*
+			 * Return the offset to mmap.
+			 * See qib_mmap() for details.
+			 */
+			if (udata && udata->inlen >= sizeof(__u64)) {
+				ret = ib_copy_to_udata(udata, &ip->offset,
+						       sizeof(ip->offset));
+				if (ret)
+					goto bail;
+			}
+
+			/*
+			 * Put user mapping info onto the pending list
+			 * unless it already is on the list.
+			 */
+			spin_lock_irq(&dev->pending_lock);
+			if (list_empty(&ip->pending_mmaps))
+				list_add(&ip->pending_mmaps,
+					 &dev->pending_mmaps);
+			spin_unlock_irq(&dev->pending_lock);
+		}
+	} else if (attr_mask & IB_SRQ_LIMIT) {
+		spin_lock_irq(&srq->rq.lock);
+		if (attr->srq_limit >= srq->rq.size)
+			ret = -EINVAL;
+		else
+			srq->limit = attr->srq_limit;
+		spin_unlock_irq(&srq->rq.lock);
+	}
+	goto bail;
+
+bail_unlock:
+	spin_unlock_irq(&srq->rq.lock);
+bail_free:
+	vfree(wq);
+bail:
+	return ret;
+}
+
+int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+{
+	struct qib_srq *srq = to_isrq(ibsrq);
+
+	attr->max_wr = srq->rq.size - 1;
+	attr->max_sge = srq->rq.max_sge;
+	attr->srq_limit = srq->limit;
+	return 0;
+}
+
+/**
+ * qib_destroy_srq - destroy a shared receive queue
+ * @ibsrq: the SRQ to destroy
+ */
+int qib_destroy_srq(struct ib_srq *ibsrq)
+{
+	struct qib_srq *srq = to_isrq(ibsrq);
+	struct qib_ibdev *dev = to_idev(ibsrq->device);
+
+	spin_lock(&dev->n_srqs_lock);
+	dev->n_srqs_allocated--;
+	spin_unlock(&dev->n_srqs_lock);
+	if (srq->ip)
+		kref_put(&srq->ip->ref, qib_release_mmap_info);
+	else
+		vfree(srq->rq.wq);
+	kfree(srq);
+
+	return 0;
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 37/52] IB/qib: Add qib_sysfs.c
From: Ralph Campbell @ 2010-05-07  0:02 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_sysfs.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_sysfs.c |  691 +++++++++++++++++++++++++++++++++
 1 files changed, 691 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_sysfs.c

diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
new file mode 100644
index 0000000..dab4d9f
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -0,0 +1,691 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/ctype.h>
+
+#include "qib.h"
+
+/**
+ * qib_parse_ushort - parse an unsigned short value in an arbitrary base
+ * @str: the string containing the number
+ * @valp: where to put the result
+ *
+ * Returns the number of bytes consumed, or negative value on error.
+ */
+static int qib_parse_ushort(const char *str, unsigned short *valp)
+{
+	unsigned long val;
+	char *end;
+	int ret;
+
+	if (!isdigit(str[0])) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	val = simple_strtoul(str, &end, 0);
+
+	if (val > 0xffff) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	*valp = val;
+
+	ret = end + 1 - str;
+	if (ret == 0)
+		ret = -EINVAL;
+
+bail:
+	return ret;
+}
+
+/* start of per-port functions */
+/*
+ * Get/Set heartbeat enable. OR of 1=enabled, 2=auto
+ */
+static ssize_t show_hrtbt_enb(struct qib_pportdata *ppd, char *buf)
+{
+	struct qib_devdata *dd = ppd->dd;
+	int ret;
+
+	ret = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT);
+	ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+	return ret;
+}
+
+static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf,
+			       size_t count)
+{
+	struct qib_devdata *dd = ppd->dd;
+	int ret;
+	u16 val;
+
+	ret = qib_parse_ushort(buf, &val);
+
+	/*
+	 * Set the "intentional" heartbeat enable per either of
+	 * "Enable" and "Auto", as these are normally set together.
+	 * This bit is consulted when leaving loopback mode,
+	 * because entering loopback mode overrides it and automatically
+	 * disables heartbeat.
+	 */
+	if (ret >= 0)
+		ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val);
+	if (ret < 0)
+		qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
+	return ret < 0 ? ret : count;
+}
+
+static ssize_t store_loopback(struct qib_pportdata *ppd, const char *buf,
+			      size_t count)
+{
+	struct qib_devdata *dd = ppd->dd;
+	int ret = count, r;
+
+	r = dd->f_set_ib_loopback(ppd, buf);
+	if (r < 0)
+		ret = r;
+
+	return ret;
+}
+
+static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf,
+				  size_t count)
+{
+	struct qib_devdata *dd = ppd->dd;
+	int ret;
+	u16 val;
+
+	ret = qib_parse_ushort(buf, &val);
+	if (ret > 0)
+		qib_set_led_override(ppd, val);
+	else
+		qib_dev_err(dd, "attempt to set invalid LED override\n");
+	return ret < 0 ? ret : count;
+}
+
+static ssize_t show_status(struct qib_pportdata *ppd, char *buf)
+{
+	ssize_t ret;
+
+	if (!ppd->statusp)
+		ret = -EINVAL;
+	else
+		ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n",
+				(unsigned long long) *(ppd->statusp));
+	return ret;
+}
+
+/*
+ * For userland compatibility, these offsets must remain fixed.
+ * They are strings for QIB_STATUS_*
+ */
+static const char *qib_status_str[] = {
+	"Initted",
+	"",
+	"",
+	"",
+	"",
+	"Present",
+	"IB_link_up",
+	"IB_configured",
+	"",
+	"Fatal_Hardware_Error",
+	NULL,
+};
+
+static ssize_t show_status_str(struct qib_pportdata *ppd, char *buf)
+{
+	int i, any;
+	u64 s;
+	ssize_t ret;
+
+	if (!ppd->statusp) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	s = *(ppd->statusp);
+	*buf = '\0';
+	for (any = i = 0; s && qib_status_str[i]; i++) {
+		if (s & 1) {
+			/* if overflow */
+			if (any && strlcat(buf, " ", PAGE_SIZE) >= PAGE_SIZE)
+				break;
+			if (strlcat(buf, qib_status_str[i], PAGE_SIZE) >=
+					PAGE_SIZE)
+				break;
+			any = 1;
+		}
+		s >>= 1;
+	}
+	if (any)
+		strlcat(buf, "\n", PAGE_SIZE);
+
+	ret = strlen(buf);
+
+bail:
+	return ret;
+}
+
+/* end of per-port functions */
+
+/*
+ * Start of per-port file structures and support code
+ * Because we are fitting into other infrastructure, we have to supply the
+ * full set of kobject/sysfs_ops structures and routines.
+ */
+#define QIB_PORT_ATTR(name, mode, show, store) \
+	static struct qib_port_attr qib_port_attr_##name = \
+		__ATTR(name, mode, show, store)
+
+struct qib_port_attr {
+	struct attribute attr;
+	ssize_t (*show)(struct qib_pportdata *, char *);
+	ssize_t (*store)(struct qib_pportdata *, const char *, size_t);
+};
+
+QIB_PORT_ATTR(loopback, S_IWUSR, NULL, store_loopback);
+QIB_PORT_ATTR(led_override, S_IWUSR, NULL, store_led_override);
+QIB_PORT_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb,
+	      store_hrtbt_enb);
+QIB_PORT_ATTR(status, S_IRUGO, show_status, NULL);
+QIB_PORT_ATTR(status_str, S_IRUGO, show_status_str, NULL);
+
+static struct attribute *port_default_attributes[] = {
+	&qib_port_attr_loopback.attr,
+	&qib_port_attr_led_override.attr,
+	&qib_port_attr_hrtbt_enable.attr,
+	&qib_port_attr_status.attr,
+	&qib_port_attr_status_str.attr,
+	NULL
+};
+
+static ssize_t qib_portattr_show(struct kobject *kobj,
+	struct attribute *attr, char *buf)
+{
+	struct qib_port_attr *pattr =
+		container_of(attr, struct qib_port_attr, attr);
+	struct qib_pportdata *ppd =
+		container_of(kobj, struct qib_pportdata, pport_kobj);
+
+	return pattr->show(ppd, buf);
+}
+
+static ssize_t qib_portattr_store(struct kobject *kobj,
+	struct attribute *attr, const char *buf, size_t len)
+{
+	struct qib_port_attr *pattr =
+		container_of(attr, struct qib_port_attr, attr);
+	struct qib_pportdata *ppd =
+		container_of(kobj, struct qib_pportdata, pport_kobj);
+
+	return pattr->store(ppd, buf, len);
+}
+
+static void qib_port_release(struct kobject *kobj)
+{
+	/* nothing to do since memory is freed by qib_free_devdata() */
+}
+
+static const struct sysfs_ops qib_port_ops = {
+	.show = qib_portattr_show,
+	.store = qib_portattr_store,
+};
+
+static struct kobj_type qib_port_ktype = {
+	.release = qib_port_release,
+	.sysfs_ops = &qib_port_ops,
+	.default_attrs = port_default_attributes
+};
+
+/* Start sl2vl */
+
+#define QIB_SL2VL_ATTR(N) \
+	static struct qib_sl2vl_attr qib_sl2vl_attr_##N = { \
+		.attr = { .name = __stringify(N), .mode = 0444 }, \
+		.sl = N \
+	}
+
+struct qib_sl2vl_attr {
+	struct attribute attr;
+	int sl;
+};
+
+QIB_SL2VL_ATTR(0);
+QIB_SL2VL_ATTR(1);
+QIB_SL2VL_ATTR(2);
+QIB_SL2VL_ATTR(3);
+QIB_SL2VL_ATTR(4);
+QIB_SL2VL_ATTR(5);
+QIB_SL2VL_ATTR(6);
+QIB_SL2VL_ATTR(7);
+QIB_SL2VL_ATTR(8);
+QIB_SL2VL_ATTR(9);
+QIB_SL2VL_ATTR(10);
+QIB_SL2VL_ATTR(11);
+QIB_SL2VL_ATTR(12);
+QIB_SL2VL_ATTR(13);
+QIB_SL2VL_ATTR(14);
+QIB_SL2VL_ATTR(15);
+
+static struct attribute *sl2vl_default_attributes[] = {
+	&qib_sl2vl_attr_0.attr,
+	&qib_sl2vl_attr_1.attr,
+	&qib_sl2vl_attr_2.attr,
+	&qib_sl2vl_attr_3.attr,
+	&qib_sl2vl_attr_4.attr,
+	&qib_sl2vl_attr_5.attr,
+	&qib_sl2vl_attr_6.attr,
+	&qib_sl2vl_attr_7.attr,
+	&qib_sl2vl_attr_8.attr,
+	&qib_sl2vl_attr_9.attr,
+	&qib_sl2vl_attr_10.attr,
+	&qib_sl2vl_attr_11.attr,
+	&qib_sl2vl_attr_12.attr,
+	&qib_sl2vl_attr_13.attr,
+	&qib_sl2vl_attr_14.attr,
+	&qib_sl2vl_attr_15.attr,
+	NULL
+};
+
+static ssize_t sl2vl_attr_show(struct kobject *kobj, struct attribute *attr,
+			       char *buf)
+{
+	struct qib_sl2vl_attr *sattr =
+		container_of(attr, struct qib_sl2vl_attr, attr);
+	struct qib_pportdata *ppd =
+		container_of(kobj, struct qib_pportdata, sl2vl_kobj);
+	struct qib_ibport *qibp = &ppd->ibport_data;
+
+	return sprintf(buf, "%u\n", qibp->sl_to_vl[sattr->sl]);
+}
+
+static const struct sysfs_ops qib_sl2vl_ops = {
+	.show = sl2vl_attr_show,
+};
+
+static struct kobj_type qib_sl2vl_ktype = {
+	.release = qib_port_release,
+	.sysfs_ops = &qib_sl2vl_ops,
+	.default_attrs = sl2vl_default_attributes
+};
+
+/* End sl2vl */
+
+/* Start diag_counters */
+
+#define QIB_DIAGC_ATTR(N) \
+	static struct qib_diagc_attr qib_diagc_attr_##N = { \
+		.attr = { .name = __stringify(N), .mode = 0444 }, \
+		.counter = offsetof(struct qib_ibport, n_##N) \
+	}
+
+struct qib_diagc_attr {
+	struct attribute attr;
+	size_t counter;
+};
+
+QIB_DIAGC_ATTR(rc_resends);
+QIB_DIAGC_ATTR(rc_acks);
+QIB_DIAGC_ATTR(rc_qacks);
+QIB_DIAGC_ATTR(rc_delayed_comp);
+QIB_DIAGC_ATTR(seq_naks);
+QIB_DIAGC_ATTR(rdma_seq);
+QIB_DIAGC_ATTR(rnr_naks);
+QIB_DIAGC_ATTR(other_naks);
+QIB_DIAGC_ATTR(rc_timeouts);
+QIB_DIAGC_ATTR(loop_pkts);
+QIB_DIAGC_ATTR(pkt_drops);
+QIB_DIAGC_ATTR(dmawait);
+QIB_DIAGC_ATTR(unaligned);
+QIB_DIAGC_ATTR(rc_dupreq);
+QIB_DIAGC_ATTR(rc_seqnak);
+
+static struct attribute *diagc_default_attributes[] = {
+	&qib_diagc_attr_rc_resends.attr,
+	&qib_diagc_attr_rc_acks.attr,
+	&qib_diagc_attr_rc_qacks.attr,
+	&qib_diagc_attr_rc_delayed_comp.attr,
+	&qib_diagc_attr_seq_naks.attr,
+	&qib_diagc_attr_rdma_seq.attr,
+	&qib_diagc_attr_rnr_naks.attr,
+	&qib_diagc_attr_other_naks.attr,
+	&qib_diagc_attr_rc_timeouts.attr,
+	&qib_diagc_attr_loop_pkts.attr,
+	&qib_diagc_attr_pkt_drops.attr,
+	&qib_diagc_attr_dmawait.attr,
+	&qib_diagc_attr_unaligned.attr,
+	&qib_diagc_attr_rc_dupreq.attr,
+	&qib_diagc_attr_rc_seqnak.attr,
+	NULL
+};
+
+static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr,
+			       char *buf)
+{
+	struct qib_diagc_attr *dattr =
+		container_of(attr, struct qib_diagc_attr, attr);
+	struct qib_pportdata *ppd =
+		container_of(kobj, struct qib_pportdata, diagc_kobj);
+	struct qib_ibport *qibp = &ppd->ibport_data;
+
+	return sprintf(buf, "%u\n", *(u32 *)((char *)qibp + dattr->counter));
+}
+
+static const struct sysfs_ops qib_diagc_ops = {
+	.show = diagc_attr_show,
+};
+
+static struct kobj_type qib_diagc_ktype = {
+	.release = qib_port_release,
+	.sysfs_ops = &qib_diagc_ops,
+	.default_attrs = diagc_default_attributes
+};
+
+/* End diag_counters */
+
+/* end of per-port file structures and support code */
+
+/*
+ * Start of per-unit (or driver, in some cases, but replicated
+ * per unit) functions (these get a device *)
+ */
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+			char *buf)
+{
+	struct qib_ibdev *dev =
+		container_of(device, struct qib_ibdev, ibdev.dev);
+
+	return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
+}
+
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+			char *buf)
+{
+	struct qib_ibdev *dev =
+		container_of(device, struct qib_ibdev, ibdev.dev);
+	struct qib_devdata *dd = dd_from_dev(dev);
+	int ret;
+
+	if (!dd->boardname)
+		ret = -EINVAL;
+	else
+		ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
+	return ret;
+}
+
+static ssize_t show_version(struct device *device,
+			    struct device_attribute *attr, char *buf)
+{
+	/* The string printed here is already newline-terminated. */
+	return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version);
+}
+
+static ssize_t show_boardversion(struct device *device,
+				 struct device_attribute *attr, char *buf)
+{
+	struct qib_ibdev *dev =
+		container_of(device, struct qib_ibdev, ibdev.dev);
+	struct qib_devdata *dd = dd_from_dev(dev);
+
+	/* The string printed here is already newline-terminated. */
+	return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
+}
+
+
+static ssize_t show_localbus_info(struct device *device,
+				  struct device_attribute *attr, char *buf)
+{
+	struct qib_ibdev *dev =
+		container_of(device, struct qib_ibdev, ibdev.dev);
+	struct qib_devdata *dd = dd_from_dev(dev);
+
+	/* The string printed here is already newline-terminated. */
+	return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info);
+}
+
+
+static ssize_t show_nctxts(struct device *device,
+			   struct device_attribute *attr, char *buf)
+{
+	struct qib_ibdev *dev =
+		container_of(device, struct qib_ibdev, ibdev.dev);
+	struct qib_devdata *dd = dd_from_dev(dev);
+
+	/* Return the number of user ports (contexts) available. */
+	return scnprintf(buf, PAGE_SIZE, "%u\n", dd->cfgctxts -
+		dd->first_user_ctxt);
+}
+
+static ssize_t show_serial(struct device *device,
+			   struct device_attribute *attr, char *buf)
+{
+	struct qib_ibdev *dev =
+		container_of(device, struct qib_ibdev, ibdev.dev);
+	struct qib_devdata *dd = dd_from_dev(dev);
+
+	buf[sizeof dd->serial] = '\0';
+	memcpy(buf, dd->serial, sizeof dd->serial);
+	strcat(buf, "\n");
+	return strlen(buf);
+}
+
+static ssize_t store_chip_reset(struct device *device,
+				struct device_attribute *attr, const char *buf,
+				size_t count)
+{
+	struct qib_ibdev *dev =
+		container_of(device, struct qib_ibdev, ibdev.dev);
+	struct qib_devdata *dd = dd_from_dev(dev);
+	int ret;
+
+	if (count < 5 || memcmp(buf, "reset", 5) || !dd->diag_client) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	ret = qib_reset_device(dd->unit);
+bail:
+	return ret < 0 ? ret : count;
+}
+
+static ssize_t show_logged_errs(struct device *device,
+				struct device_attribute *attr, char *buf)
+{
+	struct qib_ibdev *dev =
+		container_of(device, struct qib_ibdev, ibdev.dev);
+	struct qib_devdata *dd = dd_from_dev(dev);
+	int idx, count;
+
+	/* force consistency with actual EEPROM */
+	if (qib_update_eeprom_log(dd) != 0)
+		return -ENXIO;
+
+	count = 0;
+	for (idx = 0; idx < QIB_EEP_LOG_CNT; ++idx) {
+		count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
+				   dd->eep_st_errs[idx],
+				   idx == (QIB_EEP_LOG_CNT - 1) ? '\n' : ' ');
+	}
+
+	return count;
+}
+
+/*
+ * Dump tempsense regs. in decimal, to ease shell-scripts.
+ */
+static ssize_t show_tempsense(struct device *device,
+			      struct device_attribute *attr, char *buf)
+{
+	struct qib_ibdev *dev =
+		container_of(device, struct qib_ibdev, ibdev.dev);
+	struct qib_devdata *dd = dd_from_dev(dev);
+	int ret;
+	int idx;
+	u8 regvals[8];
+
+	ret = -ENXIO;
+	for (idx = 0; idx < 8; ++idx) {
+		if (idx == 6)
+			continue;
+		ret = dd->f_tempsense_rd(dd, idx);
+		if (ret < 0)
+			break;
+		regvals[idx] = ret;
+	}
+	if (idx == 8)
+		ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n",
+				*(signed char *)(regvals),
+				*(signed char *)(regvals + 1),
+				regvals[2], regvals[3],
+				*(signed char *)(regvals + 5),
+				*(signed char *)(regvals + 7));
+	return ret;
+}
+
+/*
+ * end of per-unit (or driver, in some cases, but replicated
+ * per unit) functions
+ */
+
+/* start of per-unit file structures and support code */
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(version, S_IRUGO, show_version, NULL);
+static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL);
+static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
+static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
+static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
+static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
+static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL);
+static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
+
+static struct device_attribute *qib_attributes[] = {
+	&dev_attr_hw_rev,
+	&dev_attr_hca_type,
+	&dev_attr_board_id,
+	&dev_attr_version,
+	&dev_attr_nctxts,
+	&dev_attr_serial,
+	&dev_attr_boardversion,
+	&dev_attr_logged_errors,
+	&dev_attr_tempsense,
+	&dev_attr_localbus_info,
+	&dev_attr_chip_reset,
+};
+
+int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
+			  struct kobject *kobj)
+{
+	struct qib_pportdata *ppd;
+	struct qib_devdata *dd = dd_from_ibdev(ibdev);
+	int ret;
+
+	if (!port_num || port_num > dd->num_pports) {
+		qib_dev_err(dd, "Skipping infiniband class with "
+			    "invalid port %u\n", port_num);
+		ret = -ENODEV;
+		goto bail;
+	}
+	ppd = &dd->pport[port_num - 1];
+
+	ret = kobject_init_and_add(&ppd->pport_kobj, &qib_port_ktype, kobj,
+				   "linkcontrol");
+	if (ret) {
+		qib_dev_err(dd, "Skipping linkcontrol sysfs info, "
+			    "(err %d) port %u\n", ret, port_num);
+		goto bail;
+	}
+	kobject_uevent(&ppd->pport_kobj, KOBJ_ADD);
+
+	ret = kobject_init_and_add(&ppd->sl2vl_kobj, &qib_sl2vl_ktype, kobj,
+				   "sl2vl");
+	if (ret) {
+		qib_dev_err(dd, "Skipping sl2vl sysfs info, "
+			    "(err %d) port %u\n", ret, port_num);
+		goto bail_sl;
+	}
+	kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD);
+
+	ret = kobject_init_and_add(&ppd->diagc_kobj, &qib_diagc_ktype, kobj,
+				   "diag_counters");
+	if (ret) {
+		qib_dev_err(dd, "Skipping diag_counters sysfs info, "
+			    "(err %d) port %u\n", ret, port_num);
+		goto bail_diagc;
+	}
+	kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD);
+
+	return 0;
+
+bail_diagc:
+	kobject_put(&ppd->sl2vl_kobj);
+bail_sl:
+	kobject_put(&ppd->pport_kobj);
+bail:
+	return ret;
+}
+
+/*
+ * Register and create our files in /sys/class/infiniband.
+ */
+int qib_verbs_register_sysfs(struct qib_devdata *dd)
+{
+	struct ib_device *dev = &dd->verbs_dev.ibdev;
+	int i, ret;
+
+	for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) {
+		ret = device_create_file(&dev->dev, qib_attributes[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Unregister and remove our files in /sys/class/infiniband.
+ */
+void qib_verbs_unregister_sysfs(struct qib_devdata *dd)
+{
+	struct qib_pportdata *ppd;
+	int i;
+
+	for (i = 0; i < dd->num_pports; i++) {
+		ppd = &dd->pport[i];
+		kobject_put(&ppd->pport_kobj);
+		kobject_put(&ppd->sl2vl_kobj);
+	}
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 38/52] IB/qib: Add qib_twsi.c
From: Ralph Campbell @ 2010-05-07  0:02 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_twsi.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_twsi.c |  498 ++++++++++++++++++++++++++++++++++
 1 files changed, 498 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_twsi.c

diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c
new file mode 100644
index 0000000..6f31ca5
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_twsi.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+
+#include "qib.h"
+
+/*
+ * QLogic_IB "Two Wire Serial Interface" driver.
+ * Originally written for a not-quite-i2c serial eeprom, which is
+ * still used on some supported boards. Later boards have added a
+ * variety of other uses, most board-specific, so teh bit-boffing
+ * part has been split off to this file, while the other parts
+ * have been moved to chip-specific files.
+ *
+ * We have also dropped all pretense of fully generic (e.g. pretend
+ * we don't know whether '1' is the higher voltage) interface, as
+ * the restrictions of the generic i2c interface (e.g. no access from
+ * driver itself) make it unsuitable for this use.
+ */
+
+#define READ_CMD 1
+#define WRITE_CMD 0
+
+/**
+ * i2c_wait_for_writes - wait for a write
+ * @dd: the qlogic_ib device
+ *
+ * We use this instead of udelay directly, so we can make sure
+ * that previous register writes have been flushed all the way
+ * to the chip.  Since we are delaying anyway, the cost doesn't
+ * hurt, and makes the bit twiddling more regular
+ */
+static void i2c_wait_for_writes(struct qib_devdata *dd)
+{
+	/*
+	 * implicit read of EXTStatus is as good as explicit
+	 * read of scratch, if all we want to do is flush
+	 * writes.
+	 */
+	dd->f_gpio_mod(dd, 0, 0, 0);
+	rmb(); /* inlined, so prevent compiler reordering */
+}
+
+/*
+ * QSFP modules are allowed to hold SCL low for 500uSec. Allow twice that
+ * for "almost compliant" modules
+ */
+#define SCL_WAIT_USEC 1000
+
+/* BUF_WAIT is time bus must be free between STOP or ACK and to next START.
+ * Should be 20, but some chips need more.
+ */
+#define TWSI_BUF_WAIT_USEC 60
+
+static void scl_out(struct qib_devdata *dd, u8 bit)
+{
+	u32 mask;
+
+	udelay(1);
+
+	mask = 1UL << dd->gpio_scl_num;
+
+	/* SCL is meant to be bare-drain, so never set "OUT", just DIR */
+	dd->f_gpio_mod(dd, 0, bit ? 0 : mask, mask);
+
+	/*
+	 * Allow for slow slaves by simple
+	 * delay for falling edge, sampling on rise.
+	 */
+	if (!bit)
+		udelay(2);
+	else {
+		int rise_usec;
+		for (rise_usec = SCL_WAIT_USEC; rise_usec > 0; rise_usec -= 2) {
+			if (mask & dd->f_gpio_mod(dd, 0, 0, 0))
+				break;
+			udelay(2);
+		}
+		if (rise_usec <= 0)
+			qib_dev_err(dd, "SCL interface stuck low > %d uSec\n",
+				    SCL_WAIT_USEC);
+	}
+	i2c_wait_for_writes(dd);
+}
+
+static void sda_out(struct qib_devdata *dd, u8 bit)
+{
+	u32 mask;
+
+	mask = 1UL << dd->gpio_sda_num;
+
+	/* SDA is meant to be bare-drain, so never set "OUT", just DIR */
+	dd->f_gpio_mod(dd, 0, bit ? 0 : mask, mask);
+
+	i2c_wait_for_writes(dd);
+	udelay(2);
+}
+
+static u8 sda_in(struct qib_devdata *dd, int wait)
+{
+	int bnum;
+	u32 read_val, mask;
+
+	bnum = dd->gpio_sda_num;
+	mask = (1UL << bnum);
+	/* SDA is meant to be bare-drain, so never set "OUT", just DIR */
+	dd->f_gpio_mod(dd, 0, 0, mask);
+	read_val = dd->f_gpio_mod(dd, 0, 0, 0);
+	if (wait)
+		i2c_wait_for_writes(dd);
+	return (read_val & mask) >> bnum;
+}
+
+/**
+ * i2c_ackrcv - see if ack following write is true
+ * @dd: the qlogic_ib device
+ */
+static int i2c_ackrcv(struct qib_devdata *dd)
+{
+	u8 ack_received;
+
+	/* AT ENTRY SCL = LOW */
+	/* change direction, ignore data */
+	ack_received = sda_in(dd, 1);
+	scl_out(dd, 1);
+	ack_received = sda_in(dd, 1) == 0;
+	scl_out(dd, 0);
+	return ack_received;
+}
+
+static void stop_cmd(struct qib_devdata *dd);
+
+/**
+ * rd_byte - read a byte, sending STOP on last, else ACK
+ * @dd: the qlogic_ib device
+ *
+ * Returns byte shifted out of device
+ */
+static int rd_byte(struct qib_devdata *dd, int last)
+{
+	int bit_cntr, data;
+
+	data = 0;
+
+	for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) {
+		data <<= 1;
+		scl_out(dd, 1);
+		data |= sda_in(dd, 0);
+		scl_out(dd, 0);
+	}
+	if (last) {
+		scl_out(dd, 1);
+		stop_cmd(dd);
+	} else {
+		sda_out(dd, 0);
+		scl_out(dd, 1);
+		scl_out(dd, 0);
+		sda_out(dd, 1);
+	}
+	return data;
+}
+
+/**
+ * wr_byte - write a byte, one bit at a time
+ * @dd: the qlogic_ib device
+ * @data: the byte to write
+ *
+ * Returns 0 if we got the following ack, otherwise 1
+ */
+static int wr_byte(struct qib_devdata *dd, u8 data)
+{
+	int bit_cntr;
+	u8 bit;
+
+	for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) {
+		bit = (data >> bit_cntr) & 1;
+		sda_out(dd, bit);
+		scl_out(dd, 1);
+		scl_out(dd, 0);
+	}
+	return (!i2c_ackrcv(dd)) ? 1 : 0;
+}
+
+/*
+ * issue TWSI start sequence:
+ * (both clock/data high, clock high, data low while clock is high)
+ */
+static void start_seq(struct qib_devdata *dd)
+{
+	sda_out(dd, 1);
+	scl_out(dd, 1);
+	sda_out(dd, 0);
+	udelay(1);
+	scl_out(dd, 0);
+}
+
+/**
+ * stop_seq - transmit the stop sequence
+ * @dd: the qlogic_ib device
+ *
+ * (both clock/data low, clock high, data high while clock is high)
+ */
+static void stop_seq(struct qib_devdata *dd)
+{
+	scl_out(dd, 0);
+	sda_out(dd, 0);
+	scl_out(dd, 1);
+	sda_out(dd, 1);
+}
+
+/**
+ * stop_cmd - transmit the stop condition
+ * @dd: the qlogic_ib device
+ *
+ * (both clock/data low, clock high, data high while clock is high)
+ */
+static void stop_cmd(struct qib_devdata *dd)
+{
+	stop_seq(dd);
+	udelay(TWSI_BUF_WAIT_USEC);
+}
+
+/**
+ * qib_twsi_reset - reset I2C communication
+ * @dd: the qlogic_ib device
+ */
+
+int qib_twsi_reset(struct qib_devdata *dd)
+{
+	int clock_cycles_left = 9;
+	int was_high = 0;
+	u32 pins, mask;
+
+	/* Both SCL and SDA should be high. If not, there
+	 * is something wrong.
+	 */
+	mask = (1UL << dd->gpio_scl_num) | (1UL << dd->gpio_sda_num);
+
+	/*
+	 * Force pins to desired innocuous state.
+	 * This is the default power-on state with out=0 and dir=0,
+	 * So tri-stated and should be floating high (barring HW problems)
+	 */
+	dd->f_gpio_mod(dd, 0, 0, mask);
+
+	/*
+	 * Clock nine times to get all listeners into a sane state.
+	 * If SDA does not go high at any point, we are wedged.
+	 * One vendor recommends then issuing START followed by STOP.
+	 * we cannot use our "normal" functions to do that, because
+	 * if SCL drops between them, another vendor's part will
+	 * wedge, dropping SDA and keeping it low forever, at the end of
+	 * the next transaction (even if it was not the device addressed).
+	 * So our START and STOP take place with SCL held high.
+	 */
+	while (clock_cycles_left--) {
+		scl_out(dd, 0);
+		scl_out(dd, 1);
+		/* Note if SDA is high, but keep clocking to sync slave */
+		was_high |= sda_in(dd, 0);
+	}
+
+	if (was_high) {
+		/*
+		 * We saw a high, which we hope means the slave is sync'd.
+		 * Issue START, STOP, pause for T_BUF.
+		 */
+
+		pins = dd->f_gpio_mod(dd, 0, 0, 0);
+		if ((pins & mask) != mask)
+			qib_dev_err(dd, "GPIO pins not at rest: %d\n",
+				    pins & mask);
+		/* Drop SDA to issue START */
+		udelay(1); /* Guarantee .6 uSec setup */
+		sda_out(dd, 0);
+		udelay(1); /* Guarantee .6 uSec hold */
+		/* At this point, SCL is high, SDA low. Raise SDA for STOP */
+		sda_out(dd, 1);
+		udelay(TWSI_BUF_WAIT_USEC);
+	}
+
+	return !was_high;
+}
+
+#define QIB_TWSI_START 0x100
+#define QIB_TWSI_STOP 0x200
+
+/* Write byte to TWSI, optionally prefixed with START or suffixed with
+ * STOP.
+ * returns 0 if OK (ACK received), else != 0
+ */
+static int qib_twsi_wr(struct qib_devdata *dd, int data, int flags)
+{
+	int ret = 1;
+	if (flags & QIB_TWSI_START)
+		start_seq(dd);
+
+	ret = wr_byte(dd, data); /* Leaves SCL low (from i2c_ackrcv()) */
+
+	if (flags & QIB_TWSI_STOP)
+		stop_cmd(dd);
+	return ret;
+}
+
+/* Added functionality for IBA7220-based cards */
+#define QIB_TEMP_DEV 0x98
+
+/*
+ * qib_twsi_blk_rd
+ * Formerly called qib_eeprom_internal_read, and only used for eeprom,
+ * but now the general interface for data transfer from twsi devices.
+ * One vestige of its former role is that it recognizes a device
+ * QIB_TWSI_NO_DEV and does the correct operation for the legacy part,
+ * which responded to all TWSI device codes, interpreting them as
+ * address within device. On all other devices found on board handled by
+ * this driver, the device is followed by a one-byte "address" which selects
+ * the "register" or "offset" within the device from which data should
+ * be read.
+ */
+int qib_twsi_blk_rd(struct qib_devdata *dd, int dev, int addr,
+		    void *buffer, int len)
+{
+	int ret;
+	u8 *bp = buffer;
+
+	ret = 1;
+
+	if (dev == QIB_TWSI_NO_DEV) {
+		/* legacy not-really-I2C */
+		addr = (addr << 1) | READ_CMD;
+		ret = qib_twsi_wr(dd, addr, QIB_TWSI_START);
+	} else {
+		/* Actual I2C */
+		ret = qib_twsi_wr(dd, dev | WRITE_CMD, QIB_TWSI_START);
+		if (ret) {
+			stop_cmd(dd);
+			ret = 1;
+			goto bail;
+		}
+		/*
+		 * SFF spec claims we do _not_ stop after the addr
+		 * but simply issue a start with the "read" dev-addr.
+		 * Since we are implicitely waiting for ACK here,
+		 * we need t_buf (nominally 20uSec) before that start,
+		 * and cannot rely on the delay built in to the STOP
+		 */
+		ret = qib_twsi_wr(dd, addr, 0);
+		udelay(TWSI_BUF_WAIT_USEC);
+
+		if (ret) {
+			qib_dev_err(dd,
+				"Failed to write interface read addr %02X\n",
+				addr);
+			ret = 1;
+			goto bail;
+		}
+		ret = qib_twsi_wr(dd, dev | READ_CMD, QIB_TWSI_START);
+	}
+	if (ret) {
+		stop_cmd(dd);
+		ret = 1;
+		goto bail;
+	}
+
+	/*
+	 * block devices keeps clocking data out as long as we ack,
+	 * automatically incrementing the address. Some have "pages"
+	 * whose boundaries will not be crossed, but the handling
+	 * of these is left to the caller, who is in a better
+	 * position to know.
+	 */
+	while (len-- > 0) {
+		/*
+		 * Get and store data, sending ACK if length remaining,
+		 * else STOP
+		 */
+		*bp++ = rd_byte(dd, !len);
+	}
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+/*
+ * qib_twsi_blk_wr
+ * Formerly called qib_eeprom_internal_write, and only used for eeprom,
+ * but now the general interface for data transfer to twsi devices.
+ * One vestige of its former role is that it recognizes a device
+ * QIB_TWSI_NO_DEV and does the correct operation for the legacy part,
+ * which responded to all TWSI device codes, interpreting them as
+ * address within device. On all other devices found on board handled by
+ * this driver, the device is followed by a one-byte "address" which selects
+ * the "register" or "offset" within the device to which data should
+ * be written.
+ */
+int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr,
+		    const void *buffer, int len)
+{
+	int sub_len;
+	const u8 *bp = buffer;
+	int max_wait_time, i;
+	int ret;
+	ret = 1;
+
+	while (len > 0) {
+		if (dev == QIB_TWSI_NO_DEV) {
+			if (qib_twsi_wr(dd, (addr << 1) | WRITE_CMD,
+					QIB_TWSI_START)) {
+				goto failed_write;
+			}
+		} else {
+			/* Real I2C */
+			if (qib_twsi_wr(dd, dev | WRITE_CMD, QIB_TWSI_START))
+				goto failed_write;
+			ret = qib_twsi_wr(dd, addr, 0);
+			if (ret) {
+				qib_dev_err(dd, "Failed to write interface"
+					    " write addr %02X\n", addr);
+				goto failed_write;
+			}
+		}
+
+		sub_len = min(len, 4);
+		addr += sub_len;
+		len -= sub_len;
+
+		for (i = 0; i < sub_len; i++)
+			if (qib_twsi_wr(dd, *bp++, 0))
+				goto failed_write;
+
+		stop_cmd(dd);
+
+		/*
+		 * Wait for write complete by waiting for a successful
+		 * read (the chip replies with a zero after the write
+		 * cmd completes, and before it writes to the eeprom.
+		 * The startcmd for the read will fail the ack until
+		 * the writes have completed.   We do this inline to avoid
+		 * the debug prints that are in the real read routine
+		 * if the startcmd fails.
+		 * We also use the proper device address, so it doesn't matter
+		 * whether we have real eeprom_dev. Legacy likes any address.
+		 */
+		max_wait_time = 100;
+		while (qib_twsi_wr(dd, dev | READ_CMD, QIB_TWSI_START)) {
+			stop_cmd(dd);
+			if (!--max_wait_time)
+				goto failed_write;
+		}
+		/* now read (and ignore) the resulting byte */
+		rd_byte(dd, 1);
+	}
+
+	ret = 0;
+	goto bail;
+
+failed_write:
+	stop_cmd(dd);
+	ret = 1;
+
+bail:
+	return ret;
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 39/52] IB/qib: Add qib_tx.c
From: Ralph Campbell @ 2010-05-07  0:02 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_tx.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_tx.c |  557 ++++++++++++++++++++++++++++++++++++
 1 files changed, 557 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_tx.c

diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c
new file mode 100644
index 0000000..f7eb1dd
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_tx.c
@@ -0,0 +1,557 @@
+/*
+ * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+
+#include "qib.h"
+
+static unsigned qib_hol_timeout_ms = 3000;
+module_param_named(hol_timeout_ms, qib_hol_timeout_ms, uint, S_IRUGO);
+MODULE_PARM_DESC(hol_timeout_ms,
+		 "duration of user app suspension after link failure");
+
+unsigned qib_sdma_fetch_arb = 1;
+module_param_named(fetch_arb, qib_sdma_fetch_arb, uint, S_IRUGO);
+MODULE_PARM_DESC(fetch_arb, "IBA7220: change SDMA descriptor arbitration");
+
+/**
+ * qib_disarm_piobufs - cancel a range of PIO buffers
+ * @dd: the qlogic_ib device
+ * @first: the first PIO buffer to cancel
+ * @cnt: the number of PIO buffers to cancel
+ *
+ * Cancel a range of PIO buffers. Used at user process close,
+ * in case it died while writing to a PIO buffer.
+ */
+void qib_disarm_piobufs(struct qib_devdata *dd, unsigned first, unsigned cnt)
+{
+	unsigned long flags;
+	unsigned i;
+	unsigned last;
+
+	last = first + cnt;
+	spin_lock_irqsave(&dd->pioavail_lock, flags);
+	for (i = first; i < last; i++) {
+		__clear_bit(i, dd->pio_need_disarm);
+		dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(i));
+	}
+	spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+}
+
+/*
+ * This is called by a user process when it sees the DISARM_BUFS event
+ * bit is set.
+ */
+int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *rcd)
+{
+	struct qib_devdata *dd = rcd->dd;
+	unsigned i;
+	unsigned last;
+	unsigned n = 0;
+
+	last = rcd->pio_base + rcd->piocnt;
+	/*
+	 * Don't need uctxt_lock here, since user has called in to us.
+	 * Clear at start in case more interrupts set bits while we
+	 * are disarming
+	 */
+	if (rcd->user_event_mask) {
+		/*
+		 * subctxt_cnt is 0 if not shared, so do base
+		 * separately, first, then remaining subctxt, if any
+		 */
+		clear_bit(_QIB_EVENT_DISARM_BUFS_BIT, &rcd->user_event_mask[0]);
+		for (i = 1; i < rcd->subctxt_cnt; i++)
+			clear_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+				  &rcd->user_event_mask[i]);
+	}
+	spin_lock_irq(&dd->pioavail_lock);
+	for (i = rcd->pio_base; i < last; i++) {
+		if (__test_and_clear_bit(i, dd->pio_need_disarm)) {
+			n++;
+			dd->f_sendctrl(rcd->ppd, QIB_SENDCTRL_DISARM_BUF(i));
+		}
+	}
+	spin_unlock_irq(&dd->pioavail_lock);
+	return 0;
+}
+
+static struct qib_pportdata *is_sdma_buf(struct qib_devdata *dd, unsigned i)
+{
+	struct qib_pportdata *ppd;
+	unsigned pidx;
+
+	for (pidx = 0; pidx < dd->num_pports; pidx++) {
+		ppd = dd->pport + pidx;
+		if (i >= ppd->sdma_state.first_sendbuf &&
+		    i < ppd->sdma_state.last_sendbuf)
+			return ppd;
+	}
+	return NULL;
+}
+
+/*
+ * Return true if send buffer is being used by a user context.
+ * Sets  _QIB_EVENT_DISARM_BUFS_BIT in user_event_mask as a side effect
+ */
+static int find_ctxt(struct qib_devdata *dd, unsigned bufn)
+{
+	struct qib_ctxtdata *rcd;
+	unsigned ctxt;
+	int ret = 0;
+
+	spin_lock(&dd->uctxt_lock);
+	for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; ctxt++) {
+		rcd = dd->rcd[ctxt];
+		if (!rcd || bufn < rcd->pio_base ||
+		    bufn >= rcd->pio_base + rcd->piocnt)
+			continue;
+		if (rcd->user_event_mask) {
+			int i;
+			/*
+			 * subctxt_cnt is 0 if not shared, so do base
+			 * separately, first, then remaining subctxt, if any
+			 */
+			set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+				&rcd->user_event_mask[0]);
+			for (i = 1; i < rcd->subctxt_cnt; i++)
+				set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+					&rcd->user_event_mask[i]);
+		}
+		ret = 1;
+		break;
+	}
+	spin_unlock(&dd->uctxt_lock);
+
+	return ret;
+}
+
+/*
+ * Disarm a set of send buffers.  If the buffer might be actively being
+ * written to, mark the buffer to be disarmed later when it is not being
+ * written to.
+ *
+ * This should only be called from the IRQ error handler.
+ */
+void qib_disarm_piobufs_set(struct qib_devdata *dd, unsigned long *mask,
+			    unsigned cnt)
+{
+	struct qib_pportdata *ppd, *pppd[dd->num_pports];
+	unsigned i;
+	unsigned long flags;
+
+	for (i = 0; i < dd->num_pports; i++)
+		pppd[i] = NULL;
+
+	for (i = 0; i < cnt; i++) {
+		int which;
+		if (!test_bit(i, mask))
+			continue;
+		/*
+		 * If the buffer is owned by the DMA hardware,
+		 * reset the DMA engine.
+		 */
+		ppd = is_sdma_buf(dd, i);
+		if (ppd) {
+			pppd[ppd->port] = ppd;
+			continue;
+		}
+		/*
+		 * If the kernel is writing the buffer or the buffer is
+		 * owned by a user process, we can't clear it yet.
+		 */
+		spin_lock_irqsave(&dd->pioavail_lock, flags);
+		if (test_bit(i, dd->pio_writing) ||
+		    (!test_bit(i << 1, dd->pioavailkernel) &&
+		     find_ctxt(dd, i))) {
+			__set_bit(i, dd->pio_need_disarm);
+			which = 0;
+		} else {
+			which = 1;
+			dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(i));
+		}
+		spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+	}
+
+	/* do cancel_sends once per port that had sdma piobufs in error */
+	for (i = 0; i < dd->num_pports; i++)
+		if (pppd[i])
+			qib_cancel_sends(pppd[i]);
+}
+
+/**
+ * update_send_bufs - update shadow copy of the PIO availability map
+ * @dd: the qlogic_ib device
+ *
+ * called whenever our local copy indicates we have run out of send buffers
+ */
+static void update_send_bufs(struct qib_devdata *dd)
+{
+	unsigned long flags;
+	unsigned i;
+	const unsigned piobregs = dd->pioavregs;
+
+	/*
+	 * If the generation (check) bits have changed, then we update the
+	 * busy bit for the corresponding PIO buffer.  This algorithm will
+	 * modify positions to the value they already have in some cases
+	 * (i.e., no change), but it's faster than changing only the bits
+	 * that have changed.
+	 *
+	 * We would like to do this atomicly, to avoid spinlocks in the
+	 * critical send path, but that's not really possible, given the
+	 * type of changes, and that this routine could be called on
+	 * multiple cpu's simultaneously, so we lock in this routine only,
+	 * to avoid conflicting updates; all we change is the shadow, and
+	 * it's a single 64 bit memory location, so by definition the update
+	 * is atomic in terms of what other cpu's can see in testing the
+	 * bits.  The spin_lock overhead isn't too bad, since it only
+	 * happens when all buffers are in use, so only cpu overhead, not
+	 * latency or bandwidth is affected.
+	 */
+	if (!dd->pioavailregs_dma)
+		return;
+	spin_lock_irqsave(&dd->pioavail_lock, flags);
+	for (i = 0; i < piobregs; i++) {
+		u64 pchbusy, pchg, piov, pnew;
+
+		piov = le64_to_cpu(dd->pioavailregs_dma[i]);
+		pchg = dd->pioavailkernel[i] &
+			~(dd->pioavailshadow[i] ^ piov);
+		pchbusy = pchg << QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT;
+		if (pchg && (pchbusy & dd->pioavailshadow[i])) {
+			pnew = dd->pioavailshadow[i] & ~pchbusy;
+			pnew |= piov & pchbusy;
+			dd->pioavailshadow[i] = pnew;
+		}
+	}
+	spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+}
+
+/*
+ * Debugging code and stats updates if no pio buffers available.
+ */
+static noinline void no_send_bufs(struct qib_devdata *dd)
+{
+	dd->upd_pio_shadow = 1;
+
+	/* not atomic, but if we lose a stat count in a while, that's OK */
+	qib_stats.sps_nopiobufs++;
+}
+
+/*
+ * Common code for normal driver send buffer allocation, and reserved
+ * allocation.
+ *
+ * Do appropriate marking as busy, etc.
+ * Returns buffer pointer if one is found, otherwise NULL.
+ */
+u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum,
+				  u32 first, u32 last)
+{
+	unsigned i, j, updated = 0;
+	unsigned nbufs;
+	unsigned long flags;
+	unsigned long *shadow = dd->pioavailshadow;
+	u32 __iomem *buf;
+
+	if (!(dd->flags & QIB_PRESENT))
+		return NULL;
+
+	nbufs = last - first + 1; /* number in range to check */
+	if (dd->upd_pio_shadow) {
+		/*
+		 * Minor optimization.  If we had no buffers on last call,
+		 * start out by doing the update; continue and do scan even
+		 * if no buffers were updated, to be paranoid.
+		 */
+		update_send_bufs(dd);
+		updated++;
+	}
+	i = first;
+rescan:
+	/*
+	 * While test_and_set_bit() is atomic, we do that and then the
+	 * change_bit(), and the pair is not.  See if this is the cause
+	 * of the remaining armlaunch errors.
+	 */
+	spin_lock_irqsave(&dd->pioavail_lock, flags);
+	for (j = 0; j < nbufs; j++, i++) {
+		if (i > last)
+			i = first;
+		if (__test_and_set_bit((2 * i) + 1, shadow))
+			continue;
+		/* flip generation bit */
+		__change_bit(2 * i, shadow);
+		/* remember that the buffer can be written to now */
+		__set_bit(i, dd->pio_writing);
+		break;
+	}
+	spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+
+	if (j == nbufs) {
+		if (!updated) {
+			/*
+			 * First time through; shadow exhausted, but may be
+			 * buffers available, try an update and then rescan.
+			 */
+			update_send_bufs(dd);
+			updated++;
+			i = first;
+			goto rescan;
+		}
+		no_send_bufs(dd);
+		buf = NULL;
+	} else {
+		if (i < dd->piobcnt2k)
+			buf = (u32 __iomem *)(dd->pio2kbase +
+				i * dd->palign);
+		else
+			buf = (u32 __iomem *)(dd->pio4kbase +
+				(i - dd->piobcnt2k) * dd->align4k);
+		if (pbufnum)
+			*pbufnum = i;
+		dd->upd_pio_shadow = 0;
+	}
+
+	return buf;
+}
+
+/*
+ * Record that the caller is finished writing to the buffer so we don't
+ * disarm it while it is being written and disarm it now if needed.
+ */
+void qib_sendbuf_done(struct qib_devdata *dd, unsigned n)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&dd->pioavail_lock, flags);
+	__clear_bit(n, dd->pio_writing);
+	if (__test_and_clear_bit(n, dd->pio_need_disarm))
+		dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(n));
+	spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+}
+
+/**
+ * qib_chg_pioavailkernel - change which send buffers are available for kernel
+ * @dd: the qlogic_ib device
+ * @start: the starting send buffer number
+ * @len: the number of send buffers
+ * @avail: true if the buffers are available for kernel use, false otherwise
+ */
+void qib_chg_pioavailkernel(struct qib_devdata *dd, unsigned start,
+	unsigned len, u32 avail, struct qib_ctxtdata *rcd)
+{
+	unsigned long flags;
+	unsigned end;
+	unsigned ostart = start;
+
+	/* There are two bits per send buffer (busy and generation) */
+	start *= 2;
+	end = start + len * 2;
+
+	spin_lock_irqsave(&dd->pioavail_lock, flags);
+	/* Set or clear the busy bit in the shadow. */
+	while (start < end) {
+		if (avail) {
+			unsigned long dma;
+			int i;
+
+			/*
+			 * The BUSY bit will never be set, because we disarm
+			 * the user buffers before we hand them back to the
+			 * kernel.  We do have to make sure the generation
+			 * bit is set correctly in shadow, since it could
+			 * have changed many times while allocated to user.
+			 * We can't use the bitmap functions on the full
+			 * dma array because it is always little-endian, so
+			 * we have to flip to host-order first.
+			 * BITS_PER_LONG is slightly wrong, since it's
+			 * always 64 bits per register in chip...
+			 * We only work on 64 bit kernels, so that's OK.
+			 */
+			i = start / BITS_PER_LONG;
+			__clear_bit(QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT + start,
+				    dd->pioavailshadow);
+			dma = (unsigned long)
+				le64_to_cpu(dd->pioavailregs_dma[i]);
+			if (test_bit((QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT +
+				      start) % BITS_PER_LONG, &dma))
+				__set_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT +
+					  start, dd->pioavailshadow);
+			else
+				__clear_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT
+					    + start, dd->pioavailshadow);
+			__set_bit(start, dd->pioavailkernel);
+		} else {
+			__set_bit(start + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT,
+				  dd->pioavailshadow);
+			__clear_bit(start, dd->pioavailkernel);
+		}
+		start += 2;
+	}
+
+	spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+
+	dd->f_txchk_change(dd, ostart, len, avail, rcd);
+}
+
+/*
+ * Flush all sends that might be in the ready to send state, as well as any
+ * that are in the process of being sent.  Used whenever we need to be
+ * sure the send side is idle.  Cleans up all buffer state by canceling
+ * all pio buffers, and issuing an abort, which cleans up anything in the
+ * launch fifo.  The cancel is superfluous on some chip versions, but
+ * it's safer to always do it.
+ * PIOAvail bits are updated by the chip as if a normal send had happened.
+ */
+void qib_cancel_sends(struct qib_pportdata *ppd)
+{
+	struct qib_devdata *dd = ppd->dd;
+	struct qib_ctxtdata *rcd;
+	unsigned long flags;
+	unsigned ctxt;
+	unsigned i;
+	unsigned last;
+
+	/*
+	 * Tell PSM to disarm buffers again before trying to reuse them.
+	 * We need to be sure the rcd doesn't change out from under us
+	 * while we do so.  We hold the two locks sequentially.  We might
+	 * needlessly set some need_disarm bits as a result, if the
+	 * context is closed after we release the uctxt_lock, but that's
+	 * fairly benign, and safer than nesting the locks.
+	 */
+	for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; ctxt++) {
+		spin_lock_irqsave(&dd->uctxt_lock, flags);
+		rcd = dd->rcd[ctxt];
+		if (rcd && rcd->ppd == ppd) {
+			last = rcd->pio_base + rcd->piocnt;
+			if (rcd->user_event_mask) {
+				/*
+				 * subctxt_cnt is 0 if not shared, so do base
+				 * separately, first, then remaining subctxt,
+				 * if any
+				 */
+				set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+					&rcd->user_event_mask[0]);
+				for (i = 1; i < rcd->subctxt_cnt; i++)
+					set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+						&rcd->user_event_mask[i]);
+			}
+			i = rcd->pio_base;
+			spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+			spin_lock_irqsave(&dd->pioavail_lock, flags);
+			for (; i < last; i++)
+				__set_bit(i, dd->pio_need_disarm);
+			spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+		} else
+			spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+	}
+
+	if (!(dd->flags & QIB_HAS_SEND_DMA))
+		dd->f_sendctrl(ppd, QIB_SENDCTRL_DISARM_ALL |
+				    QIB_SENDCTRL_FLUSH);
+}
+
+/*
+ * Force an update of in-memory copy of the pioavail registers, when
+ * needed for any of a variety of reasons.
+ * If already off, this routine is a nop, on the assumption that the
+ * caller (or set of callers) will "do the right thing".
+ * This is a per-device operation, so just the first port.
+ */
+void qib_force_pio_avail_update(struct qib_devdata *dd)
+{
+	dd->f_sendctrl(dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
+}
+
+void qib_hol_down(struct qib_pportdata *ppd)
+{
+	/*
+	 * Cancel sends when the link goes DOWN so that we aren't doing it
+	 * at INIT when we might be trying to send SMI packets.
+	 */
+	if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG))
+		qib_cancel_sends(ppd);
+}
+
+/*
+ * Link is at INIT.
+ * We start the HoL timer so we can detect stuck packets blocking SMP replies.
+ * Timer may already be running, so use mod_timer, not add_timer.
+ */
+void qib_hol_init(struct qib_pportdata *ppd)
+{
+	if (ppd->hol_state != QIB_HOL_INIT) {
+		ppd->hol_state = QIB_HOL_INIT;
+		mod_timer(&ppd->hol_timer,
+			  jiffies + msecs_to_jiffies(qib_hol_timeout_ms));
+	}
+}
+
+/*
+ * Link is up, continue any user processes, and ensure timer
+ * is a nop, if running.  Let timer keep running, if set; it
+ * will nop when it sees the link is up.
+ */
+void qib_hol_up(struct qib_pportdata *ppd)
+{
+	ppd->hol_state = QIB_HOL_UP;
+}
+
+/*
+ * This is only called via the timer.
+ */
+void qib_hol_event(unsigned long opaque)
+{
+	struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
+
+	/* If hardware error, etc, skip. */
+	if (!(ppd->dd->flags & QIB_INITTED))
+		return;
+
+	if (ppd->hol_state != QIB_HOL_UP) {
+		/*
+		 * Try to flush sends in case a stuck packet is blocking
+		 * SMP replies.
+		 */
+		qib_hol_down(ppd);
+		mod_timer(&ppd->hol_timer,
+			  jiffies + msecs_to_jiffies(qib_hol_timeout_ms));
+	}
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 40/52] IB/qib: Add qib_uc.c
From: Ralph Campbell @ 2010-05-07  0:02 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_uc.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_uc.c |  555 ++++++++++++++++++++++++++++++++++++
 1 files changed, 555 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_uc.c

diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
new file mode 100644
index 0000000..6c7fe78
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -0,0 +1,555 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
+ * All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "qib.h"
+
+/* cut down ridiculously long IB macro names */
+#define OP(x) IB_OPCODE_UC_##x
+
+/**
+ * qib_make_uc_req - construct a request packet (SEND, RDMA write)
+ * @qp: a pointer to the QP
+ *
+ * Return 1 if constructed; otherwise, return 0.
+ */
+int qib_make_uc_req(struct qib_qp *qp)
+{
+	struct qib_other_headers *ohdr;
+	struct qib_swqe *wqe;
+	unsigned long flags;
+	u32 hwords;
+	u32 bth0;
+	u32 len;
+	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+	int ret = 0;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+
+	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) {
+		if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+			goto bail;
+		/* We are in the error state, flush the work request. */
+		if (qp->s_last == qp->s_head)
+			goto bail;
+		/* If DMAs are in progress, we can't flush immediately. */
+		if (atomic_read(&qp->s_dma_busy)) {
+			qp->s_flags |= QIB_S_WAIT_DMA;
+			goto bail;
+		}
+		wqe = get_swqe_ptr(qp, qp->s_last);
+		qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+		goto done;
+	}
+
+	ohdr = &qp->s_hdr.u.oth;
+	if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+		ohdr = &qp->s_hdr.u.l.oth;
+
+	/* header size in 32-bit words LRH+BTH = (8+12)/4. */
+	hwords = 5;
+	bth0 = 0;
+
+	/* Get the next send request. */
+	wqe = get_swqe_ptr(qp, qp->s_cur);
+	qp->s_wqe = NULL;
+	switch (qp->s_state) {
+	default:
+		if (!(ib_qib_state_ops[qp->state] &
+		    QIB_PROCESS_NEXT_SEND_OK))
+			goto bail;
+		/* Check if send work queue is empty. */
+		if (qp->s_cur == qp->s_head)
+			goto bail;
+		/*
+		 * Start a new request.
+		 */
+		wqe->psn = qp->s_next_psn;
+		qp->s_psn = qp->s_next_psn;
+		qp->s_sge.sge = wqe->sg_list[0];
+		qp->s_sge.sg_list = wqe->sg_list + 1;
+		qp->s_sge.num_sge = wqe->wr.num_sge;
+		qp->s_sge.total_len = wqe->length;
+		len = wqe->length;
+		qp->s_len = len;
+		switch (wqe->wr.opcode) {
+		case IB_WR_SEND:
+		case IB_WR_SEND_WITH_IMM:
+			if (len > pmtu) {
+				qp->s_state = OP(SEND_FIRST);
+				len = pmtu;
+				break;
+			}
+			if (wqe->wr.opcode == IB_WR_SEND)
+				qp->s_state = OP(SEND_ONLY);
+			else {
+				qp->s_state =
+					OP(SEND_ONLY_WITH_IMMEDIATE);
+				/* Immediate data comes after the BTH */
+				ohdr->u.imm_data = wqe->wr.ex.imm_data;
+				hwords += 1;
+			}
+			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+				bth0 |= IB_BTH_SOLICITED;
+			qp->s_wqe = wqe;
+			if (++qp->s_cur >= qp->s_size)
+				qp->s_cur = 0;
+			break;
+
+		case IB_WR_RDMA_WRITE:
+		case IB_WR_RDMA_WRITE_WITH_IMM:
+			ohdr->u.rc.reth.vaddr =
+				cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+			ohdr->u.rc.reth.rkey =
+				cpu_to_be32(wqe->wr.wr.rdma.rkey);
+			ohdr->u.rc.reth.length = cpu_to_be32(len);
+			hwords += sizeof(struct ib_reth) / 4;
+			if (len > pmtu) {
+				qp->s_state = OP(RDMA_WRITE_FIRST);
+				len = pmtu;
+				break;
+			}
+			if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+				qp->s_state = OP(RDMA_WRITE_ONLY);
+			else {
+				qp->s_state =
+					OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
+				/* Immediate data comes after the RETH */
+				ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
+				hwords += 1;
+				if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+					bth0 |= IB_BTH_SOLICITED;
+			}
+			qp->s_wqe = wqe;
+			if (++qp->s_cur >= qp->s_size)
+				qp->s_cur = 0;
+			break;
+
+		default:
+			goto bail;
+		}
+		break;
+
+	case OP(SEND_FIRST):
+		qp->s_state = OP(SEND_MIDDLE);
+		/* FALLTHROUGH */
+	case OP(SEND_MIDDLE):
+		len = qp->s_len;
+		if (len > pmtu) {
+			len = pmtu;
+			break;
+		}
+		if (wqe->wr.opcode == IB_WR_SEND)
+			qp->s_state = OP(SEND_LAST);
+		else {
+			qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
+			/* Immediate data comes after the BTH */
+			ohdr->u.imm_data = wqe->wr.ex.imm_data;
+			hwords += 1;
+		}
+		if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+			bth0 |= IB_BTH_SOLICITED;
+		qp->s_wqe = wqe;
+		if (++qp->s_cur >= qp->s_size)
+			qp->s_cur = 0;
+		break;
+
+	case OP(RDMA_WRITE_FIRST):
+		qp->s_state = OP(RDMA_WRITE_MIDDLE);
+		/* FALLTHROUGH */
+	case OP(RDMA_WRITE_MIDDLE):
+		len = qp->s_len;
+		if (len > pmtu) {
+			len = pmtu;
+			break;
+		}
+		if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+			qp->s_state = OP(RDMA_WRITE_LAST);
+		else {
+			qp->s_state =
+				OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
+			/* Immediate data comes after the BTH */
+			ohdr->u.imm_data = wqe->wr.ex.imm_data;
+			hwords += 1;
+			if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+				bth0 |= IB_BTH_SOLICITED;
+		}
+		qp->s_wqe = wqe;
+		if (++qp->s_cur >= qp->s_size)
+			qp->s_cur = 0;
+		break;
+	}
+	qp->s_len -= len;
+	qp->s_hdrwords = hwords;
+	qp->s_cur_sge = &qp->s_sge;
+	qp->s_cur_size = len;
+	qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
+			    qp->s_next_psn++ & QIB_PSN_MASK);
+done:
+	ret = 1;
+	goto unlock;
+
+bail:
+	qp->s_flags &= ~QIB_S_BUSY;
+unlock:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	return ret;
+}
+
+/**
+ * qib_uc_rcv - handle an incoming UC packet
+ * @ibp: the port the packet came in on
+ * @hdr: the header of the packet
+ * @has_grh: true if the packet has a GRH
+ * @data: the packet data
+ * @tlen: the length of the packet
+ * @qp: the QP for this packet.
+ *
+ * This is called from qib_qp_rcv() to process an incoming UC packet
+ * for the given QP.
+ * Called at interrupt level.
+ */
+void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+		int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+{
+	struct qib_other_headers *ohdr;
+	unsigned long flags;
+	u32 opcode;
+	u32 hdrsize;
+	u32 psn;
+	u32 pad;
+	struct ib_wc wc;
+	u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+	struct ib_reth *reth;
+	int ret;
+
+	/* Check for GRH */
+	if (!has_grh) {
+		ohdr = &hdr->u.oth;
+		hdrsize = 8 + 12;       /* LRH + BTH */
+	} else {
+		ohdr = &hdr->u.l.oth;
+		hdrsize = 8 + 40 + 12;  /* LRH + GRH + BTH */
+	}
+
+	opcode = be32_to_cpu(ohdr->bth[0]);
+	spin_lock_irqsave(&qp->s_lock, flags);
+	if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
+		goto sunlock;
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+
+	psn = be32_to_cpu(ohdr->bth[2]);
+	opcode >>= 24;
+	memset(&wc, 0, sizeof wc);
+
+	/* Prevent simultaneous processing after APM on different CPUs */
+	spin_lock(&qp->r_lock);
+
+	/* Compare the PSN verses the expected PSN. */
+	if (unlikely(qib_cmp24(psn, qp->r_psn) != 0)) {
+		/*
+		 * Handle a sequence error.
+		 * Silently drop any current message.
+		 */
+		qp->r_psn = psn;
+inv:
+		if (qp->r_state == OP(SEND_FIRST) ||
+		    qp->r_state == OP(SEND_MIDDLE)) {
+			set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
+			qp->r_sge.num_sge = 0;
+		} else
+			while (qp->r_sge.num_sge) {
+				atomic_dec(&qp->r_sge.sge.mr->refcount);
+				if (--qp->r_sge.num_sge)
+					qp->r_sge.sge = *qp->r_sge.sg_list++;
+			}
+		qp->r_state = OP(SEND_LAST);
+		switch (opcode) {
+		case OP(SEND_FIRST):
+		case OP(SEND_ONLY):
+		case OP(SEND_ONLY_WITH_IMMEDIATE):
+			goto send_first;
+
+		case OP(RDMA_WRITE_FIRST):
+		case OP(RDMA_WRITE_ONLY):
+		case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
+			goto rdma_first;
+
+		default:
+			goto drop;
+		}
+	}
+
+	/* Check for opcode sequence errors. */
+	switch (qp->r_state) {
+	case OP(SEND_FIRST):
+	case OP(SEND_MIDDLE):
+		if (opcode == OP(SEND_MIDDLE) ||
+		    opcode == OP(SEND_LAST) ||
+		    opcode == OP(SEND_LAST_WITH_IMMEDIATE))
+			break;
+		goto inv;
+
+	case OP(RDMA_WRITE_FIRST):
+	case OP(RDMA_WRITE_MIDDLE):
+		if (opcode == OP(RDMA_WRITE_MIDDLE) ||
+		    opcode == OP(RDMA_WRITE_LAST) ||
+		    opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
+			break;
+		goto inv;
+
+	default:
+		if (opcode == OP(SEND_FIRST) ||
+		    opcode == OP(SEND_ONLY) ||
+		    opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
+		    opcode == OP(RDMA_WRITE_FIRST) ||
+		    opcode == OP(RDMA_WRITE_ONLY) ||
+		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
+			break;
+		goto inv;
+	}
+
+	if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
+		qp->r_flags |= QIB_R_COMM_EST;
+		if (qp->ibqp.event_handler) {
+			struct ib_event ev;
+
+			ev.device = qp->ibqp.device;
+			ev.element.qp = &qp->ibqp;
+			ev.event = IB_EVENT_COMM_EST;
+			qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+		}
+	}
+
+	/* OK, process the packet. */
+	switch (opcode) {
+	case OP(SEND_FIRST):
+	case OP(SEND_ONLY):
+	case OP(SEND_ONLY_WITH_IMMEDIATE):
+send_first:
+		if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
+			qp->r_sge = qp->s_rdma_read_sge;
+		else {
+			ret = qib_get_rwqe(qp, 0);
+			if (ret < 0)
+				goto op_err;
+			if (!ret)
+				goto drop;
+			/*
+			 * qp->s_rdma_read_sge will be the owner
+			 * of the mr references.
+			 */
+			qp->s_rdma_read_sge = qp->r_sge;
+		}
+		qp->r_rcv_len = 0;
+		if (opcode == OP(SEND_ONLY))
+			goto send_last;
+		else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
+			goto send_last_imm;
+		/* FALLTHROUGH */
+	case OP(SEND_MIDDLE):
+		/* Check for invalid length PMTU or posted rwqe len. */
+		if (unlikely(tlen != (hdrsize + pmtu + 4)))
+			goto rewind;
+		qp->r_rcv_len += pmtu;
+		if (unlikely(qp->r_rcv_len > qp->r_len))
+			goto rewind;
+		qib_copy_sge(&qp->r_sge, data, pmtu, 0);
+		break;
+
+	case OP(SEND_LAST_WITH_IMMEDIATE):
+send_last_imm:
+		wc.ex.imm_data = ohdr->u.imm_data;
+		hdrsize += 4;
+		wc.wc_flags = IB_WC_WITH_IMM;
+		/* FALLTHROUGH */
+	case OP(SEND_LAST):
+send_last:
+		/* Get the number of bytes the message was padded by. */
+		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+		/* Check for invalid length. */
+		/* XXX LAST len should be >= 1 */
+		if (unlikely(tlen < (hdrsize + pad + 4)))
+			goto rewind;
+		/* Don't count the CRC. */
+		tlen -= (hdrsize + pad + 4);
+		wc.byte_len = tlen + qp->r_rcv_len;
+		if (unlikely(wc.byte_len > qp->r_len))
+			goto rewind;
+		wc.opcode = IB_WC_RECV;
+last_imm:
+		qib_copy_sge(&qp->r_sge, data, tlen, 0);
+		while (qp->s_rdma_read_sge.num_sge) {
+			atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount);
+			if (--qp->s_rdma_read_sge.num_sge)
+				qp->s_rdma_read_sge.sge =
+					*qp->s_rdma_read_sge.sg_list++;
+		}
+		wc.wr_id = qp->r_wr_id;
+		wc.status = IB_WC_SUCCESS;
+		wc.qp = &qp->ibqp;
+		wc.src_qp = qp->remote_qpn;
+		wc.slid = qp->remote_ah_attr.dlid;
+		wc.sl = qp->remote_ah_attr.sl;
+		/* Signal completion event if the solicited bit is set. */
+		qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+			     (ohdr->bth[0] &
+				cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+		break;
+
+	case OP(RDMA_WRITE_FIRST):
+	case OP(RDMA_WRITE_ONLY):
+	case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
+rdma_first:
+		if (unlikely(!(qp->qp_access_flags &
+			       IB_ACCESS_REMOTE_WRITE))) {
+			goto drop;
+		}
+		reth = &ohdr->u.rc.reth;
+		hdrsize += sizeof(*reth);
+		qp->r_len = be32_to_cpu(reth->length);
+		qp->r_rcv_len = 0;
+		qp->r_sge.sg_list = NULL;
+		if (qp->r_len != 0) {
+			u32 rkey = be32_to_cpu(reth->rkey);
+			u64 vaddr = be64_to_cpu(reth->vaddr);
+			int ok;
+
+			/* Check rkey */
+			ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
+					 vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
+			if (unlikely(!ok))
+				goto drop;
+			qp->r_sge.num_sge = 1;
+		} else {
+			qp->r_sge.num_sge = 0;
+			qp->r_sge.sge.mr = NULL;
+			qp->r_sge.sge.vaddr = NULL;
+			qp->r_sge.sge.length = 0;
+			qp->r_sge.sge.sge_length = 0;
+		}
+		if (opcode == OP(RDMA_WRITE_ONLY))
+			goto rdma_last;
+		else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
+			goto rdma_last_imm;
+		/* FALLTHROUGH */
+	case OP(RDMA_WRITE_MIDDLE):
+		/* Check for invalid length PMTU or posted rwqe len. */
+		if (unlikely(tlen != (hdrsize + pmtu + 4)))
+			goto drop;
+		qp->r_rcv_len += pmtu;
+		if (unlikely(qp->r_rcv_len > qp->r_len))
+			goto drop;
+		qib_copy_sge(&qp->r_sge, data, pmtu, 1);
+		break;
+
+	case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
+rdma_last_imm:
+		wc.ex.imm_data = ohdr->u.imm_data;
+		hdrsize += 4;
+		wc.wc_flags = IB_WC_WITH_IMM;
+
+		/* Get the number of bytes the message was padded by. */
+		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+		/* Check for invalid length. */
+		/* XXX LAST len should be >= 1 */
+		if (unlikely(tlen < (hdrsize + pad + 4)))
+			goto drop;
+		/* Don't count the CRC. */
+		tlen -= (hdrsize + pad + 4);
+		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
+			goto drop;
+		if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
+			while (qp->s_rdma_read_sge.num_sge) {
+				atomic_dec(&qp->s_rdma_read_sge.sge.mr->
+					   refcount);
+				if (--qp->s_rdma_read_sge.num_sge)
+					qp->s_rdma_read_sge.sge =
+						*qp->s_rdma_read_sge.sg_list++;
+			}
+		else {
+			ret = qib_get_rwqe(qp, 1);
+			if (ret < 0)
+				goto op_err;
+			if (!ret)
+				goto drop;
+		}
+		wc.byte_len = qp->r_len;
+		wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+		goto last_imm;
+
+	case OP(RDMA_WRITE_LAST):
+rdma_last:
+		/* Get the number of bytes the message was padded by. */
+		pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+		/* Check for invalid length. */
+		/* XXX LAST len should be >= 1 */
+		if (unlikely(tlen < (hdrsize + pad + 4)))
+			goto drop;
+		/* Don't count the CRC. */
+		tlen -= (hdrsize + pad + 4);
+		if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
+			goto drop;
+		qib_copy_sge(&qp->r_sge, data, tlen, 1);
+		while (qp->r_sge.num_sge) {
+			atomic_dec(&qp->r_sge.sge.mr->refcount);
+			if (--qp->r_sge.num_sge)
+				qp->r_sge.sge = *qp->r_sge.sg_list++;
+		}
+		break;
+
+	default:
+		/* Drop packet for unknown opcodes. */
+		goto drop;
+	}
+	qp->r_psn++;
+	qp->r_state = opcode;
+	spin_unlock(&qp->r_lock);
+	return;
+
+rewind:
+	set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
+	qp->r_sge.num_sge = 0;
+drop:
+	ibp->n_pkt_drops++;
+	spin_unlock(&qp->r_lock);
+	return;
+
+op_err:
+	qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+	spin_unlock(&qp->r_lock);
+	return;
+
+sunlock:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 41/52] IB/qib: Add qib_ud.c
From: Ralph Campbell @ 2010-05-07  0:02 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_ud.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_ud.c |  607 ++++++++++++++++++++++++++++++++++++
 1 files changed, 607 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_ud.c

diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
new file mode 100644
index 0000000..c838cda
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -0,0 +1,607 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_smi.h>
+
+#include "qib.h"
+#include "qib_mad.h"
+
+/**
+ * qib_ud_loopback - handle send on loopback QPs
+ * @sqp: the sending QP
+ * @swqe: the send work request
+ *
+ * This is called from qib_make_ud_req() to forward a WQE addressed
+ * to the same HCA.
+ * Note that the receive interrupt handler may be calling qib_ud_rcv()
+ * while this is being called.
+ */
+static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
+{
+	struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
+	struct qib_pportdata *ppd;
+	struct qib_qp *qp;
+	struct ib_ah_attr *ah_attr;
+	unsigned long flags;
+	struct qib_sge_state ssge;
+	struct qib_sge *sge;
+	struct ib_wc wc;
+	u32 length;
+
+	qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
+	if (!qp) {
+		ibp->n_pkt_drops++;
+		return;
+	}
+	if (qp->ibqp.qp_type != sqp->ibqp.qp_type ||
+	    !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
+		ibp->n_pkt_drops++;
+		goto drop;
+	}
+
+	ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
+	ppd = ppd_from_ibp(ibp);
+
+	if (qp->ibqp.qp_num > 1) {
+		u16 pkey1;
+		u16 pkey2;
+		u16 lid;
+
+		pkey1 = qib_get_pkey(ibp, sqp->s_pkey_index);
+		pkey2 = qib_get_pkey(ibp, qp->s_pkey_index);
+		if (unlikely(!qib_pkey_ok(pkey1, pkey2))) {
+			lid = ppd->lid | (ah_attr->src_path_bits &
+					  ((1 << ppd->lmc) - 1));
+			qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, pkey1,
+				      ah_attr->sl,
+				      sqp->ibqp.qp_num, qp->ibqp.qp_num,
+				      cpu_to_be16(lid),
+				      cpu_to_be16(ah_attr->dlid));
+			goto drop;
+		}
+	}
+
+	/*
+	 * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
+	 * Qkeys with the high order bit set mean use the
+	 * qkey from the QP context instead of the WR (see 10.2.5).
+	 */
+	if (qp->ibqp.qp_num) {
+		u32 qkey;
+
+		qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ?
+			sqp->qkey : swqe->wr.wr.ud.remote_qkey;
+		if (unlikely(qkey != qp->qkey)) {
+			u16 lid;
+
+			lid = ppd->lid | (ah_attr->src_path_bits &
+					  ((1 << ppd->lmc) - 1));
+			qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey,
+				      ah_attr->sl,
+				      sqp->ibqp.qp_num, qp->ibqp.qp_num,
+				      cpu_to_be16(lid),
+				      cpu_to_be16(ah_attr->dlid));
+			goto drop;
+		}
+	}
+
+	/*
+	 * A GRH is expected to preceed the data even if not
+	 * present on the wire.
+	 */
+	length = swqe->length;
+	memset(&wc, 0, sizeof wc);
+	wc.byte_len = length + sizeof(struct ib_grh);
+
+	if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
+		wc.wc_flags = IB_WC_WITH_IMM;
+		wc.ex.imm_data = swqe->wr.ex.imm_data;
+	}
+
+	spin_lock_irqsave(&qp->r_lock, flags);
+
+	/*
+	 * Get the next work request entry to find where to put the data.
+	 */
+	if (qp->r_flags & QIB_R_REUSE_SGE)
+		qp->r_flags &= ~QIB_R_REUSE_SGE;
+	else {
+		int ret;
+
+		ret = qib_get_rwqe(qp, 0);
+		if (ret < 0) {
+			qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+			goto bail_unlock;
+		}
+		if (!ret) {
+			if (qp->ibqp.qp_num == 0)
+				ibp->n_vl15_dropped++;
+			goto bail_unlock;
+		}
+	}
+	/* Silently drop packets which are too big. */
+	if (unlikely(wc.byte_len > qp->r_len)) {
+		qp->r_flags |= QIB_R_REUSE_SGE;
+		ibp->n_pkt_drops++;
+		goto bail_unlock;
+	}
+
+	if (ah_attr->ah_flags & IB_AH_GRH) {
+		qib_copy_sge(&qp->r_sge, &ah_attr->grh,
+			     sizeof(struct ib_grh), 1);
+		wc.wc_flags |= IB_WC_GRH;
+	} else
+		qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+	ssge.sg_list = swqe->sg_list + 1;
+	ssge.sge = *swqe->sg_list;
+	ssge.num_sge = swqe->wr.num_sge;
+	sge = &ssge.sge;
+	while (length) {
+		u32 len = sge->length;
+
+		if (len > length)
+			len = length;
+		if (len > sge->sge_length)
+			len = sge->sge_length;
+		BUG_ON(len == 0);
+		qib_copy_sge(&qp->r_sge, sge->vaddr, len, 1);
+		sge->vaddr += len;
+		sge->length -= len;
+		sge->sge_length -= len;
+		if (sge->sge_length == 0) {
+			if (--ssge.num_sge)
+				*sge = *ssge.sg_list++;
+		} else if (sge->length == 0 && sge->mr->lkey) {
+			if (++sge->n >= QIB_SEGSZ) {
+				if (++sge->m >= sge->mr->mapsz)
+					break;
+				sge->n = 0;
+			}
+			sge->vaddr =
+				sge->mr->map[sge->m]->segs[sge->n].vaddr;
+			sge->length =
+				sge->mr->map[sge->m]->segs[sge->n].length;
+		}
+		length -= len;
+	}
+	while (qp->r_sge.num_sge) {
+		atomic_dec(&qp->r_sge.sge.mr->refcount);
+		if (--qp->r_sge.num_sge)
+			qp->r_sge.sge = *qp->r_sge.sg_list++;
+	}
+	if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+		goto bail_unlock;
+	wc.wr_id = qp->r_wr_id;
+	wc.status = IB_WC_SUCCESS;
+	wc.opcode = IB_WC_RECV;
+	wc.qp = &qp->ibqp;
+	wc.src_qp = sqp->ibqp.qp_num;
+	wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ?
+		swqe->wr.wr.ud.pkey_index : 0;
+	wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
+	wc.sl = ah_attr->sl;
+	wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
+	wc.port_num = qp->port_num;
+	/* Signal completion event if the solicited bit is set. */
+	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+		     swqe->wr.send_flags & IB_SEND_SOLICITED);
+	ibp->n_loop_pkts++;
+bail_unlock:
+	spin_unlock_irqrestore(&qp->r_lock, flags);
+drop:
+	if (atomic_dec_and_test(&qp->refcount))
+		wake_up(&qp->wait);
+}
+
+/**
+ * qib_make_ud_req - construct a UD request packet
+ * @qp: the QP
+ *
+ * Return 1 if constructed; otherwise, return 0.
+ */
+int qib_make_ud_req(struct qib_qp *qp)
+{
+	struct qib_other_headers *ohdr;
+	struct ib_ah_attr *ah_attr;
+	struct qib_pportdata *ppd;
+	struct qib_ibport *ibp;
+	struct qib_swqe *wqe;
+	unsigned long flags;
+	u32 nwords;
+	u32 extra_bytes;
+	u32 bth0;
+	u16 lrh0;
+	u16 lid;
+	int ret = 0;
+	int next_cur;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+
+	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK)) {
+		if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+			goto bail;
+		/* We are in the error state, flush the work request. */
+		if (qp->s_last == qp->s_head)
+			goto bail;
+		/* If DMAs are in progress, we can't flush immediately. */
+		if (atomic_read(&qp->s_dma_busy)) {
+			qp->s_flags |= QIB_S_WAIT_DMA;
+			goto bail;
+		}
+		wqe = get_swqe_ptr(qp, qp->s_last);
+		qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+		goto done;
+	}
+
+	if (qp->s_cur == qp->s_head)
+		goto bail;
+
+	wqe = get_swqe_ptr(qp, qp->s_cur);
+	next_cur = qp->s_cur + 1;
+	if (next_cur >= qp->s_size)
+		next_cur = 0;
+
+	/* Construct the header. */
+	ibp = to_iport(qp->ibqp.device, qp->port_num);
+	ppd = ppd_from_ibp(ibp);
+	ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
+	if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
+		if (ah_attr->dlid != QIB_PERMISSIVE_LID)
+			ibp->n_multicast_xmit++;
+		else
+			ibp->n_unicast_xmit++;
+	} else {
+		ibp->n_unicast_xmit++;
+		lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
+		if (unlikely(lid == ppd->lid)) {
+			/*
+			 * If DMAs are in progress, we can't generate
+			 * a completion for the loopback packet since
+			 * it would be out of order.
+			 * XXX Instead of waiting, we could queue a
+			 * zero length descriptor so we get a callback.
+			 */
+			if (atomic_read(&qp->s_dma_busy)) {
+				qp->s_flags |= QIB_S_WAIT_DMA;
+				goto bail;
+			}
+			qp->s_cur = next_cur;
+			spin_unlock_irqrestore(&qp->s_lock, flags);
+			qib_ud_loopback(qp, wqe);
+			spin_lock_irqsave(&qp->s_lock, flags);
+			qib_send_complete(qp, wqe, IB_WC_SUCCESS);
+			goto done;
+		}
+	}
+
+	qp->s_cur = next_cur;
+	extra_bytes = -wqe->length & 3;
+	nwords = (wqe->length + extra_bytes) >> 2;
+
+	/* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
+	qp->s_hdrwords = 7;
+	qp->s_cur_size = wqe->length;
+	qp->s_cur_sge = &qp->s_sge;
+	qp->s_srate = ah_attr->static_rate;
+	qp->s_wqe = wqe;
+	qp->s_sge.sge = wqe->sg_list[0];
+	qp->s_sge.sg_list = wqe->sg_list + 1;
+	qp->s_sge.num_sge = wqe->wr.num_sge;
+	qp->s_sge.total_len = wqe->length;
+
+	if (ah_attr->ah_flags & IB_AH_GRH) {
+		/* Header size in 32-bit words. */
+		qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh,
+					       &ah_attr->grh,
+					       qp->s_hdrwords, nwords);
+		lrh0 = QIB_LRH_GRH;
+		ohdr = &qp->s_hdr.u.l.oth;
+		/*
+		 * Don't worry about sending to locally attached multicast
+		 * QPs.  It is unspecified by the spec. what happens.
+		 */
+	} else {
+		/* Header size in 32-bit words. */
+		lrh0 = QIB_LRH_BTH;
+		ohdr = &qp->s_hdr.u.oth;
+	}
+	if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
+		qp->s_hdrwords++;
+		ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
+		bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
+	} else
+		bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
+	lrh0 |= ah_attr->sl << 4;
+	if (qp->ibqp.qp_type == IB_QPT_SMI)
+		lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
+	else
+		lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12;
+	qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
+	qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid);  /* DEST LID */
+	qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+	lid = ppd->lid;
+	if (lid) {
+		lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
+		qp->s_hdr.lrh[3] = cpu_to_be16(lid);
+	} else
+		qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
+	if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+		bth0 |= IB_BTH_SOLICITED;
+	bth0 |= extra_bytes << 20;
+	bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY :
+		qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ?
+			     wqe->wr.wr.ud.pkey_index : qp->s_pkey_index);
+	ohdr->bth[0] = cpu_to_be32(bth0);
+	/*
+	 * Use the multicast QP if the destination LID is a multicast LID.
+	 */
+	ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
+		ah_attr->dlid != QIB_PERMISSIVE_LID ?
+		cpu_to_be32(QIB_MULTICAST_QPN) :
+		cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
+	ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK);
+	/*
+	 * Qkeys with the high order bit set mean use the
+	 * qkey from the QP context instead of the WR (see 10.2.5).
+	 */
+	ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
+					 qp->qkey : wqe->wr.wr.ud.remote_qkey);
+	ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
+
+done:
+	ret = 1;
+	goto unlock;
+
+bail:
+	qp->s_flags &= ~QIB_S_BUSY;
+unlock:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	return ret;
+}
+
+static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey)
+{
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_devdata *dd = ppd->dd;
+	unsigned ctxt = ppd->hw_pidx;
+	unsigned i;
+
+	pkey &= 0x7fff;	/* remove limited/full membership bit */
+
+	for (i = 0; i < ARRAY_SIZE(dd->rcd[ctxt]->pkeys); ++i)
+		if ((dd->rcd[ctxt]->pkeys[i] & 0x7fff) == pkey)
+			return i;
+
+	/*
+	 * Should not get here, this means hardware failed to validate pkeys.
+	 * Punt and return index 0.
+	 */
+	return 0;
+}
+
+/**
+ * qib_ud_rcv - receive an incoming UD packet
+ * @ibp: the port the packet came in on
+ * @hdr: the packet header
+ * @has_grh: true if the packet has a GRH
+ * @data: the packet data
+ * @tlen: the packet length
+ * @qp: the QP the packet came on
+ *
+ * This is called from qib_qp_rcv() to process an incoming UD packet
+ * for the given QP.
+ * Called at interrupt level.
+ */
+void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+		int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+{
+	struct qib_other_headers *ohdr;
+	int opcode;
+	u32 hdrsize;
+	u32 pad;
+	struct ib_wc wc;
+	u32 qkey;
+	u32 src_qp;
+	u16 dlid;
+
+	/* Check for GRH */
+	if (!has_grh) {
+		ohdr = &hdr->u.oth;
+		hdrsize = 8 + 12 + 8;   /* LRH + BTH + DETH */
+	} else {
+		ohdr = &hdr->u.l.oth;
+		hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
+	}
+	qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
+	src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & QIB_QPN_MASK;
+
+	/* Get the number of bytes the message was padded by. */
+	pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+	if (unlikely(tlen < (hdrsize + pad + 4))) {
+		/* Drop incomplete packets. */
+		ibp->n_pkt_drops++;
+		goto bail;
+	}
+	tlen -= hdrsize + pad + 4;
+
+	/*
+	 * Check that the permissive LID is only used on QP0
+	 * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
+	 */
+	if (qp->ibqp.qp_num) {
+		if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
+			     hdr->lrh[3] == IB_LID_PERMISSIVE)) {
+			ibp->n_pkt_drops++;
+			goto bail;
+		}
+		if (qp->ibqp.qp_num > 1) {
+			u16 pkey1, pkey2;
+
+			pkey1 = be32_to_cpu(ohdr->bth[0]);
+			pkey2 = qib_get_pkey(ibp, qp->s_pkey_index);
+			if (unlikely(!qib_pkey_ok(pkey1, pkey2))) {
+				qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
+					      pkey1,
+					      (be16_to_cpu(hdr->lrh[0]) >> 4) &
+						0xF,
+					      src_qp, qp->ibqp.qp_num,
+					      hdr->lrh[3], hdr->lrh[1]);
+				goto bail;
+			}
+		}
+		if (unlikely(qkey != qp->qkey)) {
+			qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey,
+				      (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+				      src_qp, qp->ibqp.qp_num,
+				      hdr->lrh[3], hdr->lrh[1]);
+			goto bail;
+		}
+		/* Drop invalid MAD packets (see 13.5.3.1). */
+		if (unlikely(qp->ibqp.qp_num == 1 &&
+			     (tlen != 256 ||
+			      (be16_to_cpu(hdr->lrh[0]) >> 12) == 15))) {
+			ibp->n_pkt_drops++;
+			goto bail;
+		}
+	} else {
+		struct ib_smp *smp;
+
+		/* Drop invalid MAD packets (see 13.5.3.1). */
+		if (tlen != 256 || (be16_to_cpu(hdr->lrh[0]) >> 12) != 15) {
+			ibp->n_pkt_drops++;
+			goto bail;
+		}
+		smp = (struct ib_smp *) data;
+		if ((hdr->lrh[1] == IB_LID_PERMISSIVE ||
+		     hdr->lrh[3] == IB_LID_PERMISSIVE) &&
+		    smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+			ibp->n_pkt_drops++;
+			goto bail;
+		}
+	}
+
+	/*
+	 * The opcode is in the low byte when its in network order
+	 * (top byte when in host order).
+	 */
+	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+	if (qp->ibqp.qp_num > 1 &&
+	    opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
+		wc.ex.imm_data = ohdr->u.ud.imm_data;
+		wc.wc_flags = IB_WC_WITH_IMM;
+		hdrsize += sizeof(u32);
+	} else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
+		wc.ex.imm_data = 0;
+		wc.wc_flags = 0;
+	} else {
+		ibp->n_pkt_drops++;
+		goto bail;
+	}
+
+	/*
+	 * A GRH is expected to preceed the data even if not
+	 * present on the wire.
+	 */
+	wc.byte_len = tlen + sizeof(struct ib_grh);
+
+	/*
+	 * We need to serialize getting a receive work queue entry and
+	 * generating a completion for it against QPs sending to this QP
+	 * locally.
+	 */
+	spin_lock(&qp->r_lock);
+
+	/*
+	 * Get the next work request entry to find where to put the data.
+	 */
+	if (qp->r_flags & QIB_R_REUSE_SGE)
+		qp->r_flags &= ~QIB_R_REUSE_SGE;
+	else {
+		int ret;
+
+		ret = qib_get_rwqe(qp, 0);
+		if (ret < 0) {
+			qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+			goto bail_unlock;
+		}
+		if (!ret) {
+			if (qp->ibqp.qp_num == 0)
+				ibp->n_vl15_dropped++;
+			goto bail_unlock;
+		}
+	}
+	/* Silently drop packets which are too big. */
+	if (unlikely(wc.byte_len > qp->r_len)) {
+		qp->r_flags |= QIB_R_REUSE_SGE;
+		ibp->n_pkt_drops++;
+		goto bail_unlock;
+	}
+	if (has_grh) {
+		qib_copy_sge(&qp->r_sge, &hdr->u.l.grh,
+			     sizeof(struct ib_grh), 1);
+		wc.wc_flags |= IB_WC_GRH;
+	} else
+		qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+	qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
+	while (qp->r_sge.num_sge) {
+		atomic_dec(&qp->r_sge.sge.mr->refcount);
+		if (--qp->r_sge.num_sge)
+			qp->r_sge.sge = *qp->r_sge.sg_list++;
+	}
+	if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+		goto bail_unlock;
+	wc.wr_id = qp->r_wr_id;
+	wc.status = IB_WC_SUCCESS;
+	wc.opcode = IB_WC_RECV;
+	wc.vendor_err = 0;
+	wc.qp = &qp->ibqp;
+	wc.src_qp = src_qp;
+	wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ?
+		qib_lookup_pkey(ibp, be32_to_cpu(ohdr->bth[0])) : 0;
+	wc.slid = be16_to_cpu(hdr->lrh[3]);
+	wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF;
+	dlid = be16_to_cpu(hdr->lrh[1]);
+	/*
+	 * Save the LMC lower bits if the destination LID is a unicast LID.
+	 */
+	wc.dlid_path_bits = dlid >= QIB_MULTICAST_LID_BASE ? 0 :
+		dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
+	wc.port_num = qp->port_num;
+	/* Signal completion event if the solicited bit is set. */
+	qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+		     (ohdr->bth[0] &
+			cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+bail_unlock:
+	spin_unlock(&qp->r_lock);
+bail:;
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 42/52] IB/qib: Add qib_user_pages.c
From: Ralph Campbell @ 2010-05-07  0:02 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_user_pages.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_user_pages.c |  157 ++++++++++++++++++++++++++++
 1 files changed, 157 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_user_pages.c

diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
new file mode 100644
index 0000000..d7a26c1
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mm.h>
+#include <linux/device.h>
+
+#include "qib.h"
+
+static void __qib_release_user_pages(struct page **p, size_t num_pages,
+				     int dirty)
+{
+	size_t i;
+
+	for (i = 0; i < num_pages; i++) {
+		if (dirty)
+			set_page_dirty_lock(p[i]);
+		put_page(p[i]);
+	}
+}
+
+/*
+ * Call with current->mm->mmap_sem held.
+ */
+static int __get_user_pages(unsigned long start_page, size_t num_pages,
+			    struct page **p, struct vm_area_struct **vma)
+{
+	unsigned long lock_limit;
+	size_t got;
+	int ret;
+
+	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+	if (num_pages > lock_limit && !capable(CAP_IPC_LOCK)) {
+		ret = -ENOMEM;
+		goto bail;
+	}
+
+	for (got = 0; got < num_pages; got += ret) {
+		ret = get_user_pages(current, current->mm,
+				     start_page + got * PAGE_SIZE,
+				     num_pages - got, 1, 1,
+				     p + got, vma);
+		if (ret < 0)
+			goto bail_release;
+	}
+
+	current->mm->locked_vm += num_pages;
+
+	ret = 0;
+	goto bail;
+
+bail_release:
+	__qib_release_user_pages(p, got, 0);
+bail:
+	return ret;
+}
+
+/**
+ * qib_map_page - a safety wrapper around pci_map_page()
+ *
+ * A dma_addr of all 0's is interpreted by the chip as "disabled".
+ * Unfortunately, it can also be a valid dma_addr returned on some
+ * architectures.
+ *
+ * The powerpc iommu assigns dma_addrs in ascending order, so we don't
+ * have to bother with retries or mapping a dummy page to insure we
+ * don't just get the same mapping again.
+ *
+ * I'm sure we won't be so lucky with other iommu's, so FIXME.
+ */
+dma_addr_t qib_map_page(struct pci_dev *hwdev, struct page *page,
+			unsigned long offset, size_t size, int direction)
+{
+	dma_addr_t phys;
+
+	phys = pci_map_page(hwdev, page, offset, size, direction);
+
+	if (phys == 0) {
+		pci_unmap_page(hwdev, phys, size, direction);
+		phys = pci_map_page(hwdev, page, offset, size, direction);
+		/*
+		 * FIXME: If we get 0 again, we should keep this page,
+		 * map another, then free the 0 page.
+		 */
+	}
+
+	return phys;
+}
+
+/**
+ * qib_get_user_pages - lock user pages into memory
+ * @start_page: the start page
+ * @num_pages: the number of pages
+ * @p: the output page structures
+ *
+ * This function takes a given start page (page aligned user virtual
+ * address) and pins it and the following specified number of pages.  For
+ * now, num_pages is always 1, but that will probably change at some point
+ * (because caller is doing expected sends on a single virtually contiguous
+ * buffer, so we can do all pages at once).
+ */
+int qib_get_user_pages(unsigned long start_page, size_t num_pages,
+		       struct page **p)
+{
+	int ret;
+
+	down_write(&current->mm->mmap_sem);
+
+	ret = __get_user_pages(start_page, num_pages, p, NULL);
+
+	up_write(&current->mm->mmap_sem);
+
+	return ret;
+}
+
+void qib_release_user_pages(struct page **p, size_t num_pages)
+{
+	if (current->mm) /* during close after signal, mm can be NULL */
+		down_write(&current->mm->mmap_sem);
+
+	__qib_release_user_pages(p, num_pages, 1);
+
+	if (current->mm) {
+		current->mm->locked_vm -= num_pages;
+		up_write(&current->mm->mmap_sem);
+	}
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 43/52] IB/qib: Add qib_user_sdma.c
From: Ralph Campbell @ 2010-05-07  0:02 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_user_sdma.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_user_sdma.c |  897 +++++++++++++++++++++++++++++
 1 files changed, 897 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_user_sdma.c

diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
new file mode 100644
index 0000000..4c19e06
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -0,0 +1,897 @@
+/*
+ * Copyright (c) 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/uio.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+
+#include "qib.h"
+#include "qib_user_sdma.h"
+
+/* minimum size of header */
+#define QIB_USER_SDMA_MIN_HEADER_LENGTH 64
+/* expected size of headers (for dma_pool) */
+#define QIB_USER_SDMA_EXP_HEADER_LENGTH 64
+/* attempt to drain the queue for 5secs */
+#define QIB_USER_SDMA_DRAIN_TIMEOUT 500
+
+struct qib_user_sdma_pkt {
+	u8 naddr;               /* dimension of addr (1..3) ... */
+	u32 counter;            /* sdma pkts queued counter for this entry */
+	u64 added;              /* global descq number of entries */
+
+	struct {
+		u32 offset;                     /* offset for kvaddr, addr */
+		u32 length;                     /* length in page */
+		u8  put_page;                   /* should we put_page? */
+		u8  dma_mapped;                 /* is page dma_mapped? */
+		struct page *page;              /* may be NULL (coherent mem) */
+		void *kvaddr;                   /* FIXME: only for pio hack */
+		dma_addr_t addr;
+	} addr[4];   /* max pages, any more and we coalesce */
+	struct list_head list;  /* list element */
+};
+
+struct qib_user_sdma_queue {
+	/*
+	 * pkts sent to dma engine are queued on this
+	 * list head.  the type of the elements of this
+	 * list are struct qib_user_sdma_pkt...
+	 */
+	struct list_head sent;
+
+	/* headers with expected length are allocated from here... */
+	char header_cache_name[64];
+	struct dma_pool *header_cache;
+
+	/* packets are allocated from the slab cache... */
+	char pkt_slab_name[64];
+	struct kmem_cache *pkt_slab;
+
+	/* as packets go on the queued queue, they are counted... */
+	u32 counter;
+	u32 sent_counter;
+
+	/* dma page table */
+	struct rb_root dma_pages_root;
+
+	/* protect everything above... */
+	struct mutex lock;
+};
+
+struct qib_user_sdma_queue *
+qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
+{
+	struct qib_user_sdma_queue *pq =
+		kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
+
+	if (!pq)
+		goto done;
+
+	pq->counter = 0;
+	pq->sent_counter = 0;
+	INIT_LIST_HEAD(&pq->sent);
+
+	mutex_init(&pq->lock);
+
+	snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
+		 "qib-user-sdma-pkts-%u-%02u.%02u", unit, ctxt, sctxt);
+	pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
+					 sizeof(struct qib_user_sdma_pkt),
+					 0, 0, NULL);
+
+	if (!pq->pkt_slab)
+		goto err_kfree;
+
+	snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
+		 "qib-user-sdma-headers-%u-%02u.%02u", unit, ctxt, sctxt);
+	pq->header_cache = dma_pool_create(pq->header_cache_name,
+					   dev,
+					   QIB_USER_SDMA_EXP_HEADER_LENGTH,
+					   4, 0);
+	if (!pq->header_cache)
+		goto err_slab;
+
+	pq->dma_pages_root = RB_ROOT;
+
+	goto done;
+
+err_slab:
+	kmem_cache_destroy(pq->pkt_slab);
+err_kfree:
+	kfree(pq);
+	pq = NULL;
+
+done:
+	return pq;
+}
+
+static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
+				    int i, size_t offset, size_t len,
+				    int put_page, int dma_mapped,
+				    struct page *page,
+				    void *kvaddr, dma_addr_t dma_addr)
+{
+	pkt->addr[i].offset = offset;
+	pkt->addr[i].length = len;
+	pkt->addr[i].put_page = put_page;
+	pkt->addr[i].dma_mapped = dma_mapped;
+	pkt->addr[i].page = page;
+	pkt->addr[i].kvaddr = kvaddr;
+	pkt->addr[i].addr = dma_addr;
+}
+
+static void qib_user_sdma_init_header(struct qib_user_sdma_pkt *pkt,
+				      u32 counter, size_t offset,
+				      size_t len, int dma_mapped,
+				      struct page *page,
+				      void *kvaddr, dma_addr_t dma_addr)
+{
+	pkt->naddr = 1;
+	pkt->counter = counter;
+	qib_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
+				kvaddr, dma_addr);
+}
+
+/* we've too many pages in the iovec, coalesce to a single page */
+static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
+				  struct qib_user_sdma_pkt *pkt,
+				  const struct iovec *iov,
+				  unsigned long niov)
+{
+	int ret = 0;
+	struct page *page = alloc_page(GFP_KERNEL);
+	void *mpage_save;
+	char *mpage;
+	int i;
+	int len = 0;
+	dma_addr_t dma_addr;
+
+	if (!page) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	mpage = kmap(page);
+	mpage_save = mpage;
+	for (i = 0; i < niov; i++) {
+		int cfur;
+
+		cfur = copy_from_user(mpage,
+				      iov[i].iov_base, iov[i].iov_len);
+		if (cfur) {
+			ret = -EFAULT;
+			goto free_unmap;
+		}
+
+		mpage += iov[i].iov_len;
+		len += iov[i].iov_len;
+	}
+
+	dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
+				DMA_TO_DEVICE);
+	if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+		ret = -ENOMEM;
+		goto free_unmap;
+	}
+
+	qib_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
+				dma_addr);
+	pkt->naddr = 2;
+
+	goto done;
+
+free_unmap:
+	kunmap(page);
+	__free_page(page);
+done:
+	return ret;
+}
+
+/*
+ * How many pages in this iovec element?
+ */
+static int qib_user_sdma_num_pages(const struct iovec *iov)
+{
+	const unsigned long addr  = (unsigned long) iov->iov_base;
+	const unsigned long  len  = iov->iov_len;
+	const unsigned long spage = addr & PAGE_MASK;
+	const unsigned long epage = (addr + len - 1) & PAGE_MASK;
+
+	return 1 + ((epage - spage) >> PAGE_SHIFT);
+}
+
+/*
+ * Truncate length to page boundry.
+ */
+static int qib_user_sdma_page_length(unsigned long addr, unsigned long len)
+{
+	const unsigned long offset = addr & ~PAGE_MASK;
+
+	return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
+}
+
+static void qib_user_sdma_free_pkt_frag(struct device *dev,
+					struct qib_user_sdma_queue *pq,
+					struct qib_user_sdma_pkt *pkt,
+					int frag)
+{
+	const int i = frag;
+
+	if (pkt->addr[i].page) {
+		if (pkt->addr[i].dma_mapped)
+			dma_unmap_page(dev,
+				       pkt->addr[i].addr,
+				       pkt->addr[i].length,
+				       DMA_TO_DEVICE);
+
+		if (pkt->addr[i].kvaddr)
+			kunmap(pkt->addr[i].page);
+
+		if (pkt->addr[i].put_page)
+			put_page(pkt->addr[i].page);
+		else
+			__free_page(pkt->addr[i].page);
+	} else if (pkt->addr[i].kvaddr)
+		/* free coherent mem from cache... */
+		dma_pool_free(pq->header_cache,
+			      pkt->addr[i].kvaddr, pkt->addr[i].addr);
+}
+
+/* return number of pages pinned... */
+static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
+				   struct qib_user_sdma_pkt *pkt,
+				   unsigned long addr, int tlen, int npages)
+{
+	struct page *pages[2];
+	int j;
+	int ret;
+
+	ret = get_user_pages(current, current->mm, addr,
+			     npages, 0, 1, pages, NULL);
+
+	if (ret != npages) {
+		int i;
+
+		for (i = 0; i < ret; i++)
+			put_page(pages[i]);
+
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	for (j = 0; j < npages; j++) {
+		/* map the pages... */
+		const int flen = qib_user_sdma_page_length(addr, tlen);
+		dma_addr_t dma_addr =
+			dma_map_page(&dd->pcidev->dev,
+				     pages[j], 0, flen, DMA_TO_DEVICE);
+		unsigned long fofs = addr & ~PAGE_MASK;
+
+		if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+			ret = -ENOMEM;
+			goto done;
+		}
+
+		qib_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
+					pages[j], kmap(pages[j]), dma_addr);
+
+		pkt->naddr++;
+		addr += flen;
+		tlen -= flen;
+	}
+
+done:
+	return ret;
+}
+
+static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd,
+				 struct qib_user_sdma_queue *pq,
+				 struct qib_user_sdma_pkt *pkt,
+				 const struct iovec *iov,
+				 unsigned long niov)
+{
+	int ret = 0;
+	unsigned long idx;
+
+	for (idx = 0; idx < niov; idx++) {
+		const int npages = qib_user_sdma_num_pages(iov + idx);
+		const unsigned long addr = (unsigned long) iov[idx].iov_base;
+
+		ret = qib_user_sdma_pin_pages(dd, pkt, addr,
+					      iov[idx].iov_len, npages);
+		if (ret < 0)
+			goto free_pkt;
+	}
+
+	goto done;
+
+free_pkt:
+	for (idx = 0; idx < pkt->naddr; idx++)
+		qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
+
+done:
+	return ret;
+}
+
+static int qib_user_sdma_init_payload(const struct qib_devdata *dd,
+				      struct qib_user_sdma_queue *pq,
+				      struct qib_user_sdma_pkt *pkt,
+				      const struct iovec *iov,
+				      unsigned long niov, int npages)
+{
+	int ret = 0;
+
+	if (npages >= ARRAY_SIZE(pkt->addr))
+		ret = qib_user_sdma_coalesce(dd, pkt, iov, niov);
+	else
+		ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
+
+	return ret;
+}
+
+/* free a packet list -- return counter value of last packet */
+static void qib_user_sdma_free_pkt_list(struct device *dev,
+					struct qib_user_sdma_queue *pq,
+					struct list_head *list)
+{
+	struct qib_user_sdma_pkt *pkt, *pkt_next;
+
+	list_for_each_entry_safe(pkt, pkt_next, list, list) {
+		int i;
+
+		for (i = 0; i < pkt->naddr; i++)
+			qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);
+
+		kmem_cache_free(pq->pkt_slab, pkt);
+	}
+}
+
+/*
+ * copy headers, coalesce etc -- pq->lock must be held
+ *
+ * we queue all the packets to list, returning the
+ * number of bytes total.  list must be empty initially,
+ * as, if there is an error we clean it...
+ */
+static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
+				    struct qib_user_sdma_queue *pq,
+				    struct list_head *list,
+				    const struct iovec *iov,
+				    unsigned long niov,
+				    int maxpkts)
+{
+	unsigned long idx = 0;
+	int ret = 0;
+	int npkts = 0;
+	struct page *page = NULL;
+	__le32 *pbc;
+	dma_addr_t dma_addr;
+	struct qib_user_sdma_pkt *pkt = NULL;
+	size_t len;
+	size_t nw;
+	u32 counter = pq->counter;
+	int dma_mapped = 0;
+
+	while (idx < niov && npkts < maxpkts) {
+		const unsigned long addr = (unsigned long) iov[idx].iov_base;
+		const unsigned long idx_save = idx;
+		unsigned pktnw;
+		unsigned pktnwc;
+		int nfrags = 0;
+		int npages = 0;
+		int cfur;
+
+		dma_mapped = 0;
+		len = iov[idx].iov_len;
+		nw = len >> 2;
+		page = NULL;
+
+		pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
+		if (!pkt) {
+			ret = -ENOMEM;
+			goto free_list;
+		}
+
+		if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH ||
+		    len > PAGE_SIZE || len & 3 || addr & 3) {
+			ret = -EINVAL;
+			goto free_pkt;
+		}
+
+		if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH)
+			pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
+					     &dma_addr);
+		else
+			pbc = NULL;
+
+		if (!pbc) {
+			page = alloc_page(GFP_KERNEL);
+			if (!page) {
+				ret = -ENOMEM;
+				goto free_pkt;
+			}
+			pbc = kmap(page);
+		}
+
+		cfur = copy_from_user(pbc, iov[idx].iov_base, len);
+		if (cfur) {
+			ret = -EFAULT;
+			goto free_pbc;
+		}
+
+		/*
+		 * This assignment is a bit strange.  it's because the
+		 * the pbc counts the number of 32 bit words in the full
+		 * packet _except_ the first word of the pbc itself...
+		 */
+		pktnwc = nw - 1;
+
+		/*
+		 * pktnw computation yields the number of 32 bit words
+		 * that the caller has indicated in the PBC.  note that
+		 * this is one less than the total number of words that
+		 * goes to the send DMA engine as the first 32 bit word
+		 * of the PBC itself is not counted.  Armed with this count,
+		 * we can verify that the packet is consistent with the
+		 * iovec lengths.
+		 */
+		pktnw = le32_to_cpu(*pbc) & QIB_PBC_LENGTH_MASK;
+		if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
+			ret = -EINVAL;
+			goto free_pbc;
+		}
+
+		idx++;
+		while (pktnwc < pktnw && idx < niov) {
+			const size_t slen = iov[idx].iov_len;
+			const unsigned long faddr =
+				(unsigned long) iov[idx].iov_base;
+
+			if (slen & 3 || faddr & 3 || !slen ||
+			    slen > PAGE_SIZE) {
+				ret = -EINVAL;
+				goto free_pbc;
+			}
+
+			npages++;
+			if ((faddr & PAGE_MASK) !=
+			    ((faddr + slen - 1) & PAGE_MASK))
+				npages++;
+
+			pktnwc += slen >> 2;
+			idx++;
+			nfrags++;
+		}
+
+		if (pktnwc != pktnw) {
+			ret = -EINVAL;
+			goto free_pbc;
+		}
+
+		if (page) {
+			dma_addr = dma_map_page(&dd->pcidev->dev,
+						page, 0, len, DMA_TO_DEVICE);
+			if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+				ret = -ENOMEM;
+				goto free_pbc;
+			}
+
+			dma_mapped = 1;
+		}
+
+		qib_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
+					  page, pbc, dma_addr);
+
+		if (nfrags) {
+			ret = qib_user_sdma_init_payload(dd, pq, pkt,
+							 iov + idx_save + 1,
+							 nfrags, npages);
+			if (ret < 0)
+				goto free_pbc_dma;
+		}
+
+		counter++;
+		npkts++;
+
+		list_add_tail(&pkt->list, list);
+	}
+
+	ret = idx;
+	goto done;
+
+free_pbc_dma:
+	if (dma_mapped)
+		dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
+free_pbc:
+	if (page) {
+		kunmap(page);
+		__free_page(page);
+	} else
+		dma_pool_free(pq->header_cache, pbc, dma_addr);
+free_pkt:
+	kmem_cache_free(pq->pkt_slab, pkt);
+free_list:
+	qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
+done:
+	return ret;
+}
+
+static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue *pq,
+					       u32 c)
+{
+	pq->sent_counter = c;
+}
+
+/* try to clean out queue -- needs pq->lock */
+static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
+				     struct qib_user_sdma_queue *pq)
+{
+	struct qib_devdata *dd = ppd->dd;
+	struct list_head free_list;
+	struct qib_user_sdma_pkt *pkt;
+	struct qib_user_sdma_pkt *pkt_prev;
+	int ret = 0;
+
+	INIT_LIST_HEAD(&free_list);
+
+	list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
+		s64 descd = ppd->sdma_descq_removed - pkt->added;
+
+		if (descd < 0)
+			break;
+
+		list_move_tail(&pkt->list, &free_list);
+
+		/* one more packet cleaned */
+		ret++;
+	}
+
+	if (!list_empty(&free_list)) {
+		u32 counter;
+
+		pkt = list_entry(free_list.prev,
+				 struct qib_user_sdma_pkt, list);
+		counter = pkt->counter;
+
+		qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
+		qib_user_sdma_set_complete_counter(pq, counter);
+	}
+
+	return ret;
+}
+
+void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
+{
+	if (!pq)
+		return;
+
+	kmem_cache_destroy(pq->pkt_slab);
+	dma_pool_destroy(pq->header_cache);
+	kfree(pq);
+}
+
+/* clean descriptor queue, returns > 0 if some elements cleaned */
+static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
+{
+	int ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&ppd->sdma_lock, flags);
+	ret = qib_sdma_make_progress(ppd);
+	spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
+	return ret;
+}
+
+/* we're in close, drain packets so that we can cleanup successfully... */
+void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
+			       struct qib_user_sdma_queue *pq)
+{
+	struct qib_devdata *dd = ppd->dd;
+	int i;
+
+	if (!pq)
+		return;
+
+	for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) {
+		mutex_lock(&pq->lock);
+		if (list_empty(&pq->sent)) {
+			mutex_unlock(&pq->lock);
+			break;
+		}
+		qib_user_sdma_hwqueue_clean(ppd);
+		qib_user_sdma_queue_clean(ppd, pq);
+		mutex_unlock(&pq->lock);
+		msleep(10);
+	}
+
+	if (!list_empty(&pq->sent)) {
+		struct list_head free_list;
+
+		qib_dev_err(dd, "user sdma lists not empty: forcing!\n");
+		INIT_LIST_HEAD(&free_list);
+		mutex_lock(&pq->lock);
+		list_splice_init(&pq->sent, &free_list);
+		qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
+		mutex_unlock(&pq->lock);
+	}
+}
+
+static inline __le64 qib_sdma_make_desc0(struct qib_pportdata *ppd,
+					 u64 addr, u64 dwlen, u64 dwoffset)
+{
+	u8 tmpgen;
+
+	tmpgen = ppd->sdma_generation;
+
+	return cpu_to_le64(/* SDmaPhyAddr[31:0] */
+			   ((addr & 0xfffffffcULL) << 32) |
+			   /* SDmaGeneration[1:0] */
+			   ((tmpgen & 3ULL) << 30) |
+			   /* SDmaDwordCount[10:0] */
+			   ((dwlen & 0x7ffULL) << 16) |
+			   /* SDmaBufOffset[12:2] */
+			   (dwoffset & 0x7ffULL));
+}
+
+static inline __le64 qib_sdma_make_first_desc0(__le64 descq)
+{
+	return descq | cpu_to_le64(1ULL << 12);
+}
+
+static inline __le64 qib_sdma_make_last_desc0(__le64 descq)
+{
+					      /* last */  /* dma head */
+	return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
+}
+
+static inline __le64 qib_sdma_make_desc1(u64 addr)
+{
+	/* SDmaPhyAddr[47:32] */
+	return cpu_to_le64(addr >> 32);
+}
+
+static void qib_user_sdma_send_frag(struct qib_pportdata *ppd,
+				    struct qib_user_sdma_pkt *pkt, int idx,
+				    unsigned ofs, u16 tail)
+{
+	const u64 addr = (u64) pkt->addr[idx].addr +
+		(u64) pkt->addr[idx].offset;
+	const u64 dwlen = (u64) pkt->addr[idx].length / 4;
+	__le64 *descqp;
+	__le64 descq0;
+
+	descqp = &ppd->sdma_descq[tail].qw[0];
+
+	descq0 = qib_sdma_make_desc0(ppd, addr, dwlen, ofs);
+	if (idx == 0)
+		descq0 = qib_sdma_make_first_desc0(descq0);
+	if (idx == pkt->naddr - 1)
+		descq0 = qib_sdma_make_last_desc0(descq0);
+
+	descqp[0] = descq0;
+	descqp[1] = qib_sdma_make_desc1(addr);
+}
+
+/* pq->lock must be held, get packets on the wire... */
+static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
+				   struct qib_user_sdma_queue *pq,
+				   struct list_head *pktlist)
+{
+	struct qib_devdata *dd = ppd->dd;
+	int ret = 0;
+	unsigned long flags;
+	u16 tail;
+	u8 generation;
+	u64 descq_added;
+
+	if (list_empty(pktlist))
+		return 0;
+
+	if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
+		return -ECOMM;
+
+	spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+	/* keep a copy for restoring purposes in case of problems */
+	generation = ppd->sdma_generation;
+	descq_added = ppd->sdma_descq_added;
+
+	if (unlikely(!__qib_sdma_running(ppd))) {
+		ret = -ECOMM;
+		goto unlock;
+	}
+
+	tail = ppd->sdma_descq_tail;
+	while (!list_empty(pktlist)) {
+		struct qib_user_sdma_pkt *pkt =
+			list_entry(pktlist->next, struct qib_user_sdma_pkt,
+				   list);
+		int i;
+		unsigned ofs = 0;
+		u16 dtail = tail;
+
+		if (pkt->naddr > qib_sdma_descq_freecnt(ppd))
+			goto unlock_check_tail;
+
+		for (i = 0; i < pkt->naddr; i++) {
+			qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail);
+			ofs += pkt->addr[i].length >> 2;
+
+			if (++tail == ppd->sdma_descq_cnt) {
+				tail = 0;
+				++ppd->sdma_generation;
+			}
+		}
+
+		if ((ofs << 2) > ppd->ibmaxlen) {
+			ret = -EMSGSIZE;
+			goto unlock;
+		}
+
+		/*
+		 * If the packet is >= 2KB mtu equivalent, we have to use
+		 * the large buffers, and have to mark each descriptor as
+		 * part of a large buffer packet.
+		 */
+		if (ofs > dd->piosize2kmax_dwords) {
+			for (i = 0; i < pkt->naddr; i++) {
+				ppd->sdma_descq[dtail].qw[0] |=
+					cpu_to_le64(1ULL << 14);
+				if (++dtail == ppd->sdma_descq_cnt)
+					dtail = 0;
+			}
+		}
+
+		ppd->sdma_descq_added += pkt->naddr;
+		pkt->added = ppd->sdma_descq_added;
+		list_move_tail(&pkt->list, &pq->sent);
+		ret++;
+	}
+
+unlock_check_tail:
+	/* advance the tail on the chip if necessary */
+	if (ppd->sdma_descq_tail != tail)
+		dd->f_sdma_update_tail(ppd, tail);
+
+unlock:
+	if (unlikely(ret < 0)) {
+		ppd->sdma_generation = generation;
+		ppd->sdma_descq_added = descq_added;
+	}
+	spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
+	return ret;
+}
+
+int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
+			 struct qib_user_sdma_queue *pq,
+			 const struct iovec *iov,
+			 unsigned long dim)
+{
+	struct qib_devdata *dd = rcd->dd;
+	struct qib_pportdata *ppd = rcd->ppd;
+	int ret = 0;
+	struct list_head list;
+	int npkts = 0;
+
+	INIT_LIST_HEAD(&list);
+
+	mutex_lock(&pq->lock);
+
+	/* why not -ECOMM like qib_user_sdma_push_pkts() below? */
+	if (!qib_sdma_running(ppd))
+		goto done_unlock;
+
+	if (ppd->sdma_descq_added != ppd->sdma_descq_removed) {
+		qib_user_sdma_hwqueue_clean(ppd);
+		qib_user_sdma_queue_clean(ppd, pq);
+	}
+
+	while (dim) {
+		const int mxp = 8;
+
+		down_write(&current->mm->mmap_sem);
+		ret = qib_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
+		up_write(&current->mm->mmap_sem);
+
+		if (ret <= 0)
+			goto done_unlock;
+		else {
+			dim -= ret;
+			iov += ret;
+		}
+
+		/* force packets onto the sdma hw queue... */
+		if (!list_empty(&list)) {
+			/*
+			 * Lazily clean hw queue.  the 4 is a guess of about
+			 * how many sdma descriptors a packet will take (it
+			 * doesn't have to be perfect).
+			 */
+			if (qib_sdma_descq_freecnt(ppd) < ret * 4) {
+				qib_user_sdma_hwqueue_clean(ppd);
+				qib_user_sdma_queue_clean(ppd, pq);
+			}
+
+			ret = qib_user_sdma_push_pkts(ppd, pq, &list);
+			if (ret < 0)
+				goto done_unlock;
+			else {
+				npkts += ret;
+				pq->counter += ret;
+
+				if (!list_empty(&list))
+					goto done_unlock;
+			}
+		}
+	}
+
+done_unlock:
+	if (!list_empty(&list))
+		qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
+	mutex_unlock(&pq->lock);
+
+	return (ret < 0) ? ret : npkts;
+}
+
+int qib_user_sdma_make_progress(struct qib_pportdata *ppd,
+				struct qib_user_sdma_queue *pq)
+{
+	int ret = 0;
+
+	mutex_lock(&pq->lock);
+	qib_user_sdma_hwqueue_clean(ppd);
+	ret = qib_user_sdma_queue_clean(ppd, pq);
+	mutex_unlock(&pq->lock);
+
+	return ret;
+}
+
+u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq)
+{
+	return pq ? pq->sent_counter : 0;
+}
+
+u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq)
+{
+	return pq ? pq->counter : 0;
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 44/52] IB/qib: Add qib_user_sdma.h
From: Ralph Campbell @ 2010-05-07  0:02 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_user_sdma.h file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_user_sdma.h |   52 +++++++++++++++++++++++++++++
 1 files changed, 52 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_user_sdma.h

diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.h b/drivers/infiniband/hw/qib/qib_user_sdma.h
new file mode 100644
index 0000000..ce8cbaf
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/device.h>
+
+struct qib_user_sdma_queue;
+
+struct qib_user_sdma_queue *
+qib_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
+void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq);
+
+int qib_user_sdma_writev(struct qib_ctxtdata *pd,
+			 struct qib_user_sdma_queue *pq,
+			 const struct iovec *iov,
+			 unsigned long dim);
+
+int qib_user_sdma_make_progress(struct qib_pportdata *ppd,
+				struct qib_user_sdma_queue *pq);
+
+void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
+			       struct qib_user_sdma_queue *pq);
+
+u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq);
+u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq);

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 45/52] IB/qib: Add qib_verbs.c
From: Ralph Campbell @ 2010-05-07  0:02 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_verbs.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_verbs.c | 2248 +++++++++++++++++++++++++++++++++
 1 files changed, 2248 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_verbs.c

diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
new file mode 100644
index 0000000..cda8f41
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -0,0 +1,2248 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
+ * All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_mad.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/io.h>
+#include <linux/utsname.h>
+#include <linux/rculist.h>
+#include <linux/mm.h>
+
+#include "qib.h"
+#include "qib_common.h"
+
+static unsigned int ib_qib_qp_table_size = 251;
+module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
+MODULE_PARM_DESC(qp_table_size, "QP table size");
+
+unsigned int ib_qib_lkey_table_size = 16;
+module_param_named(lkey_table_size, ib_qib_lkey_table_size, uint,
+		   S_IRUGO);
+MODULE_PARM_DESC(lkey_table_size,
+		 "LKEY table size in bits (2^n, 1 <= n <= 23)");
+
+static unsigned int ib_qib_max_pds = 0xFFFF;
+module_param_named(max_pds, ib_qib_max_pds, uint, S_IRUGO);
+MODULE_PARM_DESC(max_pds,
+		 "Maximum number of protection domains to support");
+
+static unsigned int ib_qib_max_ahs = 0xFFFF;
+module_param_named(max_ahs, ib_qib_max_ahs, uint, S_IRUGO);
+MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
+
+unsigned int ib_qib_max_cqes = 0x2FFFF;
+module_param_named(max_cqes, ib_qib_max_cqes, uint, S_IRUGO);
+MODULE_PARM_DESC(max_cqes,
+		 "Maximum number of completion queue entries to support");
+
+unsigned int ib_qib_max_cqs = 0x1FFFF;
+module_param_named(max_cqs, ib_qib_max_cqs, uint, S_IRUGO);
+MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
+
+unsigned int ib_qib_max_qp_wrs = 0x3FFF;
+module_param_named(max_qp_wrs, ib_qib_max_qp_wrs, uint, S_IRUGO);
+MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
+
+unsigned int ib_qib_max_qps = 16384;
+module_param_named(max_qps, ib_qib_max_qps, uint, S_IRUGO);
+MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
+
+unsigned int ib_qib_max_sges = 0x60;
+module_param_named(max_sges, ib_qib_max_sges, uint, S_IRUGO);
+MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
+
+unsigned int ib_qib_max_mcast_grps = 16384;
+module_param_named(max_mcast_grps, ib_qib_max_mcast_grps, uint, S_IRUGO);
+MODULE_PARM_DESC(max_mcast_grps,
+		 "Maximum number of multicast groups to support");
+
+unsigned int ib_qib_max_mcast_qp_attached = 16;
+module_param_named(max_mcast_qp_attached, ib_qib_max_mcast_qp_attached,
+		   uint, S_IRUGO);
+MODULE_PARM_DESC(max_mcast_qp_attached,
+		 "Maximum number of attached QPs to support");
+
+unsigned int ib_qib_max_srqs = 1024;
+module_param_named(max_srqs, ib_qib_max_srqs, uint, S_IRUGO);
+MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
+
+unsigned int ib_qib_max_srq_sges = 128;
+module_param_named(max_srq_sges, ib_qib_max_srq_sges, uint, S_IRUGO);
+MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
+
+unsigned int ib_qib_max_srq_wrs = 0x1FFFF;
+module_param_named(max_srq_wrs, ib_qib_max_srq_wrs, uint, S_IRUGO);
+MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
+
+static unsigned int ib_qib_disable_sma;
+module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(disable_sma, "Disable the SMA");
+
+/*
+ * Note that it is OK to post send work requests in the SQE and ERR
+ * states; qib_do_send() will process them and generate error
+ * completions as per IB 1.2 C10-96.
+ */
+const int ib_qib_state_ops[IB_QPS_ERR + 1] = {
+	[IB_QPS_RESET] = 0,
+	[IB_QPS_INIT] = QIB_POST_RECV_OK,
+	[IB_QPS_RTR] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK,
+	[IB_QPS_RTS] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
+	    QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK |
+	    QIB_PROCESS_NEXT_SEND_OK,
+	[IB_QPS_SQD] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
+	    QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK,
+	[IB_QPS_SQE] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
+	    QIB_POST_SEND_OK | QIB_FLUSH_SEND,
+	[IB_QPS_ERR] = QIB_POST_RECV_OK | QIB_FLUSH_RECV |
+	    QIB_POST_SEND_OK | QIB_FLUSH_SEND,
+};
+
+struct qib_ucontext {
+	struct ib_ucontext ibucontext;
+};
+
+static inline struct qib_ucontext *to_iucontext(struct ib_ucontext
+						  *ibucontext)
+{
+	return container_of(ibucontext, struct qib_ucontext, ibucontext);
+}
+
+/*
+ * Translate ib_wr_opcode into ib_wc_opcode.
+ */
+const enum ib_wc_opcode ib_qib_wc_opcode[] = {
+	[IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+	[IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+	[IB_WR_SEND] = IB_WC_SEND,
+	[IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+	[IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+	[IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+	[IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
+};
+
+/*
+ * System image GUID.
+ */
+__be64 ib_qib_sys_image_guid;
+
+/**
+ * qib_copy_sge - copy data to SGE memory
+ * @ss: the SGE state
+ * @data: the data to copy
+ * @length: the length of the data
+ */
+void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
+{
+	struct qib_sge *sge = &ss->sge;
+
+	while (length) {
+		u32 len = sge->length;
+
+		if (len > length)
+			len = length;
+		if (len > sge->sge_length)
+			len = sge->sge_length;
+		BUG_ON(len == 0);
+		memcpy(sge->vaddr, data, len);
+		sge->vaddr += len;
+		sge->length -= len;
+		sge->sge_length -= len;
+		if (sge->sge_length == 0) {
+			if (release)
+				atomic_dec(&sge->mr->refcount);
+			if (--ss->num_sge)
+				*sge = *ss->sg_list++;
+		} else if (sge->length == 0 && sge->mr->lkey) {
+			if (++sge->n >= QIB_SEGSZ) {
+				if (++sge->m >= sge->mr->mapsz)
+					break;
+				sge->n = 0;
+			}
+			sge->vaddr =
+				sge->mr->map[sge->m]->segs[sge->n].vaddr;
+			sge->length =
+				sge->mr->map[sge->m]->segs[sge->n].length;
+		}
+		data += len;
+		length -= len;
+	}
+}
+
+/**
+ * qib_skip_sge - skip over SGE memory - XXX almost dup of prev func
+ * @ss: the SGE state
+ * @length: the number of bytes to skip
+ */
+void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
+{
+	struct qib_sge *sge = &ss->sge;
+
+	while (length) {
+		u32 len = sge->length;
+
+		if (len > length)
+			len = length;
+		if (len > sge->sge_length)
+			len = sge->sge_length;
+		BUG_ON(len == 0);
+		sge->vaddr += len;
+		sge->length -= len;
+		sge->sge_length -= len;
+		if (sge->sge_length == 0) {
+			if (release)
+				atomic_dec(&sge->mr->refcount);
+			if (--ss->num_sge)
+				*sge = *ss->sg_list++;
+		} else if (sge->length == 0 && sge->mr->lkey) {
+			if (++sge->n >= QIB_SEGSZ) {
+				if (++sge->m >= sge->mr->mapsz)
+					break;
+				sge->n = 0;
+			}
+			sge->vaddr =
+				sge->mr->map[sge->m]->segs[sge->n].vaddr;
+			sge->length =
+				sge->mr->map[sge->m]->segs[sge->n].length;
+		}
+		length -= len;
+	}
+}
+
+/*
+ * Count the number of DMA descriptors needed to send length bytes of data.
+ * Don't modify the qib_sge_state to get the count.
+ * Return zero if any of the segments is not aligned.
+ */
+static u32 qib_count_sge(struct qib_sge_state *ss, u32 length)
+{
+	struct qib_sge *sg_list = ss->sg_list;
+	struct qib_sge sge = ss->sge;
+	u8 num_sge = ss->num_sge;
+	u32 ndesc = 1;  /* count the header */
+
+	while (length) {
+		u32 len = sge.length;
+
+		if (len > length)
+			len = length;
+		if (len > sge.sge_length)
+			len = sge.sge_length;
+		BUG_ON(len == 0);
+		if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
+		    (len != length && (len & (sizeof(u32) - 1)))) {
+			ndesc = 0;
+			break;
+		}
+		ndesc++;
+		sge.vaddr += len;
+		sge.length -= len;
+		sge.sge_length -= len;
+		if (sge.sge_length == 0) {
+			if (--num_sge)
+				sge = *sg_list++;
+		} else if (sge.length == 0 && sge.mr->lkey) {
+			if (++sge.n >= QIB_SEGSZ) {
+				if (++sge.m >= sge.mr->mapsz)
+					break;
+				sge.n = 0;
+			}
+			sge.vaddr =
+				sge.mr->map[sge.m]->segs[sge.n].vaddr;
+			sge.length =
+				sge.mr->map[sge.m]->segs[sge.n].length;
+		}
+		length -= len;
+	}
+	return ndesc;
+}
+
+/*
+ * Copy from the SGEs to the data buffer.
+ */
+static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
+{
+	struct qib_sge *sge = &ss->sge;
+
+	while (length) {
+		u32 len = sge->length;
+
+		if (len > length)
+			len = length;
+		if (len > sge->sge_length)
+			len = sge->sge_length;
+		BUG_ON(len == 0);
+		memcpy(data, sge->vaddr, len);
+		sge->vaddr += len;
+		sge->length -= len;
+		sge->sge_length -= len;
+		if (sge->sge_length == 0) {
+			if (--ss->num_sge)
+				*sge = *ss->sg_list++;
+		} else if (sge->length == 0 && sge->mr->lkey) {
+			if (++sge->n >= QIB_SEGSZ) {
+				if (++sge->m >= sge->mr->mapsz)
+					break;
+				sge->n = 0;
+			}
+			sge->vaddr =
+				sge->mr->map[sge->m]->segs[sge->n].vaddr;
+			sge->length =
+				sge->mr->map[sge->m]->segs[sge->n].length;
+		}
+		data += len;
+		length -= len;
+	}
+}
+
+/**
+ * qib_post_one_send - post one RC, UC, or UD send work request
+ * @qp: the QP to post on
+ * @wr: the work request to send
+ */
+static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr)
+{
+	struct qib_swqe *wqe;
+	u32 next;
+	int i;
+	int j;
+	int acc;
+	int ret;
+	unsigned long flags;
+	struct qib_lkey_table *rkt;
+	struct qib_pd *pd;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+
+	/* Check that state is OK to post send. */
+	if (unlikely(!(ib_qib_state_ops[qp->state] & QIB_POST_SEND_OK)))
+		goto bail_inval;
+
+	/* IB spec says that num_sge == 0 is OK. */
+	if (wr->num_sge > qp->s_max_sge)
+		goto bail_inval;
+
+	/*
+	 * Don't allow RDMA reads or atomic operations on UC or
+	 * undefined operations.
+	 * Make sure buffer is large enough to hold the result for atomics.
+	 */
+	if (wr->opcode == IB_WR_FAST_REG_MR) {
+		if (qib_fast_reg_mr(qp, wr))
+			goto bail_inval;
+	} else if (qp->ibqp.qp_type == IB_QPT_UC) {
+		if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
+			goto bail_inval;
+	} else if (qp->ibqp.qp_type != IB_QPT_RC) {
+		/* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
+		if (wr->opcode != IB_WR_SEND &&
+		    wr->opcode != IB_WR_SEND_WITH_IMM)
+			goto bail_inval;
+		/* Check UD destination address PD */
+		if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+			goto bail_inval;
+	} else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
+		goto bail_inval;
+	else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
+		   (wr->num_sge == 0 ||
+		    wr->sg_list[0].length < sizeof(u64) ||
+		    wr->sg_list[0].addr & (sizeof(u64) - 1)))
+		goto bail_inval;
+	else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
+		goto bail_inval;
+
+	next = qp->s_head + 1;
+	if (next >= qp->s_size)
+		next = 0;
+	if (next == qp->s_last) {
+		ret = -ENOMEM;
+		goto bail;
+	}
+
+	rkt = &to_idev(qp->ibqp.device)->lk_table;
+	pd = to_ipd(qp->ibqp.pd);
+	wqe = get_swqe_ptr(qp, qp->s_head);
+	wqe->wr = *wr;
+	wqe->length = 0;
+	j = 0;
+	if (wr->num_sge) {
+		acc = wr->opcode >= IB_WR_RDMA_READ ?
+			IB_ACCESS_LOCAL_WRITE : 0;
+		for (i = 0; i < wr->num_sge; i++) {
+			u32 length = wr->sg_list[i].length;
+			int ok;
+
+			if (length == 0)
+				continue;
+			ok = qib_lkey_ok(rkt, pd, &wqe->sg_list[j],
+					 &wr->sg_list[i], acc);
+			if (!ok)
+				goto bail_inval_free;
+			wqe->length += length;
+			j++;
+		}
+		wqe->wr.num_sge = j;
+	}
+	if (qp->ibqp.qp_type == IB_QPT_UC ||
+	    qp->ibqp.qp_type == IB_QPT_RC) {
+		if (wqe->length > 0x80000000U)
+			goto bail_inval_free;
+	} else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
+				  qp->port_num - 1)->ibmtu)
+		goto bail_inval_free;
+	else
+		atomic_inc(&to_iah(wr->wr.ud.ah)->refcount);
+	wqe->ssn = qp->s_ssn++;
+	qp->s_head = next;
+
+	ret = 0;
+	goto bail;
+
+bail_inval_free:
+	while (j) {
+		struct qib_sge *sge = &wqe->sg_list[--j];
+
+		atomic_dec(&sge->mr->refcount);
+	}
+bail_inval:
+	ret = -EINVAL;
+bail:
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	return ret;
+}
+
+/**
+ * qib_post_send - post a send on a QP
+ * @ibqp: the QP to post the send on
+ * @wr: the list of work requests to post
+ * @bad_wr: the first bad WR is put here
+ *
+ * This may be called from interrupt context.
+ */
+static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+			 struct ib_send_wr **bad_wr)
+{
+	struct qib_qp *qp = to_iqp(ibqp);
+	int err = 0;
+
+	for (; wr; wr = wr->next) {
+		err = qib_post_one_send(qp, wr);
+		if (err) {
+			*bad_wr = wr;
+			goto bail;
+		}
+	}
+
+	/* Try to do the send work in the caller's context. */
+	qib_do_send(&qp->s_work);
+
+bail:
+	return err;
+}
+
+/**
+ * qib_post_receive - post a receive on a QP
+ * @ibqp: the QP to post the receive on
+ * @wr: the WR to post
+ * @bad_wr: the first bad WR is put here
+ *
+ * This may be called from interrupt context.
+ */
+static int qib_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+			    struct ib_recv_wr **bad_wr)
+{
+	struct qib_qp *qp = to_iqp(ibqp);
+	struct qib_rwq *wq = qp->r_rq.wq;
+	unsigned long flags;
+	int ret;
+
+	/* Check that state is OK to post receive. */
+	if (!(ib_qib_state_ops[qp->state] & QIB_POST_RECV_OK) || !wq) {
+		*bad_wr = wr;
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	for (; wr; wr = wr->next) {
+		struct qib_rwqe *wqe;
+		u32 next;
+		int i;
+
+		if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
+			*bad_wr = wr;
+			ret = -EINVAL;
+			goto bail;
+		}
+
+		spin_lock_irqsave(&qp->r_rq.lock, flags);
+		next = wq->head + 1;
+		if (next >= qp->r_rq.size)
+			next = 0;
+		if (next == wq->tail) {
+			spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+			*bad_wr = wr;
+			ret = -ENOMEM;
+			goto bail;
+		}
+
+		wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
+		wqe->wr_id = wr->wr_id;
+		wqe->num_sge = wr->num_sge;
+		for (i = 0; i < wr->num_sge; i++)
+			wqe->sg_list[i] = wr->sg_list[i];
+		/* Make sure queue entry is written before the head index. */
+		smp_wmb();
+		wq->head = next;
+		spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+	}
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+/**
+ * qib_qp_rcv - processing an incoming packet on a QP
+ * @rcd: the context pointer
+ * @hdr: the packet header
+ * @has_grh: true if the packet has a GRH
+ * @data: the packet data
+ * @tlen: the packet length
+ * @qp: the QP the packet came on
+ *
+ * This is called from qib_ib_rcv() to process an incoming packet
+ * for the given QP.
+ * Called at interrupt level.
+ */
+static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+		       int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+{
+	struct qib_ibport *ibp = &rcd->ppd->ibport_data;
+
+	/* Check for valid receive state. */
+	if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
+		ibp->n_pkt_drops++;
+		return;
+	}
+
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_SMI:
+	case IB_QPT_GSI:
+		if (ib_qib_disable_sma)
+			break;
+		/* FALLTHROUGH */
+	case IB_QPT_UD:
+		qib_ud_rcv(ibp, hdr, has_grh, data, tlen, qp);
+		break;
+
+	case IB_QPT_RC:
+		qib_rc_rcv(rcd, hdr, has_grh, data, tlen, qp);
+		break;
+
+	case IB_QPT_UC:
+		qib_uc_rcv(ibp, hdr, has_grh, data, tlen, qp);
+		break;
+
+	default:
+		break;
+	}
+}
+
+/**
+ * qib_ib_rcv - process an incoming packet
+ * @rcd: the context pointer
+ * @rhdr: the header of the packet
+ * @data: the packet payload
+ * @tlen: the packet length
+ *
+ * This is called from qib_kreceive() to process an incoming packet at
+ * interrupt level. Tlen is the length of the header + data + CRC in bytes.
+ */
+void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
+{
+	struct qib_pportdata *ppd = rcd->ppd;
+	struct qib_ibport *ibp = &ppd->ibport_data;
+	struct qib_ib_header *hdr = rhdr;
+	struct qib_other_headers *ohdr;
+	struct qib_qp *qp;
+	u32 qp_num;
+	int lnh;
+	u8 opcode;
+	u16 lid;
+
+	/* 24 == LRH+BTH+CRC */
+	if (unlikely(tlen < 24))
+		goto drop;
+
+	/* Check for a valid destination LID (see ch. 7.11.1). */
+	lid = be16_to_cpu(hdr->lrh[1]);
+	if (lid < QIB_MULTICAST_LID_BASE) {
+		lid &= ~((1 << ppd->lmc) - 1);
+		if (unlikely(lid != ppd->lid))
+			goto drop;
+	}
+
+	/* Check for GRH */
+	lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+	if (lnh == QIB_LRH_BTH)
+		ohdr = &hdr->u.oth;
+	else if (lnh == QIB_LRH_GRH) {
+		u32 vtf;
+
+		ohdr = &hdr->u.l.oth;
+		if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+			goto drop;
+		vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
+		if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
+			goto drop;
+	} else
+		goto drop;
+
+	opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+	ibp->opstats[opcode & 0x7f].n_bytes += tlen;
+	ibp->opstats[opcode & 0x7f].n_packets++;
+
+	/* Get the destination QP number. */
+	qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+	if (qp_num == QIB_MULTICAST_QPN) {
+		struct qib_mcast *mcast;
+		struct qib_mcast_qp *p;
+
+		if (lnh != QIB_LRH_GRH)
+			goto drop;
+		mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid);
+		if (mcast == NULL)
+			goto drop;
+		ibp->n_multicast_rcv++;
+		list_for_each_entry_rcu(p, &mcast->qp_list, list)
+			qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
+		/*
+		 * Notify qib_multicast_detach() if it is waiting for us
+		 * to finish.
+		 */
+		if (atomic_dec_return(&mcast->refcount) <= 1)
+			wake_up(&mcast->wait);
+	} else {
+		qp = qib_lookup_qpn(ibp, qp_num);
+		if (!qp)
+			goto drop;
+		ibp->n_unicast_rcv++;
+		qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
+		/*
+		 * Notify qib_destroy_qp() if it is waiting
+		 * for us to finish.
+		 */
+		if (atomic_dec_and_test(&qp->refcount))
+			wake_up(&qp->wait);
+	}
+	return;
+
+drop:
+	ibp->n_pkt_drops++;
+}
+
+/*
+ * This is called from a timer to check for QPs
+ * which need kernel memory in order to send a packet.
+ */
+static void mem_timer(unsigned long data)
+{
+	struct qib_ibdev *dev = (struct qib_ibdev *) data;
+	struct list_head *list = &dev->memwait;
+	struct qib_qp *qp = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->pending_lock, flags);
+	if (!list_empty(list)) {
+		qp = list_entry(list->next, struct qib_qp, iowait);
+		list_del_init(&qp->iowait);
+		atomic_inc(&qp->refcount);
+		if (!list_empty(list))
+			mod_timer(&dev->mem_timer, jiffies + 1);
+	}
+	spin_unlock_irqrestore(&dev->pending_lock, flags);
+
+	if (qp) {
+		spin_lock_irqsave(&qp->s_lock, flags);
+		if (qp->s_flags & QIB_S_WAIT_KMEM) {
+			qp->s_flags &= ~QIB_S_WAIT_KMEM;
+			qib_schedule_send(qp);
+		}
+		spin_unlock_irqrestore(&qp->s_lock, flags);
+		if (atomic_dec_and_test(&qp->refcount))
+			wake_up(&qp->wait);
+	}
+}
+
+static void update_sge(struct qib_sge_state *ss, u32 length)
+{
+	struct qib_sge *sge = &ss->sge;
+
+	sge->vaddr += length;
+	sge->length -= length;
+	sge->sge_length -= length;
+	if (sge->sge_length == 0) {
+		if (--ss->num_sge)
+			*sge = *ss->sg_list++;
+	} else if (sge->length == 0 && sge->mr->lkey) {
+		if (++sge->n >= QIB_SEGSZ) {
+			if (++sge->m >= sge->mr->mapsz)
+				return;
+			sge->n = 0;
+		}
+		sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
+		sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
+	}
+}
+
+#ifdef __LITTLE_ENDIAN
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+	return data >> shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+	return data << shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+	data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
+	data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+	return data;
+}
+#else
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+	return data << shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+	return data >> shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+	data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
+	data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+	return data;
+}
+#endif
+
+static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
+		    u32 length, unsigned flush_wc)
+{
+	u32 extra = 0;
+	u32 data = 0;
+	u32 last;
+
+	while (1) {
+		u32 len = ss->sge.length;
+		u32 off;
+
+		if (len > length)
+			len = length;
+		if (len > ss->sge.sge_length)
+			len = ss->sge.sge_length;
+		BUG_ON(len == 0);
+		/* If the source address is not aligned, try to align it. */
+		off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
+		if (off) {
+			u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
+					    ~(sizeof(u32) - 1));
+			u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
+			u32 y;
+
+			y = sizeof(u32) - off;
+			if (len > y)
+				len = y;
+			if (len + extra >= sizeof(u32)) {
+				data |= set_upper_bits(v, extra *
+						       BITS_PER_BYTE);
+				len = sizeof(u32) - extra;
+				if (len == length) {
+					last = data;
+					break;
+				}
+				__raw_writel(data, piobuf);
+				piobuf++;
+				extra = 0;
+				data = 0;
+			} else {
+				/* Clear unused upper bytes */
+				data |= clear_upper_bytes(v, len, extra);
+				if (len == length) {
+					last = data;
+					break;
+				}
+				extra += len;
+			}
+		} else if (extra) {
+			/* Source address is aligned. */
+			u32 *addr = (u32 *) ss->sge.vaddr;
+			int shift = extra * BITS_PER_BYTE;
+			int ushift = 32 - shift;
+			u32 l = len;
+
+			while (l >= sizeof(u32)) {
+				u32 v = *addr;
+
+				data |= set_upper_bits(v, shift);
+				__raw_writel(data, piobuf);
+				data = get_upper_bits(v, ushift);
+				piobuf++;
+				addr++;
+				l -= sizeof(u32);
+			}
+			/*
+			 * We still have 'extra' number of bytes leftover.
+			 */
+			if (l) {
+				u32 v = *addr;
+
+				if (l + extra >= sizeof(u32)) {
+					data |= set_upper_bits(v, shift);
+					len -= l + extra - sizeof(u32);
+					if (len == length) {
+						last = data;
+						break;
+					}
+					__raw_writel(data, piobuf);
+					piobuf++;
+					extra = 0;
+					data = 0;
+				} else {
+					/* Clear unused upper bytes */
+					data |= clear_upper_bytes(v, l, extra);
+					if (len == length) {
+						last = data;
+						break;
+					}
+					extra += l;
+				}
+			} else if (len == length) {
+				last = data;
+				break;
+			}
+		} else if (len == length) {
+			u32 w;
+
+			/*
+			 * Need to round up for the last dword in the
+			 * packet.
+			 */
+			w = (len + 3) >> 2;
+			qib_pio_copy(piobuf, ss->sge.vaddr, w - 1);
+			piobuf += w - 1;
+			last = ((u32 *) ss->sge.vaddr)[w - 1];
+			break;
+		} else {
+			u32 w = len >> 2;
+
+			qib_pio_copy(piobuf, ss->sge.vaddr, w);
+			piobuf += w;
+
+			extra = len & (sizeof(u32) - 1);
+			if (extra) {
+				u32 v = ((u32 *) ss->sge.vaddr)[w];
+
+				/* Clear unused upper bytes */
+				data = clear_upper_bytes(v, extra, 0);
+			}
+		}
+		update_sge(ss, len);
+		length -= len;
+	}
+	/* Update address before sending packet. */
+	update_sge(ss, length);
+	if (flush_wc) {
+		/* must flush early everything before trigger word */
+		qib_flush_wc();
+		__raw_writel(last, piobuf);
+		/* be sure trigger word is written */
+		qib_flush_wc();
+	} else
+		__raw_writel(last, piobuf);
+}
+
+static struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
+					 struct qib_qp *qp, int *retp)
+{
+	struct qib_verbs_txreq *tx;
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+	spin_lock(&dev->pending_lock);
+
+	if (!list_empty(&dev->txreq_free)) {
+		struct list_head *l = dev->txreq_free.next;
+
+		list_del(l);
+		tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
+		*retp = 0;
+	} else {
+		if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK &&
+		    list_empty(&qp->iowait)) {
+			dev->n_txwait++;
+			qp->s_flags |= QIB_S_WAIT_TX;
+			list_add_tail(&qp->iowait, &dev->txwait);
+		}
+		tx = NULL;
+		qp->s_flags &= ~QIB_S_BUSY;
+		*retp = -EBUSY;
+	}
+
+	spin_unlock(&dev->pending_lock);
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+
+	return tx;
+}
+
+void qib_put_txreq(struct qib_verbs_txreq *tx)
+{
+	struct qib_ibdev *dev;
+	struct qib_qp *qp;
+	unsigned long flags;
+
+	qp = tx->qp;
+	dev = to_idev(qp->ibqp.device);
+
+	if (atomic_dec_and_test(&qp->refcount))
+		wake_up(&qp->wait);
+	if (tx->mr) {
+		atomic_dec(&tx->mr->refcount);
+		tx->mr = NULL;
+	}
+	if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
+		tx->txreq.flags &= ~QIB_SDMA_TXREQ_F_FREEBUF;
+		dma_unmap_single(&dd_from_dev(dev)->pcidev->dev,
+				 tx->txreq.addr, tx->hdr_dwords << 2,
+				 DMA_TO_DEVICE);
+		kfree(tx->align_buf);
+	}
+
+	spin_lock_irqsave(&dev->pending_lock, flags);
+
+	/* Put struct back on free list */
+	list_add(&tx->txreq.list, &dev->txreq_free);
+
+	if (!list_empty(&dev->txwait)) {
+		/* Wake up first QP wanting a free struct */
+		qp = list_entry(dev->txwait.next, struct qib_qp, iowait);
+		list_del_init(&qp->iowait);
+		atomic_inc(&qp->refcount);
+		spin_unlock_irqrestore(&dev->pending_lock, flags);
+
+		spin_lock_irqsave(&qp->s_lock, flags);
+		if (qp->s_flags & QIB_S_WAIT_TX) {
+			qp->s_flags &= ~QIB_S_WAIT_TX;
+			qib_schedule_send(qp);
+		}
+		spin_unlock_irqrestore(&qp->s_lock, flags);
+
+		if (atomic_dec_and_test(&qp->refcount))
+			wake_up(&qp->wait);
+	} else
+		spin_unlock_irqrestore(&dev->pending_lock, flags);
+}
+
+/*
+ * This is called when there are send DMA descriptors that might be
+ * available.
+ *
+ * This is called with ppd->sdma_lock held.
+ */
+void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
+{
+	struct qib_qp *qp, *nqp;
+	struct qib_qp *qps[20];
+	struct qib_ibdev *dev;
+	unsigned i, n;
+
+	n = 0;
+	dev = &ppd->dd->verbs_dev;
+	spin_lock(&dev->pending_lock);
+
+	/* Search wait list for first QP wanting DMA descriptors. */
+	list_for_each_entry_safe(qp, nqp, &dev->dmawait, iowait) {
+		if (qp->port_num != ppd->port)
+			continue;
+		if (n == ARRAY_SIZE(qps))
+			break;
+		if (qp->s_tx->txreq.sg_count > avail)
+			break;
+		avail -= qp->s_tx->txreq.sg_count;
+		list_del_init(&qp->iowait);
+		atomic_inc(&qp->refcount);
+		qps[n++] = qp;
+	}
+
+	spin_unlock(&dev->pending_lock);
+
+	for (i = 0; i < n; i++) {
+		qp = qps[i];
+		spin_lock(&qp->s_lock);
+		if (qp->s_flags & QIB_S_WAIT_DMA_DESC) {
+			qp->s_flags &= ~QIB_S_WAIT_DMA_DESC;
+			qib_schedule_send(qp);
+		}
+		spin_unlock(&qp->s_lock);
+		if (atomic_dec_and_test(&qp->refcount))
+			wake_up(&qp->wait);
+	}
+}
+
+/*
+ * This is called with ppd->sdma_lock held.
+ */
+static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
+{
+	struct qib_verbs_txreq *tx =
+		container_of(cookie, struct qib_verbs_txreq, txreq);
+	struct qib_qp *qp = tx->qp;
+
+	spin_lock(&qp->s_lock);
+	if (tx->wqe)
+		qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
+	else if (qp->ibqp.qp_type == IB_QPT_RC) {
+		struct qib_ib_header *hdr;
+
+		if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF)
+			hdr = &tx->align_buf->hdr;
+		else {
+			struct qib_ibdev *dev = to_idev(qp->ibqp.device);
+
+			hdr = &dev->pio_hdrs[tx->hdr_inx].hdr;
+		}
+		qib_rc_send_complete(qp, hdr);
+	}
+	if (atomic_dec_and_test(&qp->s_dma_busy)) {
+		if (qp->state == IB_QPS_RESET)
+			wake_up(&qp->wait_dma);
+		else if (qp->s_flags & QIB_S_WAIT_DMA) {
+			qp->s_flags &= ~QIB_S_WAIT_DMA;
+			qib_schedule_send(qp);
+		}
+	}
+	spin_unlock(&qp->s_lock);
+
+	qib_put_txreq(tx);
+}
+
+static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp)
+{
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&qp->s_lock, flags);
+	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
+		spin_lock(&dev->pending_lock);
+		if (list_empty(&qp->iowait)) {
+			if (list_empty(&dev->memwait))
+				mod_timer(&dev->mem_timer, jiffies + 1);
+			qp->s_flags |= QIB_S_WAIT_KMEM;
+			list_add_tail(&qp->iowait, &dev->memwait);
+		}
+		spin_unlock(&dev->pending_lock);
+		qp->s_flags &= ~QIB_S_BUSY;
+		ret = -EBUSY;
+	}
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+
+	return ret;
+}
+
+static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
+			      u32 hdrwords, struct qib_sge_state *ss, u32 len,
+			      u32 plen, u32 dwords)
+{
+	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
+	struct qib_devdata *dd = dd_from_dev(dev);
+	struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_verbs_txreq *tx;
+	struct qib_pio_header *phdr;
+	u32 control;
+	u32 ndesc;
+	int ret;
+
+	tx = qp->s_tx;
+	if (tx) {
+		qp->s_tx = NULL;
+		/* resend previously constructed packet */
+		ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
+		goto bail;
+	}
+
+	tx = get_txreq(dev, qp, &ret);
+	if (!tx)
+		goto bail;
+
+	control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
+				       be16_to_cpu(hdr->lrh[0]) >> 12);
+	tx->qp = qp;
+	atomic_inc(&qp->refcount);
+	tx->wqe = qp->s_wqe;
+	tx->mr = qp->s_rdma_mr;
+	if (qp->s_rdma_mr)
+		qp->s_rdma_mr = NULL;
+	tx->txreq.callback = sdma_complete;
+	if (dd->flags & QIB_HAS_SDMA_TIMEOUT)
+		tx->txreq.flags = QIB_SDMA_TXREQ_F_HEADTOHOST;
+	else
+		tx->txreq.flags = QIB_SDMA_TXREQ_F_INTREQ;
+	if (plen + 1 > dd->piosize2kmax_dwords)
+		tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF;
+
+	if (len) {
+		/*
+		 * Don't try to DMA if it takes more descriptors than
+		 * the queue holds.
+		 */
+		ndesc = qib_count_sge(ss, len);
+		if (ndesc >= ppd->sdma_descq_cnt)
+			ndesc = 0;
+	} else
+		ndesc = 1;
+	if (ndesc) {
+		phdr = &dev->pio_hdrs[tx->hdr_inx];
+		phdr->pbc[0] = cpu_to_le32(plen);
+		phdr->pbc[1] = cpu_to_le32(control);
+		memcpy(&phdr->hdr, hdr, hdrwords << 2);
+		tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEDESC;
+		tx->txreq.sg_count = ndesc;
+		tx->txreq.addr = dev->pio_hdrs_phys +
+			tx->hdr_inx * sizeof(struct qib_pio_header);
+		tx->hdr_dwords = hdrwords + 2; /* add PBC length */
+		ret = qib_sdma_verbs_send(ppd, ss, dwords, tx);
+		goto bail;
+	}
+
+	/* Allocate a buffer and copy the header and payload to it. */
+	tx->hdr_dwords = plen + 1;
+	phdr = kmalloc(tx->hdr_dwords << 2, GFP_ATOMIC);
+	if (!phdr)
+		goto err_tx;
+	phdr->pbc[0] = cpu_to_le32(plen);
+	phdr->pbc[1] = cpu_to_le32(control);
+	memcpy(&phdr->hdr, hdr, hdrwords << 2);
+	qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len);
+
+	tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr,
+					tx->hdr_dwords << 2, DMA_TO_DEVICE);
+	if (dma_mapping_error(&dd->pcidev->dev, tx->txreq.addr))
+		goto map_err;
+	tx->align_buf = phdr;
+	tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF;
+	tx->txreq.sg_count = 1;
+	ret = qib_sdma_verbs_send(ppd, NULL, 0, tx);
+	goto unaligned;
+
+map_err:
+	kfree(phdr);
+err_tx:
+	qib_put_txreq(tx);
+	ret = wait_kmem(dev, qp);
+unaligned:
+	ibp->n_unaligned++;
+bail:
+	return ret;
+}
+
+/*
+ * If we are now in the error state, return zero to flush the
+ * send work request.
+ */
+static int no_bufs_available(struct qib_qp *qp)
+{
+	struct qib_ibdev *dev = to_idev(qp->ibqp.device);
+	struct qib_devdata *dd;
+	unsigned long flags;
+	int ret = 0;
+
+	/*
+	 * Note that as soon as want_buffer() is called and
+	 * possibly before it returns, qib_ib_piobufavail()
+	 * could be called. Therefore, put QP on the I/O wait list before
+	 * enabling the PIO avail interrupt.
+	 */
+	spin_lock_irqsave(&qp->s_lock, flags);
+	if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
+		spin_lock(&dev->pending_lock);
+		if (list_empty(&qp->iowait)) {
+			dev->n_piowait++;
+			qp->s_flags |= QIB_S_WAIT_PIO;
+			list_add_tail(&qp->iowait, &dev->piowait);
+			dd = dd_from_dev(dev);
+			dd->f_wantpiobuf_intr(dd, 1);
+		}
+		spin_unlock(&dev->pending_lock);
+		qp->s_flags &= ~QIB_S_BUSY;
+		ret = -EBUSY;
+	}
+	spin_unlock_irqrestore(&qp->s_lock, flags);
+	return ret;
+}
+
+static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
+			      u32 hdrwords, struct qib_sge_state *ss, u32 len,
+			      u32 plen, u32 dwords)
+{
+	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+	struct qib_pportdata *ppd = dd->pport + qp->port_num - 1;
+	u32 *hdr = (u32 *) ibhdr;
+	u32 __iomem *piobuf_orig;
+	u32 __iomem *piobuf;
+	u64 pbc;
+	unsigned long flags;
+	unsigned flush_wc;
+	u32 control;
+	u32 pbufn;
+
+	control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
+		be16_to_cpu(ibhdr->lrh[0]) >> 12);
+	pbc = ((u64) control << 32) | plen;
+	piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
+	if (unlikely(piobuf == NULL))
+		return no_bufs_available(qp);
+
+	/*
+	 * Write the pbc.
+	 * We have to flush after the PBC for correctness on some cpus
+	 * or WC buffer can be written out of order.
+	 */
+	writeq(pbc, piobuf);
+	piobuf_orig = piobuf;
+	piobuf += 2;
+
+	flush_wc = dd->flags & QIB_PIO_FLUSH_WC;
+	if (len == 0) {
+		/*
+		 * If there is just the header portion, must flush before
+		 * writing last word of header for correctness, and after
+		 * the last header word (trigger word).
+		 */
+		if (flush_wc) {
+			qib_flush_wc();
+			qib_pio_copy(piobuf, hdr, hdrwords - 1);
+			qib_flush_wc();
+			__raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
+			qib_flush_wc();
+		} else
+			qib_pio_copy(piobuf, hdr, hdrwords);
+		goto done;
+	}
+
+	if (flush_wc)
+		qib_flush_wc();
+	qib_pio_copy(piobuf, hdr, hdrwords);
+	piobuf += hdrwords;
+
+	/* The common case is aligned and contained in one segment. */
+	if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
+		   !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
+		u32 *addr = (u32 *) ss->sge.vaddr;
+
+		/* Update address before sending packet. */
+		update_sge(ss, len);
+		if (flush_wc) {
+			qib_pio_copy(piobuf, addr, dwords - 1);
+			/* must flush early everything before trigger word */
+			qib_flush_wc();
+			__raw_writel(addr[dwords - 1], piobuf + dwords - 1);
+			/* be sure trigger word is written */
+			qib_flush_wc();
+		} else
+			qib_pio_copy(piobuf, addr, dwords);
+		goto done;
+	}
+	copy_io(piobuf, ss, len, flush_wc);
+done:
+	if (dd->flags & QIB_USE_SPCL_TRIG) {
+		u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
+		qib_flush_wc();
+		__raw_writel(0xaebecede, piobuf_orig + spcl_off);
+	}
+	qib_sendbuf_done(dd, pbufn);
+	if (qp->s_rdma_mr) {
+		atomic_dec(&qp->s_rdma_mr->refcount);
+		qp->s_rdma_mr = NULL;
+	}
+	if (qp->s_wqe) {
+		spin_lock_irqsave(&qp->s_lock, flags);
+		qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
+		spin_unlock_irqrestore(&qp->s_lock, flags);
+	} else if (qp->ibqp.qp_type == IB_QPT_RC) {
+		spin_lock_irqsave(&qp->s_lock, flags);
+		qib_rc_send_complete(qp, ibhdr);
+		spin_unlock_irqrestore(&qp->s_lock, flags);
+	}
+	return 0;
+}
+
+/**
+ * qib_verbs_send - send a packet
+ * @qp: the QP to send on
+ * @hdr: the packet header
+ * @hdrwords: the number of 32-bit words in the header
+ * @ss: the SGE to send
+ * @len: the length of the packet in bytes
+ *
+ * Return zero if packet is sent or queued OK.
+ * Return non-zero and clear qp->s_flags QIB_S_BUSY otherwise.
+ */
+int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
+		   u32 hdrwords, struct qib_sge_state *ss, u32 len)
+{
+	struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+	u32 plen;
+	int ret;
+	u32 dwords = (len + 3) >> 2;
+
+	/*
+	 * Calculate the send buffer trigger address.
+	 * The +1 counts for the pbc control dword following the pbc length.
+	 */
+	plen = hdrwords + dwords + 1;
+
+	/*
+	 * VL15 packets (IB_QPT_SMI) will always use PIO, so we
+	 * can defer SDMA restart until link goes ACTIVE without
+	 * worrying about just how we got there.
+	 */
+	if (qp->ibqp.qp_type == IB_QPT_SMI ||
+	    !(dd->flags & QIB_HAS_SEND_DMA))
+		ret = qib_verbs_send_pio(qp, hdr, hdrwords, ss, len,
+					 plen, dwords);
+	else
+		ret = qib_verbs_send_dma(qp, hdr, hdrwords, ss, len,
+					 plen, dwords);
+
+	return ret;
+}
+
+int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
+			  u64 *rwords, u64 *spkts, u64 *rpkts,
+			  u64 *xmit_wait)
+{
+	int ret;
+	struct qib_devdata *dd = ppd->dd;
+
+	if (!(dd->flags & QIB_PRESENT)) {
+		/* no hardware, freeze, etc. */
+		ret = -EINVAL;
+		goto bail;
+	}
+	*swords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDSEND);
+	*rwords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDRCV);
+	*spkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTSEND);
+	*rpkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTRCV);
+	*xmit_wait = dd->f_portcntr(ppd, QIBPORTCNTR_SENDSTALL);
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+/**
+ * qib_get_counters - get various chip counters
+ * @dd: the qlogic_ib device
+ * @cntrs: counters are placed here
+ *
+ * Return the counters needed by recv_pma_get_portcounters().
+ */
+int qib_get_counters(struct qib_pportdata *ppd,
+		     struct qib_verbs_counters *cntrs)
+{
+	int ret;
+
+	if (!(ppd->dd->flags & QIB_PRESENT)) {
+		/* no hardware, freeze, etc. */
+		ret = -EINVAL;
+		goto bail;
+	}
+	cntrs->symbol_error_counter =
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR);
+	cntrs->link_error_recovery_counter =
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKERRRECOV);
+	/*
+	 * The link downed counter counts when the other side downs the
+	 * connection.  We add in the number of times we downed the link
+	 * due to local link integrity errors to compensate.
+	 */
+	cntrs->link_downed_counter =
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKDOWN);
+	cntrs->port_rcv_errors =
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXDROPPKT) +
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVOVFL) +
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERR_RLEN) +
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_INVALIDRLEN) +
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLINK) +
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRICRC) +
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRVCRC) +
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLPCRC) +
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_BADFORMAT);
+	cntrs->port_rcv_errors +=
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXLOCALPHYERR);
+	cntrs->port_rcv_errors +=
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXVLERR);
+	cntrs->port_rcv_remphys_errors =
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVEBP);
+	cntrs->port_xmit_discards =
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_UNSUPVL);
+	cntrs->port_xmit_data = ppd->dd->f_portcntr(ppd,
+			QIBPORTCNTR_WORDSEND);
+	cntrs->port_rcv_data = ppd->dd->f_portcntr(ppd,
+			QIBPORTCNTR_WORDRCV);
+	cntrs->port_xmit_packets = ppd->dd->f_portcntr(ppd,
+			QIBPORTCNTR_PKTSEND);
+	cntrs->port_rcv_packets = ppd->dd->f_portcntr(ppd,
+			QIBPORTCNTR_PKTRCV);
+	cntrs->local_link_integrity_errors =
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_LLI);
+	cntrs->excessive_buffer_overrun_errors =
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_EXCESSBUFOVFL);
+	cntrs->vl15_dropped =
+		ppd->dd->f_portcntr(ppd, QIBPORTCNTR_VL15PKTDROP);
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+/**
+ * qib_ib_piobufavail - callback when a PIO buffer is available
+ * @dd: the device pointer
+ *
+ * This is called from qib_intr() at interrupt level when a PIO buffer is
+ * available after qib_verbs_send() returned an error that no buffers were
+ * available. Disable the interrupt if there are no more QPs waiting.
+ */
+void qib_ib_piobufavail(struct qib_devdata *dd)
+{
+	struct qib_ibdev *dev = &dd->verbs_dev;
+	struct list_head *list;
+	struct qib_qp *qps[5];
+	struct qib_qp *qp;
+	unsigned long flags;
+	unsigned i, n;
+
+	list = &dev->piowait;
+	n = 0;
+
+	/*
+	 * Note: checking that the piowait list is empty and clearing
+	 * the buffer available interrupt needs to be atomic or we
+	 * could end up with QPs on the wait list with the interrupt
+	 * disabled.
+	 */
+	spin_lock_irqsave(&dev->pending_lock, flags);
+	while (!list_empty(list)) {
+		if (n == ARRAY_SIZE(qps))
+			goto full;
+		qp = list_entry(list->next, struct qib_qp, iowait);
+		list_del_init(&qp->iowait);
+		atomic_inc(&qp->refcount);
+		qps[n++] = qp;
+	}
+	dd->f_wantpiobuf_intr(dd, 0);
+full:
+	spin_unlock_irqrestore(&dev->pending_lock, flags);
+
+	for (i = 0; i < n; i++) {
+		qp = qps[i];
+
+		spin_lock_irqsave(&qp->s_lock, flags);
+		if (qp->s_flags & QIB_S_WAIT_PIO) {
+			qp->s_flags &= ~QIB_S_WAIT_PIO;
+			qib_schedule_send(qp);
+		}
+		spin_unlock_irqrestore(&qp->s_lock, flags);
+
+		/* Notify qib_destroy_qp() if it is waiting. */
+		if (atomic_dec_and_test(&qp->refcount))
+			wake_up(&qp->wait);
+	}
+}
+
+static int qib_query_device(struct ib_device *ibdev,
+			    struct ib_device_attr *props)
+{
+	struct qib_devdata *dd = dd_from_ibdev(ibdev);
+	struct qib_ibdev *dev = to_idev(ibdev);
+
+	memset(props, 0, sizeof(*props));
+
+	props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
+		IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
+		IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
+		IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
+	props->page_size_cap = PAGE_SIZE;
+	props->vendor_id =
+		QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
+	props->vendor_part_id = dd->deviceid;
+	props->hw_ver = dd->minrev;
+	props->sys_image_guid = ib_qib_sys_image_guid;
+	props->max_mr_size = ~0ULL;
+	props->max_qp = ib_qib_max_qps;
+	props->max_qp_wr = ib_qib_max_qp_wrs;
+	props->max_sge = ib_qib_max_sges;
+	props->max_cq = ib_qib_max_cqs;
+	props->max_ah = ib_qib_max_ahs;
+	props->max_cqe = ib_qib_max_cqes;
+	props->max_mr = dev->lk_table.max;
+	props->max_fmr = dev->lk_table.max;
+	props->max_map_per_fmr = 32767;
+	props->max_pd = ib_qib_max_pds;
+	props->max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
+	props->max_qp_init_rd_atom = 255;
+	/* props->max_res_rd_atom */
+	props->max_srq = ib_qib_max_srqs;
+	props->max_srq_wr = ib_qib_max_srq_wrs;
+	props->max_srq_sge = ib_qib_max_srq_sges;
+	/* props->local_ca_ack_delay */
+	props->atomic_cap = IB_ATOMIC_GLOB;
+	props->max_pkeys = qib_get_npkeys(dd);
+	props->max_mcast_grp = ib_qib_max_mcast_grps;
+	props->max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
+	props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
+		props->max_mcast_grp;
+
+	return 0;
+}
+
+static int qib_query_port(struct ib_device *ibdev, u8 port,
+			  struct ib_port_attr *props)
+{
+	struct qib_devdata *dd = dd_from_ibdev(ibdev);
+	struct qib_ibport *ibp = to_iport(ibdev, port);
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	enum ib_mtu mtu;
+	u16 lid = ppd->lid;
+
+	memset(props, 0, sizeof(*props));
+	props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
+	props->lmc = ppd->lmc;
+	props->sm_lid = ibp->sm_lid;
+	props->sm_sl = ibp->sm_sl;
+	props->state = dd->f_iblink_state(ppd->lastibcstat);
+	props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat);
+	props->port_cap_flags = ibp->port_cap_flags;
+	props->gid_tbl_len = QIB_GUIDS_PER_PORT;
+	props->max_msg_sz = 0x80000000;
+	props->pkey_tbl_len = qib_get_npkeys(dd);
+	props->bad_pkey_cntr = ibp->pkey_violations;
+	props->qkey_viol_cntr = ibp->qkey_violations;
+	props->active_width = ppd->link_width_active;
+	/* See rate_show() */
+	props->active_speed = ppd->link_speed_active;
+	props->max_vl_num = qib_num_vls(ppd->vls_supported);
+	props->init_type_reply = 0;
+
+	props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
+	switch (ppd->ibmtu) {
+	case 4096:
+		mtu = IB_MTU_4096;
+		break;
+	case 2048:
+		mtu = IB_MTU_2048;
+		break;
+	case 1024:
+		mtu = IB_MTU_1024;
+		break;
+	case 512:
+		mtu = IB_MTU_512;
+		break;
+	case 256:
+		mtu = IB_MTU_256;
+		break;
+	default:
+		mtu = IB_MTU_2048;
+	}
+	props->active_mtu = mtu;
+	props->subnet_timeout = ibp->subnet_timeout;
+
+	return 0;
+}
+
+static int qib_modify_device(struct ib_device *device,
+			     int device_modify_mask,
+			     struct ib_device_modify *device_modify)
+{
+	struct qib_devdata *dd = dd_from_ibdev(device);
+	unsigned i;
+	int ret;
+
+	if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
+				   IB_DEVICE_MODIFY_NODE_DESC)) {
+		ret = -EOPNOTSUPP;
+		goto bail;
+	}
+
+	if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
+		memcpy(device->node_desc, device_modify->node_desc, 64);
+		for (i = 0; i < dd->num_pports; i++) {
+			struct qib_ibport *ibp = &dd->pport[i].ibport_data;
+
+			qib_node_desc_chg(ibp);
+		}
+	}
+
+	if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
+		ib_qib_sys_image_guid =
+			cpu_to_be64(device_modify->sys_image_guid);
+		for (i = 0; i < dd->num_pports; i++) {
+			struct qib_ibport *ibp = &dd->pport[i].ibport_data;
+
+			qib_sys_guid_chg(ibp);
+		}
+	}
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+static int qib_modify_port(struct ib_device *ibdev, u8 port,
+			   int port_modify_mask, struct ib_port_modify *props)
+{
+	struct qib_ibport *ibp = to_iport(ibdev, port);
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+	ibp->port_cap_flags |= props->set_port_cap_mask;
+	ibp->port_cap_flags &= ~props->clr_port_cap_mask;
+	if (props->set_port_cap_mask || props->clr_port_cap_mask)
+		qib_cap_mask_chg(ibp);
+	if (port_modify_mask & IB_PORT_SHUTDOWN)
+		qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
+	if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
+		ibp->qkey_violations = 0;
+	return 0;
+}
+
+static int qib_query_gid(struct ib_device *ibdev, u8 port,
+			 int index, union ib_gid *gid)
+{
+	struct qib_devdata *dd = dd_from_ibdev(ibdev);
+	int ret = 0;
+
+	if (!port || port > dd->num_pports)
+		ret = -EINVAL;
+	else {
+		struct qib_ibport *ibp = to_iport(ibdev, port);
+		struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+		gid->global.subnet_prefix = ibp->gid_prefix;
+		if (index == 0)
+			gid->global.interface_id = ppd->guid;
+		else if (index < QIB_GUIDS_PER_PORT)
+			gid->global.interface_id = ibp->guids[index - 1];
+		else
+			ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static struct ib_pd *qib_alloc_pd(struct ib_device *ibdev,
+				  struct ib_ucontext *context,
+				  struct ib_udata *udata)
+{
+	struct qib_ibdev *dev = to_idev(ibdev);
+	struct qib_pd *pd;
+	struct ib_pd *ret;
+
+	/*
+	 * This is actually totally arbitrary.  Some correctness tests
+	 * assume there's a maximum number of PDs that can be allocated.
+	 * We don't actually have this limit, but we fail the test if
+	 * we allow allocations of more than we report for this value.
+	 */
+
+	pd = kmalloc(sizeof *pd, GFP_KERNEL);
+	if (!pd) {
+		ret = ERR_PTR(-ENOMEM);
+		goto bail;
+	}
+
+	spin_lock(&dev->n_pds_lock);
+	if (dev->n_pds_allocated == ib_qib_max_pds) {
+		spin_unlock(&dev->n_pds_lock);
+		kfree(pd);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail;
+	}
+
+	dev->n_pds_allocated++;
+	spin_unlock(&dev->n_pds_lock);
+
+	/* ib_alloc_pd() will initialize pd->ibpd. */
+	pd->user = udata != NULL;
+
+	ret = &pd->ibpd;
+
+bail:
+	return ret;
+}
+
+static int qib_dealloc_pd(struct ib_pd *ibpd)
+{
+	struct qib_pd *pd = to_ipd(ibpd);
+	struct qib_ibdev *dev = to_idev(ibpd->device);
+
+	spin_lock(&dev->n_pds_lock);
+	dev->n_pds_allocated--;
+	spin_unlock(&dev->n_pds_lock);
+
+	kfree(pd);
+
+	return 0;
+}
+
+int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
+{
+	/* A multicast address requires a GRH (see ch. 8.4.1). */
+	if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
+	    ah_attr->dlid != QIB_PERMISSIVE_LID &&
+	    !(ah_attr->ah_flags & IB_AH_GRH))
+		goto bail;
+	if ((ah_attr->ah_flags & IB_AH_GRH) &&
+	    ah_attr->grh.sgid_index >= QIB_GUIDS_PER_PORT)
+		goto bail;
+	if (ah_attr->dlid == 0)
+		goto bail;
+	if (ah_attr->port_num < 1 ||
+	    ah_attr->port_num > ibdev->phys_port_cnt)
+		goto bail;
+	if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
+	    ib_rate_to_mult(ah_attr->static_rate) < 0)
+		goto bail;
+	if (ah_attr->sl > 15)
+		goto bail;
+	return 0;
+bail:
+	return -EINVAL;
+}
+
+/**
+ * qib_create_ah - create an address handle
+ * @pd: the protection domain
+ * @ah_attr: the attributes of the AH
+ *
+ * This may be called from interrupt context.
+ */
+static struct ib_ah *qib_create_ah(struct ib_pd *pd,
+				   struct ib_ah_attr *ah_attr)
+{
+	struct qib_ah *ah;
+	struct ib_ah *ret;
+	struct qib_ibdev *dev = to_idev(pd->device);
+	unsigned long flags;
+
+	if (qib_check_ah(pd->device, ah_attr)) {
+		ret = ERR_PTR(-EINVAL);
+		goto bail;
+	}
+
+	ah = kmalloc(sizeof *ah, GFP_ATOMIC);
+	if (!ah) {
+		ret = ERR_PTR(-ENOMEM);
+		goto bail;
+	}
+
+	spin_lock_irqsave(&dev->n_ahs_lock, flags);
+	if (dev->n_ahs_allocated == ib_qib_max_ahs) {
+		spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+		kfree(ah);
+		ret = ERR_PTR(-ENOMEM);
+		goto bail;
+	}
+
+	dev->n_ahs_allocated++;
+	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
+	/* ib_create_ah() will initialize ah->ibah. */
+	ah->attr = *ah_attr;
+	atomic_set(&ah->refcount, 0);
+
+	ret = &ah->ibah;
+
+bail:
+	return ret;
+}
+
+/**
+ * qib_destroy_ah - destroy an address handle
+ * @ibah: the AH to destroy
+ *
+ * This may be called from interrupt context.
+ */
+static int qib_destroy_ah(struct ib_ah *ibah)
+{
+	struct qib_ibdev *dev = to_idev(ibah->device);
+	struct qib_ah *ah = to_iah(ibah);
+	unsigned long flags;
+
+	if (atomic_read(&ah->refcount) != 0)
+		return -EBUSY;
+
+	spin_lock_irqsave(&dev->n_ahs_lock, flags);
+	dev->n_ahs_allocated--;
+	spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
+	kfree(ah);
+
+	return 0;
+}
+
+static int qib_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+	struct qib_ah *ah = to_iah(ibah);
+
+	if (qib_check_ah(ibah->device, ah_attr))
+		return -EINVAL;
+
+	ah->attr = *ah_attr;
+
+	return 0;
+}
+
+static int qib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+	struct qib_ah *ah = to_iah(ibah);
+
+	*ah_attr = ah->attr;
+
+	return 0;
+}
+
+/**
+ * qib_get_npkeys - return the size of the PKEY table for context 0
+ * @dd: the qlogic_ib device
+ */
+unsigned qib_get_npkeys(struct qib_devdata *dd)
+{
+	return ARRAY_SIZE(dd->rcd[0]->pkeys);
+}
+
+/*
+ * Return the indexed PKEY from the port PKEY table.
+ * No need to validate rcd[ctxt]; the port is setup if we are here.
+ */
+unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index)
+{
+	struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+	struct qib_devdata *dd = ppd->dd;
+	unsigned ctxt = ppd->hw_pidx;
+	unsigned ret;
+
+	/* dd->rcd null if mini_init or some init failures */
+	if (!dd->rcd || index >= ARRAY_SIZE(dd->rcd[ctxt]->pkeys))
+		ret = 0;
+	else
+		ret = dd->rcd[ctxt]->pkeys[index];
+
+	return ret;
+}
+
+static int qib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+			  u16 *pkey)
+{
+	struct qib_devdata *dd = dd_from_ibdev(ibdev);
+	int ret;
+
+	if (index >= qib_get_npkeys(dd)) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	*pkey = qib_get_pkey(to_iport(ibdev, port), index);
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+/**
+ * qib_alloc_ucontext - allocate a ucontest
+ * @ibdev: the infiniband device
+ * @udata: not used by the QLogic_IB driver
+ */
+
+static struct ib_ucontext *qib_alloc_ucontext(struct ib_device *ibdev,
+					      struct ib_udata *udata)
+{
+	struct qib_ucontext *context;
+	struct ib_ucontext *ret;
+
+	context = kmalloc(sizeof *context, GFP_KERNEL);
+	if (!context) {
+		ret = ERR_PTR(-ENOMEM);
+		goto bail;
+	}
+
+	ret = &context->ibucontext;
+
+bail:
+	return ret;
+}
+
+static int qib_dealloc_ucontext(struct ib_ucontext *context)
+{
+	kfree(to_iucontext(context));
+	return 0;
+}
+
+static void init_ibport(struct qib_pportdata *ppd)
+{
+	struct qib_verbs_counters cntrs;
+	struct qib_ibport *ibp = &ppd->ibport_data;
+
+	spin_lock_init(&ibp->lock);
+	/* Set the prefix to the default value (see ch. 4.1.1) */
+	ibp->gid_prefix = IB_DEFAULT_GID_PREFIX;
+	ibp->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
+	ibp->port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
+		IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP |
+		IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP |
+		IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP |
+		IB_PORT_OTHER_LOCAL_CHANGES_SUP;
+	if (ppd->dd->flags & QIB_HAS_LINK_LATENCY)
+		ibp->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
+	ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
+	ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
+	ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
+	ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
+	ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
+
+	/* Snapshot current HW counters to "clear" them. */
+	qib_get_counters(ppd, &cntrs);
+	ibp->z_symbol_error_counter = cntrs.symbol_error_counter;
+	ibp->z_link_error_recovery_counter =
+		cntrs.link_error_recovery_counter;
+	ibp->z_link_downed_counter = cntrs.link_downed_counter;
+	ibp->z_port_rcv_errors = cntrs.port_rcv_errors;
+	ibp->z_port_rcv_remphys_errors = cntrs.port_rcv_remphys_errors;
+	ibp->z_port_xmit_discards = cntrs.port_xmit_discards;
+	ibp->z_port_xmit_data = cntrs.port_xmit_data;
+	ibp->z_port_rcv_data = cntrs.port_rcv_data;
+	ibp->z_port_xmit_packets = cntrs.port_xmit_packets;
+	ibp->z_port_rcv_packets = cntrs.port_rcv_packets;
+	ibp->z_local_link_integrity_errors =
+		cntrs.local_link_integrity_errors;
+	ibp->z_excessive_buffer_overrun_errors =
+		cntrs.excessive_buffer_overrun_errors;
+	ibp->z_vl15_dropped = cntrs.vl15_dropped;
+}
+
+/**
+ * qib_register_ib_device - register our device with the infiniband core
+ * @dd: the device data structure
+ * Return the allocated qib_ibdev pointer or NULL on error.
+ */
+int qib_register_ib_device(struct qib_devdata *dd)
+{
+	struct qib_ibdev *dev = &dd->verbs_dev;
+	struct ib_device *ibdev = &dev->ibdev;
+	struct qib_pportdata *ppd = dd->pport;
+	unsigned i, lk_tab_size;
+	int ret;
+
+	dev->qp_table_size = ib_qib_qp_table_size;
+	dev->qp_table = kzalloc(dev->qp_table_size * sizeof *dev->qp_table,
+				GFP_KERNEL);
+	if (!dev->qp_table) {
+		ret = -ENOMEM;
+		goto err_qpt;
+	}
+
+	for (i = 0; i < dd->num_pports; i++)
+		init_ibport(ppd + i);
+
+	/* Only need to initialize non-zero fields. */
+	spin_lock_init(&dev->qpt_lock);
+	spin_lock_init(&dev->n_pds_lock);
+	spin_lock_init(&dev->n_ahs_lock);
+	spin_lock_init(&dev->n_cqs_lock);
+	spin_lock_init(&dev->n_qps_lock);
+	spin_lock_init(&dev->n_srqs_lock);
+	spin_lock_init(&dev->n_mcast_grps_lock);
+	init_timer(&dev->mem_timer);
+	dev->mem_timer.function = mem_timer;
+	dev->mem_timer.data = (unsigned long) dev;
+
+	qib_init_qpn_table(dd, &dev->qpn_table);
+
+	/*
+	 * The top ib_qib_lkey_table_size bits are used to index the
+	 * table.  The lower 8 bits can be owned by the user (copied from
+	 * the LKEY).  The remaining bits act as a generation number or tag.
+	 */
+	spin_lock_init(&dev->lk_table.lock);
+	dev->lk_table.max = 1 << ib_qib_lkey_table_size;
+	lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
+	dev->lk_table.table = (struct qib_mregion **)
+		__get_free_pages(GFP_KERNEL, get_order(lk_tab_size));
+	if (dev->lk_table.table == NULL) {
+		ret = -ENOMEM;
+		goto err_lk;
+	}
+	memset(dev->lk_table.table, 0, lk_tab_size);
+	INIT_LIST_HEAD(&dev->pending_mmaps);
+	spin_lock_init(&dev->pending_lock);
+	dev->mmap_offset = PAGE_SIZE;
+	spin_lock_init(&dev->mmap_offset_lock);
+	INIT_LIST_HEAD(&dev->piowait);
+	INIT_LIST_HEAD(&dev->dmawait);
+	INIT_LIST_HEAD(&dev->txwait);
+	INIT_LIST_HEAD(&dev->memwait);
+	INIT_LIST_HEAD(&dev->txreq_free);
+
+	if (ppd->sdma_descq_cnt) {
+		dev->pio_hdrs = dma_alloc_coherent(&dd->pcidev->dev,
+						ppd->sdma_descq_cnt *
+						sizeof(struct qib_pio_header),
+						&dev->pio_hdrs_phys,
+						GFP_KERNEL);
+		if (!dev->pio_hdrs) {
+			ret = -ENOMEM;
+			goto err_hdrs;
+		}
+	}
+
+	for (i = 0; i < ppd->sdma_descq_cnt; i++) {
+		struct qib_verbs_txreq *tx;
+
+		tx = kzalloc(sizeof *tx, GFP_KERNEL);
+		if (!tx) {
+			ret = -ENOMEM;
+			goto err_tx;
+		}
+		tx->hdr_inx = i;
+		list_add(&tx->txreq.list, &dev->txreq_free);
+	}
+
+	/*
+	 * The system image GUID is supposed to be the same for all
+	 * IB HCAs in a single system but since there can be other
+	 * device types in the system, we can't be sure this is unique.
+	 */
+	if (!ib_qib_sys_image_guid)
+		ib_qib_sys_image_guid = ppd->guid;
+
+	strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX);
+	ibdev->owner = THIS_MODULE;
+	ibdev->node_guid = ppd->guid;
+	ibdev->uverbs_abi_ver = QIB_UVERBS_ABI_VERSION;
+	ibdev->uverbs_cmd_mask =
+		(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)         |
+		(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)        |
+		(1ull << IB_USER_VERBS_CMD_QUERY_PORT)          |
+		(1ull << IB_USER_VERBS_CMD_ALLOC_PD)            |
+		(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)          |
+		(1ull << IB_USER_VERBS_CMD_CREATE_AH)           |
+		(1ull << IB_USER_VERBS_CMD_MODIFY_AH)           |
+		(1ull << IB_USER_VERBS_CMD_QUERY_AH)            |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_AH)          |
+		(1ull << IB_USER_VERBS_CMD_REG_MR)              |
+		(1ull << IB_USER_VERBS_CMD_DEREG_MR)            |
+		(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+		(1ull << IB_USER_VERBS_CMD_CREATE_CQ)           |
+		(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)           |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)          |
+		(1ull << IB_USER_VERBS_CMD_POLL_CQ)             |
+		(1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ)       |
+		(1ull << IB_USER_VERBS_CMD_CREATE_QP)           |
+		(1ull << IB_USER_VERBS_CMD_QUERY_QP)            |
+		(1ull << IB_USER_VERBS_CMD_MODIFY_QP)           |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_QP)          |
+		(1ull << IB_USER_VERBS_CMD_POST_SEND)           |
+		(1ull << IB_USER_VERBS_CMD_POST_RECV)           |
+		(1ull << IB_USER_VERBS_CMD_ATTACH_MCAST)        |
+		(1ull << IB_USER_VERBS_CMD_DETACH_MCAST)        |
+		(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)          |
+		(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)          |
+		(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)           |
+		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)         |
+		(1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
+	ibdev->node_type = RDMA_NODE_IB_CA;
+	ibdev->phys_port_cnt = dd->num_pports;
+	ibdev->num_comp_vectors = 1;
+	ibdev->dma_device = &dd->pcidev->dev;
+	ibdev->query_device = qib_query_device;
+	ibdev->modify_device = qib_modify_device;
+	ibdev->query_port = qib_query_port;
+	ibdev->modify_port = qib_modify_port;
+	ibdev->query_pkey = qib_query_pkey;
+	ibdev->query_gid = qib_query_gid;
+	ibdev->alloc_ucontext = qib_alloc_ucontext;
+	ibdev->dealloc_ucontext = qib_dealloc_ucontext;
+	ibdev->alloc_pd = qib_alloc_pd;
+	ibdev->dealloc_pd = qib_dealloc_pd;
+	ibdev->create_ah = qib_create_ah;
+	ibdev->destroy_ah = qib_destroy_ah;
+	ibdev->modify_ah = qib_modify_ah;
+	ibdev->query_ah = qib_query_ah;
+	ibdev->create_srq = qib_create_srq;
+	ibdev->modify_srq = qib_modify_srq;
+	ibdev->query_srq = qib_query_srq;
+	ibdev->destroy_srq = qib_destroy_srq;
+	ibdev->create_qp = qib_create_qp;
+	ibdev->modify_qp = qib_modify_qp;
+	ibdev->query_qp = qib_query_qp;
+	ibdev->destroy_qp = qib_destroy_qp;
+	ibdev->post_send = qib_post_send;
+	ibdev->post_recv = qib_post_receive;
+	ibdev->post_srq_recv = qib_post_srq_receive;
+	ibdev->create_cq = qib_create_cq;
+	ibdev->destroy_cq = qib_destroy_cq;
+	ibdev->resize_cq = qib_resize_cq;
+	ibdev->poll_cq = qib_poll_cq;
+	ibdev->req_notify_cq = qib_req_notify_cq;
+	ibdev->get_dma_mr = qib_get_dma_mr;
+	ibdev->reg_phys_mr = qib_reg_phys_mr;
+	ibdev->reg_user_mr = qib_reg_user_mr;
+	ibdev->dereg_mr = qib_dereg_mr;
+	ibdev->alloc_fast_reg_mr = qib_alloc_fast_reg_mr;
+	ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list;
+	ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list;
+	ibdev->alloc_fmr = qib_alloc_fmr;
+	ibdev->map_phys_fmr = qib_map_phys_fmr;
+	ibdev->unmap_fmr = qib_unmap_fmr;
+	ibdev->dealloc_fmr = qib_dealloc_fmr;
+	ibdev->attach_mcast = qib_multicast_attach;
+	ibdev->detach_mcast = qib_multicast_detach;
+	ibdev->process_mad = qib_process_mad;
+	ibdev->mmap = qib_mmap;
+	ibdev->dma_ops = &qib_dma_mapping_ops;
+
+	snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
+		 QIB_IDSTR " %s", init_utsname()->nodename);
+
+	ret = ib_register_device(ibdev, qib_create_port_files);
+	if (ret)
+		goto err_reg;
+
+	ret = qib_create_agents(dev);
+	if (ret)
+		goto err_agents;
+
+	if (qib_verbs_register_sysfs(dd))
+		goto err_class;
+
+	goto bail;
+
+err_class:
+	qib_free_agents(dev);
+err_agents:
+	ib_unregister_device(ibdev);
+err_reg:
+err_tx:
+	while (!list_empty(&dev->txreq_free)) {
+		struct list_head *l = dev->txreq_free.next;
+		struct qib_verbs_txreq *tx;
+
+		list_del(l);
+		tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
+		kfree(tx);
+	}
+	if (ppd->sdma_descq_cnt)
+		dma_free_coherent(&dd->pcidev->dev,
+				  ppd->sdma_descq_cnt *
+					sizeof(struct qib_pio_header),
+				  dev->pio_hdrs, dev->pio_hdrs_phys);
+err_hdrs:
+	free_pages((unsigned long) dev->lk_table.table, get_order(lk_tab_size));
+err_lk:
+	kfree(dev->qp_table);
+err_qpt:
+	qib_dev_err(dd, "cannot register verbs: %d!\n", -ret);
+bail:
+	return ret;
+}
+
+void qib_unregister_ib_device(struct qib_devdata *dd)
+{
+	struct qib_ibdev *dev = &dd->verbs_dev;
+	struct ib_device *ibdev = &dev->ibdev;
+	u32 qps_inuse;
+	unsigned lk_tab_size;
+
+	qib_verbs_unregister_sysfs(dd);
+
+	qib_free_agents(dev);
+
+	ib_unregister_device(ibdev);
+
+	if (!list_empty(&dev->piowait))
+		qib_dev_err(dd, "piowait list not empty!\n");
+	if (!list_empty(&dev->dmawait))
+		qib_dev_err(dd, "dmawait list not empty!\n");
+	if (!list_empty(&dev->txwait))
+		qib_dev_err(dd, "txwait list not empty!\n");
+	if (!list_empty(&dev->memwait))
+		qib_dev_err(dd, "memwait list not empty!\n");
+	if (dev->dma_mr)
+		qib_dev_err(dd, "DMA MR not NULL!\n");
+
+	qps_inuse = qib_free_all_qps(dd);
+	if (qps_inuse)
+		qib_dev_err(dd, "QP memory leak! %u still in use\n",
+			    qps_inuse);
+
+	del_timer_sync(&dev->mem_timer);
+	qib_free_qpn_table(&dev->qpn_table);
+	while (!list_empty(&dev->txreq_free)) {
+		struct list_head *l = dev->txreq_free.next;
+		struct qib_verbs_txreq *tx;
+
+		list_del(l);
+		tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
+		kfree(tx);
+	}
+	if (dd->pport->sdma_descq_cnt)
+		dma_free_coherent(&dd->pcidev->dev,
+				  dd->pport->sdma_descq_cnt *
+					sizeof(struct qib_pio_header),
+				  dev->pio_hdrs, dev->pio_hdrs_phys);
+	lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
+	free_pages((unsigned long) dev->lk_table.table,
+		   get_order(lk_tab_size));
+	kfree(dev->qp_table);
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 46/52] IB/qib: Add qib_verbs.h
From: Ralph Campbell @ 2010-05-07  0:02 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_verbs.h file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_verbs.h | 1099 +++++++++++++++++++++++++++++++++
 1 files changed, 1099 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_verbs.h

diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
new file mode 100644
index 0000000..fd972e0
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -0,0 +1,1099 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
+ * All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef QIB_VERBS_H
+#define QIB_VERBS_H
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/kref.h>
+#include <linux/workqueue.h>
+#include <rdma/ib_pack.h>
+#include <rdma/ib_user_verbs.h>
+
+struct qib_ctxtdata;
+struct qib_pportdata;
+struct qib_devdata;
+struct qib_verbs_txreq;
+
+#define QIB_MAX_RDMA_ATOMIC     16
+#define QIB_GUIDS_PER_PORT	5
+
+#define QPN_MAX                 (1 << 24)
+#define QPNMAP_ENTRIES          (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define QIB_UVERBS_ABI_VERSION       2
+
+/*
+ * Define an ib_cq_notify value that is not valid so we know when CQ
+ * notifications are armed.
+ */
+#define IB_CQ_NONE      (IB_CQ_NEXT_COMP + 1)
+
+#define IB_SEQ_NAK	(3 << 29)
+
+/* AETH NAK opcode values */
+#define IB_RNR_NAK                      0x20
+#define IB_NAK_PSN_ERROR                0x60
+#define IB_NAK_INVALID_REQUEST          0x61
+#define IB_NAK_REMOTE_ACCESS_ERROR      0x62
+#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
+#define IB_NAK_INVALID_RD_REQUEST       0x64
+
+/* Flags for checking QP state (see ib_qib_state_ops[]) */
+#define QIB_POST_SEND_OK                0x01
+#define QIB_POST_RECV_OK                0x02
+#define QIB_PROCESS_RECV_OK             0x04
+#define QIB_PROCESS_SEND_OK             0x08
+#define QIB_PROCESS_NEXT_SEND_OK        0x10
+#define QIB_FLUSH_SEND			0x20
+#define QIB_FLUSH_RECV			0x40
+#define QIB_PROCESS_OR_FLUSH_SEND \
+	(QIB_PROCESS_SEND_OK | QIB_FLUSH_SEND)
+
+/* IB Performance Manager status values */
+#define IB_PMA_SAMPLE_STATUS_DONE       0x00
+#define IB_PMA_SAMPLE_STATUS_STARTED    0x01
+#define IB_PMA_SAMPLE_STATUS_RUNNING    0x02
+
+/* Mandatory IB performance counter select values. */
+#define IB_PMA_PORT_XMIT_DATA   cpu_to_be16(0x0001)
+#define IB_PMA_PORT_RCV_DATA    cpu_to_be16(0x0002)
+#define IB_PMA_PORT_XMIT_PKTS   cpu_to_be16(0x0003)
+#define IB_PMA_PORT_RCV_PKTS    cpu_to_be16(0x0004)
+#define IB_PMA_PORT_XMIT_WAIT   cpu_to_be16(0x0005)
+
+#define QIB_VENDOR_IPG		cpu_to_be16(0xFFA0)
+
+#define IB_BTH_REQ_ACK		(1 << 31)
+#define IB_BTH_SOLICITED	(1 << 23)
+#define IB_BTH_MIG_REQ		(1 << 22)
+
+/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */
+#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26)
+
+#define IB_GRH_VERSION		6
+#define IB_GRH_VERSION_MASK	0xF
+#define IB_GRH_VERSION_SHIFT	28
+#define IB_GRH_TCLASS_MASK	0xFF
+#define IB_GRH_TCLASS_SHIFT	20
+#define IB_GRH_FLOW_MASK	0xFFFFF
+#define IB_GRH_FLOW_SHIFT	0
+#define IB_GRH_NEXT_HDR		0x1B
+
+#define IB_DEFAULT_GID_PREFIX	cpu_to_be64(0xfe80000000000000ULL)
+
+/* Values for set/get portinfo VLCap OperationalVLs */
+#define IB_VL_VL0       1
+#define IB_VL_VL0_1     2
+#define IB_VL_VL0_3     3
+#define IB_VL_VL0_7     4
+#define IB_VL_VL0_14    5
+
+static inline int qib_num_vls(int vls)
+{
+	switch (vls) {
+	default:
+	case IB_VL_VL0:
+		return 1;
+	case IB_VL_VL0_1:
+		return 2;
+	case IB_VL_VL0_3:
+		return 4;
+	case IB_VL_VL0_7:
+		return 8;
+	case IB_VL_VL0_14:
+		return 15;
+	}
+}
+
+struct ib_reth {
+	__be64 vaddr;
+	__be32 rkey;
+	__be32 length;
+} __attribute__ ((packed));
+
+struct ib_atomic_eth {
+	__be32 vaddr[2];        /* unaligned so access as 2 32-bit words */
+	__be32 rkey;
+	__be64 swap_data;
+	__be64 compare_data;
+} __attribute__ ((packed));
+
+struct qib_other_headers {
+	__be32 bth[3];
+	union {
+		struct {
+			__be32 deth[2];
+			__be32 imm_data;
+		} ud;
+		struct {
+			struct ib_reth reth;
+			__be32 imm_data;
+		} rc;
+		struct {
+			__be32 aeth;
+			__be32 atomic_ack_eth[2];
+		} at;
+		__be32 imm_data;
+		__be32 aeth;
+		struct ib_atomic_eth atomic_eth;
+	} u;
+} __attribute__ ((packed));
+
+/*
+ * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
+ * long (72 w/ imm_data).  Only the first 56 bytes of the IB header
+ * will be in the eager header buffer.  The remaining 12 or 16 bytes
+ * are in the data buffer.
+ */
+struct qib_ib_header {
+	__be16 lrh[4];
+	union {
+		struct {
+			struct ib_grh grh;
+			struct qib_other_headers oth;
+		} l;
+		struct qib_other_headers oth;
+	} u;
+} __attribute__ ((packed));
+
+struct qib_pio_header {
+	__le32 pbc[2];
+	struct qib_ib_header hdr;
+} __attribute__ ((packed));
+
+/*
+ * There is one struct qib_mcast for each multicast GID.
+ * All attached QPs are then stored as a list of
+ * struct qib_mcast_qp.
+ */
+struct qib_mcast_qp {
+	struct list_head list;
+	struct qib_qp *qp;
+};
+
+struct qib_mcast {
+	struct rb_node rb_node;
+	union ib_gid mgid;
+	struct list_head qp_list;
+	wait_queue_head_t wait;
+	atomic_t refcount;
+	int n_attached;
+};
+
+/* Protection domain */
+struct qib_pd {
+	struct ib_pd ibpd;
+	int user;               /* non-zero if created from user space */
+};
+
+/* Address Handle */
+struct qib_ah {
+	struct ib_ah ibah;
+	struct ib_ah_attr attr;
+	atomic_t refcount;
+};
+
+/*
+ * This structure is used by qib_mmap() to validate an offset
+ * when an mmap() request is made.  The vm_area_struct then uses
+ * this as its vm_private_data.
+ */
+struct qib_mmap_info {
+	struct list_head pending_mmaps;
+	struct ib_ucontext *context;
+	void *obj;
+	__u64 offset;
+	struct kref ref;
+	unsigned size;
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ */
+struct qib_cq_wc {
+	u32 head;               /* index of next entry to fill */
+	u32 tail;               /* index of next ib_poll_cq() entry */
+	union {
+		/* these are actually size ibcq.cqe + 1 */
+		struct ib_uverbs_wc uqueue[0];
+		struct ib_wc kqueue[0];
+	};
+};
+
+/*
+ * The completion queue structure.
+ */
+struct qib_cq {
+	struct ib_cq ibcq;
+	struct work_struct comptask;
+	spinlock_t lock; /* protect changes in this struct */
+	u8 notify;
+	u8 triggered;
+	struct qib_cq_wc *queue;
+	struct qib_mmap_info *ip;
+};
+
+/*
+ * A segment is a linear region of low physical memory.
+ * XXX Maybe we should use phys addr here and kmap()/kunmap().
+ * Used by the verbs layer.
+ */
+struct qib_seg {
+	void *vaddr;
+	size_t length;
+};
+
+/* The number of qib_segs that fit in a page. */
+#define QIB_SEGSZ     (PAGE_SIZE / sizeof(struct qib_seg))
+
+struct qib_segarray {
+	struct qib_seg segs[QIB_SEGSZ];
+};
+
+struct qib_mregion {
+	struct ib_pd *pd;       /* shares refcnt of ibmr.pd */
+	u64 user_base;          /* User's address for this region */
+	u64 iova;               /* IB start address of this region */
+	size_t length;
+	u32 lkey;
+	u32 offset;             /* offset (bytes) to start of region */
+	int access_flags;
+	u32 max_segs;           /* number of qib_segs in all the arrays */
+	u32 mapsz;              /* size of the map array */
+	atomic_t refcount;
+	struct qib_segarray *map[0];    /* the segments */
+};
+
+/*
+ * These keep track of the copy progress within a memory region.
+ * Used by the verbs layer.
+ */
+struct qib_sge {
+	struct qib_mregion *mr;
+	void *vaddr;            /* kernel virtual address of segment */
+	u32 sge_length;         /* length of the SGE */
+	u32 length;             /* remaining length of the segment */
+	u16 m;                  /* current index: mr->map[m] */
+	u16 n;                  /* current index: mr->map[m]->segs[n] */
+};
+
+/* Memory region */
+struct qib_mr {
+	struct ib_mr ibmr;
+	struct ib_umem *umem;
+	struct qib_mregion mr;  /* must be last */
+};
+
+/*
+ * Send work request queue entry.
+ * The size of the sg_list is determined when the QP is created and stored
+ * in qp->s_max_sge.
+ */
+struct qib_swqe {
+	struct ib_send_wr wr;   /* don't use wr.sg_list */
+	u32 psn;                /* first packet sequence number */
+	u32 lpsn;               /* last packet sequence number */
+	u32 ssn;                /* send sequence number */
+	u32 length;             /* total length of data in sg_list */
+	struct qib_sge sg_list[0];
+};
+
+/*
+ * Receive work request queue entry.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
+ */
+struct qib_rwqe {
+	u64 wr_id;
+	u8 num_sge;
+	struct ib_sge sg_list[0];
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() instead.
+ */
+struct qib_rwq {
+	u32 head;               /* new work requests posted to the head */
+	u32 tail;               /* receives pull requests from here. */
+	struct qib_rwqe wq[0];
+};
+
+struct qib_rq {
+	struct qib_rwq *wq;
+	spinlock_t lock; /* protect changes in this struct */
+	u32 size;               /* size of RWQE array */
+	u8 max_sge;
+};
+
+struct qib_srq {
+	struct ib_srq ibsrq;
+	struct qib_rq rq;
+	struct qib_mmap_info *ip;
+	/* send signal when number of RWQEs < limit */
+	u32 limit;
+};
+
+struct qib_sge_state {
+	struct qib_sge *sg_list;      /* next SGE to be used if any */
+	struct qib_sge sge;   /* progress state for the current SGE */
+	u32 total_len;
+	u8 num_sge;
+};
+
+/*
+ * This structure holds the information that the send tasklet needs
+ * to send a RDMA read response or atomic operation.
+ */
+struct qib_ack_entry {
+	u8 opcode;
+	u8 sent;
+	u32 psn;
+	u32 lpsn;
+	union {
+		struct qib_sge rdma_sge;
+		u64 atomic_data;
+	};
+};
+
+/*
+ * Variables prefixed with s_ are for the requester (sender).
+ * Variables prefixed with r_ are for the responder (receiver).
+ * Variables prefixed with ack_ are for responder replies.
+ *
+ * Common variables are protected by both r_rq.lock and s_lock in that order
+ * which only happens in modify_qp() or changing the QP 'state'.
+ */
+struct qib_qp {
+	struct ib_qp ibqp;
+	struct qib_qp *next;            /* link list for QPN hash table */
+	struct qib_qp *timer_next;      /* link list for qib_ib_timer() */
+	struct list_head iowait;        /* link for wait PIO buf */
+	struct list_head rspwait;       /* link for waititing to respond */
+	struct ib_ah_attr remote_ah_attr;
+	struct ib_ah_attr alt_ah_attr;
+	struct qib_ib_header s_hdr;     /* next packet header to send */
+	atomic_t refcount;
+	wait_queue_head_t wait;
+	wait_queue_head_t wait_dma;
+	struct timer_list s_timer;
+	struct work_struct s_work;
+	struct qib_mmap_info *ip;
+	struct qib_sge_state *s_cur_sge;
+	struct qib_verbs_txreq *s_tx;
+	struct qib_mregion *s_rdma_mr;
+	struct qib_sge_state s_sge;     /* current send request data */
+	struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1];
+	struct qib_sge_state s_ack_rdma_sge;
+	struct qib_sge_state s_rdma_read_sge;
+	struct qib_sge_state r_sge;     /* current receive data */
+	spinlock_t r_lock;      /* used for APM */
+	spinlock_t s_lock;
+	atomic_t s_dma_busy;
+	unsigned processor_id;	/* Processor ID QP is bound to */
+	u32 s_flags;
+	u32 s_cur_size;         /* size of send packet in bytes */
+	u32 s_len;              /* total length of s_sge */
+	u32 s_rdma_read_len;    /* total length of s_rdma_read_sge */
+	u32 s_next_psn;         /* PSN for next request */
+	u32 s_last_psn;         /* last response PSN processed */
+	u32 s_sending_psn;      /* lowest PSN that is being sent */
+	u32 s_sending_hpsn;     /* highest PSN that is being sent */
+	u32 s_psn;              /* current packet sequence number */
+	u32 s_ack_rdma_psn;     /* PSN for sending RDMA read responses */
+	u32 s_ack_psn;          /* PSN for acking sends and RDMA writes */
+	u32 s_rnr_timeout;      /* number of milliseconds for RNR timeout */
+	u32 r_ack_psn;          /* PSN for next ACK or atomic ACK */
+	u64 r_wr_id;            /* ID for current receive WQE */
+	unsigned long r_aflags;
+	u32 r_len;              /* total length of r_sge */
+	u32 r_rcv_len;          /* receive data len processed */
+	u32 r_psn;              /* expected rcv packet sequence number */
+	u32 r_msn;              /* message sequence number */
+	u16 s_hdrwords;         /* size of s_hdr in 32 bit words */
+	u16 s_rdma_ack_cnt;
+	u8 state;               /* QP state */
+	u8 s_state;             /* opcode of last packet sent */
+	u8 s_ack_state;         /* opcode of packet to ACK */
+	u8 s_nak_state;         /* non-zero if NAK is pending */
+	u8 r_state;             /* opcode of last packet received */
+	u8 r_nak_state;         /* non-zero if NAK is pending */
+	u8 r_min_rnr_timer;     /* retry timeout value for RNR NAKs */
+	u8 r_flags;
+	u8 r_max_rd_atomic;     /* max number of RDMA read/atomic to receive */
+	u8 r_head_ack_queue;    /* index into s_ack_queue[] */
+	u8 qp_access_flags;
+	u8 s_max_sge;           /* size of s_wq->sg_list */
+	u8 s_retry_cnt;         /* number of times to retry */
+	u8 s_rnr_retry_cnt;
+	u8 s_retry;             /* requester retry counter */
+	u8 s_rnr_retry;         /* requester RNR retry counter */
+	u8 s_pkey_index;        /* PKEY index to use */
+	u8 s_alt_pkey_index;    /* Alternate path PKEY index to use */
+	u8 s_max_rd_atomic;     /* max number of RDMA read/atomic to send */
+	u8 s_num_rd_atomic;     /* number of RDMA read/atomic pending */
+	u8 s_tail_ack_queue;    /* index into s_ack_queue[] */
+	u8 s_srate;
+	u8 s_draining;
+	u8 s_mig_state;
+	u8 timeout;             /* Timeout for this QP */
+	u8 alt_timeout;         /* Alternate path timeout for this QP */
+	u8 port_num;
+	enum ib_mtu path_mtu;
+	u32 remote_qpn;
+	u32 qkey;               /* QKEY for this QP (for UD or RD) */
+	u32 s_size;             /* send work queue size */
+	u32 s_head;             /* new entries added here */
+	u32 s_tail;             /* next entry to process */
+	u32 s_cur;              /* current work queue entry */
+	u32 s_acked;            /* last un-ACK'ed entry */
+	u32 s_last;             /* last completed entry */
+	u32 s_ssn;              /* SSN of tail entry */
+	u32 s_lsn;              /* limit sequence number (credit) */
+	struct qib_swqe *s_wq;  /* send work queue */
+	struct qib_swqe *s_wqe;
+	struct qib_rq r_rq;             /* receive work queue */
+	struct qib_sge r_sg_list[0];    /* verified SGEs */
+};
+
+/*
+ * Atomic bit definitions for r_aflags.
+ */
+#define QIB_R_WRID_VALID        0
+#define QIB_R_REWIND_SGE        1
+
+/*
+ * Bit definitions for r_flags.
+ */
+#define QIB_R_REUSE_SGE 0x01
+#define QIB_R_RDMAR_SEQ 0x02
+#define QIB_R_RSP_NAK   0x04
+#define QIB_R_RSP_SEND  0x08
+#define QIB_R_COMM_EST  0x10
+
+/*
+ * Bit definitions for s_flags.
+ *
+ * QIB_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
+ * QIB_S_BUSY - send tasklet is processing the QP
+ * QIB_S_TIMER - the RC retry timer is active
+ * QIB_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
+ * QIB_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
+ *                         before processing the next SWQE
+ * QIB_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
+ *                         before processing the next SWQE
+ * QIB_S_WAIT_RNR - waiting for RNR timeout
+ * QIB_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
+ * QIB_S_WAIT_DMA - waiting for send DMA queue to drain before generating
+ *                  next send completion entry not via send DMA
+ * QIB_S_WAIT_PIO - waiting for a send buffer to be available
+ * QIB_S_WAIT_TX - waiting for a struct qib_verbs_txreq to be available
+ * QIB_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
+ * QIB_S_WAIT_KMEM - waiting for kernel memory to be available
+ * QIB_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
+ * QIB_S_WAIT_ACK - waiting for an ACK packet before sending more requests
+ * QIB_S_SEND_ONE - send one packet, request ACK, then wait for ACK
+ */
+#define QIB_S_SIGNAL_REQ_WR	0x0001
+#define QIB_S_BUSY		0x0002
+#define QIB_S_TIMER		0x0004
+#define QIB_S_RESP_PENDING	0x0008
+#define QIB_S_ACK_PENDING	0x0010
+#define QIB_S_WAIT_FENCE	0x0020
+#define QIB_S_WAIT_RDMAR	0x0040
+#define QIB_S_WAIT_RNR		0x0080
+#define QIB_S_WAIT_SSN_CREDIT	0x0100
+#define QIB_S_WAIT_DMA		0x0200
+#define QIB_S_WAIT_PIO		0x0400
+#define QIB_S_WAIT_TX		0x0800
+#define QIB_S_WAIT_DMA_DESC	0x1000
+#define QIB_S_WAIT_KMEM		0x2000
+#define QIB_S_WAIT_PSN		0x4000
+#define QIB_S_WAIT_ACK		0x8000
+#define QIB_S_SEND_ONE		0x10000
+#define QIB_S_UNLIMITED_CREDIT	0x20000
+
+/*
+ * Wait flags that would prevent any packet type from being sent.
+ */
+#define QIB_S_ANY_WAIT_IO (QIB_S_WAIT_PIO | QIB_S_WAIT_TX | \
+	QIB_S_WAIT_DMA_DESC | QIB_S_WAIT_KMEM)
+
+/*
+ * Wait flags that would prevent send work requests from making progress.
+ */
+#define QIB_S_ANY_WAIT_SEND (QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR | \
+	QIB_S_WAIT_RNR | QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_DMA | \
+	QIB_S_WAIT_PSN | QIB_S_WAIT_ACK)
+
+#define QIB_S_ANY_WAIT (QIB_S_ANY_WAIT_IO | QIB_S_ANY_WAIT_SEND)
+
+#define QIB_PSN_CREDIT  16
+
+/*
+ * Since struct qib_swqe is not a fixed size, we can't simply index into
+ * struct qib_qp.s_wq.  This function does the array index computation.
+ */
+static inline struct qib_swqe *get_swqe_ptr(struct qib_qp *qp,
+					      unsigned n)
+{
+	return (struct qib_swqe *)((char *)qp->s_wq +
+				     (sizeof(struct qib_swqe) +
+				      qp->s_max_sge *
+				      sizeof(struct qib_sge)) * n);
+}
+
+/*
+ * Since struct qib_rwqe is not a fixed size, we can't simply index into
+ * struct qib_rwq.wq.  This function does the array index computation.
+ */
+static inline struct qib_rwqe *get_rwqe_ptr(struct qib_rq *rq, unsigned n)
+{
+	return (struct qib_rwqe *)
+		((char *) rq->wq->wq +
+		 (sizeof(struct qib_rwqe) +
+		  rq->max_sge * sizeof(struct ib_sge)) * n);
+}
+
+/*
+ * QPN-map pages start out as NULL, they get allocated upon
+ * first use and are never deallocated. This way,
+ * large bitmaps are not allocated unless large numbers of QPs are used.
+ */
+struct qpn_map {
+	void *page;
+};
+
+struct qib_qpn_table {
+	spinlock_t lock; /* protect changes in this struct */
+	unsigned flags;         /* flags for QP0/1 allocated for each port */
+	u32 last;               /* last QP number allocated */
+	u32 nmaps;              /* size of the map table */
+	u16 limit;
+	u16 mask;
+	/* bit map of free QP numbers other than 0/1 */
+	struct qpn_map map[QPNMAP_ENTRIES];
+};
+
+struct qib_lkey_table {
+	spinlock_t lock; /* protect changes in this struct */
+	u32 next;               /* next unused index (speeds search) */
+	u32 gen;                /* generation count */
+	u32 max;                /* size of the table */
+	struct qib_mregion **table;
+};
+
+struct qib_opcode_stats {
+	u64 n_packets;          /* number of packets */
+	u64 n_bytes;            /* total number of bytes */
+};
+
+struct qib_ibport {
+	struct qib_qp *qp0;
+	struct qib_qp *qp1;
+	struct ib_mad_agent *send_agent;	/* agent for SMI (traps) */
+	struct qib_ah *sm_ah;
+	struct qib_ah *smi_ah;
+	struct rb_root mcast_tree;
+	spinlock_t lock;		/* protect changes in this struct */
+
+	/* non-zero when timer is set */
+	unsigned long mkey_lease_timeout;
+	unsigned long trap_timeout;
+	__be64 gid_prefix;      /* in network order */
+	__be64 mkey;
+	__be64 guids[QIB_GUIDS_PER_PORT	- 1];	/* writable GUIDs */
+	u64 tid;		/* TID for traps */
+	u64 n_unicast_xmit;     /* total unicast packets sent */
+	u64 n_unicast_rcv;      /* total unicast packets received */
+	u64 n_multicast_xmit;   /* total multicast packets sent */
+	u64 n_multicast_rcv;    /* total multicast packets received */
+	u64 z_symbol_error_counter;             /* starting count for PMA */
+	u64 z_link_error_recovery_counter;      /* starting count for PMA */
+	u64 z_link_downed_counter;              /* starting count for PMA */
+	u64 z_port_rcv_errors;                  /* starting count for PMA */
+	u64 z_port_rcv_remphys_errors;          /* starting count for PMA */
+	u64 z_port_xmit_discards;               /* starting count for PMA */
+	u64 z_port_xmit_data;                   /* starting count for PMA */
+	u64 z_port_rcv_data;                    /* starting count for PMA */
+	u64 z_port_xmit_packets;                /* starting count for PMA */
+	u64 z_port_rcv_packets;                 /* starting count for PMA */
+	u32 z_local_link_integrity_errors;      /* starting count for PMA */
+	u32 z_excessive_buffer_overrun_errors;  /* starting count for PMA */
+	u32 z_vl15_dropped;                     /* starting count for PMA */
+	u32 n_rc_resends;
+	u32 n_rc_acks;
+	u32 n_rc_qacks;
+	u32 n_rc_delayed_comp;
+	u32 n_seq_naks;
+	u32 n_rdma_seq;
+	u32 n_rnr_naks;
+	u32 n_other_naks;
+	u32 n_loop_pkts;
+	u32 n_pkt_drops;
+	u32 n_vl15_dropped;
+	u32 n_rc_timeouts;
+	u32 n_dmawait;
+	u32 n_unaligned;
+	u32 n_rc_dupreq;
+	u32 n_rc_seqnak;
+	u32 port_cap_flags;
+	u32 pma_sample_start;
+	u32 pma_sample_interval;
+	__be16 pma_counter_select[5];
+	u16 pma_tag;
+	u16 pkey_violations;
+	u16 qkey_violations;
+	u16 mkey_violations;
+	u16 mkey_lease_period;
+	u16 sm_lid;
+	u16 repress_traps;
+	u8 sm_sl;
+	u8 mkeyprot;
+	u8 subnet_timeout;
+	u8 vl_high_limit;
+	u8 sl_to_vl[16];
+
+	struct qib_opcode_stats opstats[128];
+};
+
+struct qib_ibdev {
+	struct ib_device ibdev;
+	struct list_head pending_mmaps;
+	spinlock_t mmap_offset_lock; /* protect mmap_offset */
+	u32 mmap_offset;
+	struct qib_mregion *dma_mr;
+
+	/* QP numbers are shared by all IB ports */
+	struct qib_qpn_table qpn_table;
+	struct qib_lkey_table lk_table;
+	struct list_head piowait;       /* list for wait PIO buf */
+	struct list_head dmawait;	/* list for wait DMA */
+	struct list_head txwait;        /* list for wait qib_verbs_txreq */
+	struct list_head memwait;       /* list for wait kernel memory */
+	struct list_head txreq_free;
+	struct timer_list mem_timer;
+	struct qib_qp **qp_table;
+	struct qib_pio_header *pio_hdrs;
+	dma_addr_t pio_hdrs_phys;
+	/* list of QPs waiting for RNR timer */
+	spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */
+	unsigned qp_table_size; /* size of the hash table */
+	spinlock_t qpt_lock;
+
+	u32 n_piowait;
+	u32 n_txwait;
+
+	u32 n_pds_allocated;    /* number of PDs allocated for device */
+	spinlock_t n_pds_lock;
+	u32 n_ahs_allocated;    /* number of AHs allocated for device */
+	spinlock_t n_ahs_lock;
+	u32 n_cqs_allocated;    /* number of CQs allocated for device */
+	spinlock_t n_cqs_lock;
+	u32 n_qps_allocated;    /* number of QPs allocated for device */
+	spinlock_t n_qps_lock;
+	u32 n_srqs_allocated;   /* number of SRQs allocated for device */
+	spinlock_t n_srqs_lock;
+	u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
+	spinlock_t n_mcast_grps_lock;
+};
+
+struct qib_verbs_counters {
+	u64 symbol_error_counter;
+	u64 link_error_recovery_counter;
+	u64 link_downed_counter;
+	u64 port_rcv_errors;
+	u64 port_rcv_remphys_errors;
+	u64 port_xmit_discards;
+	u64 port_xmit_data;
+	u64 port_rcv_data;
+	u64 port_xmit_packets;
+	u64 port_rcv_packets;
+	u32 local_link_integrity_errors;
+	u32 excessive_buffer_overrun_errors;
+	u32 vl15_dropped;
+};
+
+static inline struct qib_mr *to_imr(struct ib_mr *ibmr)
+{
+	return container_of(ibmr, struct qib_mr, ibmr);
+}
+
+static inline struct qib_pd *to_ipd(struct ib_pd *ibpd)
+{
+	return container_of(ibpd, struct qib_pd, ibpd);
+}
+
+static inline struct qib_ah *to_iah(struct ib_ah *ibah)
+{
+	return container_of(ibah, struct qib_ah, ibah);
+}
+
+static inline struct qib_cq *to_icq(struct ib_cq *ibcq)
+{
+	return container_of(ibcq, struct qib_cq, ibcq);
+}
+
+static inline struct qib_srq *to_isrq(struct ib_srq *ibsrq)
+{
+	return container_of(ibsrq, struct qib_srq, ibsrq);
+}
+
+static inline struct qib_qp *to_iqp(struct ib_qp *ibqp)
+{
+	return container_of(ibqp, struct qib_qp, ibqp);
+}
+
+static inline struct qib_ibdev *to_idev(struct ib_device *ibdev)
+{
+	return container_of(ibdev, struct qib_ibdev, ibdev);
+}
+
+/*
+ * Send if not busy or waiting for I/O and either
+ * a RC response is pending or we can process send work requests.
+ */
+static inline int qib_send_ok(struct qib_qp *qp)
+{
+	return !(qp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT_IO)) &&
+		(qp->s_hdrwords || (qp->s_flags & QIB_S_RESP_PENDING) ||
+		 !(qp->s_flags & QIB_S_ANY_WAIT_SEND));
+}
+
+extern struct workqueue_struct *qib_wq;
+
+/*
+ * This must be called with s_lock held.
+ */
+static inline void qib_schedule_send(struct qib_qp *qp)
+{
+	if (qib_send_ok(qp)) {
+		if (qp->processor_id == smp_processor_id())
+			queue_work(qib_wq, &qp->s_work);
+		else
+			queue_work_on(qp->processor_id,
+				      qib_wq, &qp->s_work);
+	}
+}
+
+static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
+{
+	u16 p1 = pkey1 & 0x7FFF;
+	u16 p2 = pkey2 & 0x7FFF;
+
+	/*
+	 * Low 15 bits must be non-zero and match, and
+	 * one of the two must be a full member.
+	 */
+	return p1 && p1 == p2 && ((__s16)pkey1 < 0 || (__s16)pkey2 < 0);
+}
+
+void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
+		   u32 qp1, u32 qp2, __be16 lid1, __be16 lid2);
+void qib_cap_mask_chg(struct qib_ibport *ibp);
+void qib_sys_guid_chg(struct qib_ibport *ibp);
+void qib_node_desc_chg(struct qib_ibport *ibp);
+int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+		    struct ib_wc *in_wc, struct ib_grh *in_grh,
+		    struct ib_mad *in_mad, struct ib_mad *out_mad);
+int qib_create_agents(struct qib_ibdev *dev);
+void qib_free_agents(struct qib_ibdev *dev);
+
+/*
+ * Compare the lower 24 bits of the two values.
+ * Returns an integer <, ==, or > than zero.
+ */
+static inline int qib_cmp24(u32 a, u32 b)
+{
+	return (((int) a) - ((int) b)) << 8;
+}
+
+struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid);
+
+int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
+			  u64 *rwords, u64 *spkts, u64 *rpkts,
+			  u64 *xmit_wait);
+
+int qib_get_counters(struct qib_pportdata *ppd,
+		     struct qib_verbs_counters *cntrs);
+
+int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+
+int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+
+int qib_mcast_tree_empty(struct qib_ibport *ibp);
+
+__be32 qib_compute_aeth(struct qib_qp *qp);
+
+struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn);
+
+struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
+			    struct ib_qp_init_attr *init_attr,
+			    struct ib_udata *udata);
+
+int qib_destroy_qp(struct ib_qp *ibqp);
+
+int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err);
+
+int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		  int attr_mask, struct ib_udata *udata);
+
+int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		 int attr_mask, struct ib_qp_init_attr *init_attr);
+
+unsigned qib_free_all_qps(struct qib_devdata *dd);
+
+void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt);
+
+void qib_free_qpn_table(struct qib_qpn_table *qpt);
+
+void qib_get_credit(struct qib_qp *qp, u32 aeth);
+
+unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult);
+
+void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail);
+
+void qib_put_txreq(struct qib_verbs_txreq *tx);
+
+int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
+		   u32 hdrwords, struct qib_sge_state *ss, u32 len);
+
+void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length,
+		  int release);
+
+void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release);
+
+void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+		int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+
+void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+		int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+
+int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+
+void qib_rc_rnr_retry(unsigned long arg);
+
+void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr);
+
+void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err);
+
+int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr);
+
+void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+		int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+
+int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr);
+
+int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr);
+
+int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
+		struct qib_sge *isge, struct ib_sge *sge, int acc);
+
+int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
+		u32 len, u64 vaddr, u32 rkey, int acc);
+
+int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+			 struct ib_recv_wr **bad_wr);
+
+struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
+			      struct ib_srq_init_attr *srq_init_attr,
+			      struct ib_udata *udata);
+
+int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+		   enum ib_srq_attr_mask attr_mask,
+		   struct ib_udata *udata);
+
+int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
+
+int qib_destroy_srq(struct ib_srq *ibsrq);
+
+void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig);
+
+int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
+
+struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries,
+			    int comp_vector, struct ib_ucontext *context,
+			    struct ib_udata *udata);
+
+int qib_destroy_cq(struct ib_cq *ibcq);
+
+int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
+
+int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
+
+struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc);
+
+struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
+			      struct ib_phys_buf *buffer_list,
+			      int num_phys_buf, int acc, u64 *iova_start);
+
+struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+			      u64 virt_addr, int mr_access_flags,
+			      struct ib_udata *udata);
+
+int qib_dereg_mr(struct ib_mr *ibmr);
+
+struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
+
+struct ib_fast_reg_page_list *qib_alloc_fast_reg_page_list(
+				struct ib_device *ibdev, int page_list_len);
+
+void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl);
+
+int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr);
+
+struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+			     struct ib_fmr_attr *fmr_attr);
+
+int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+		     int list_len, u64 iova);
+
+int qib_unmap_fmr(struct list_head *fmr_list);
+
+int qib_dealloc_fmr(struct ib_fmr *ibfmr);
+
+void qib_release_mmap_info(struct kref *ref);
+
+struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size,
+					   struct ib_ucontext *context,
+					   void *obj);
+
+void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip,
+			  u32 size, void *obj);
+
+int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+int qib_get_rwqe(struct qib_qp *qp, int wr_id_only);
+
+void qib_migrate_qp(struct qib_qp *qp);
+
+int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+		      int has_grh, struct qib_qp *qp, u32 bth0);
+
+u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
+		 struct ib_global_route *grh, u32 hwords, u32 nwords);
+
+void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
+			 u32 bth0, u32 bth2);
+
+void qib_do_send(struct work_struct *work);
+
+void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
+		       enum ib_wc_status status);
+
+void qib_send_rc_ack(struct qib_qp *qp);
+
+int qib_make_rc_req(struct qib_qp *qp);
+
+int qib_make_uc_req(struct qib_qp *qp);
+
+int qib_make_ud_req(struct qib_qp *qp);
+
+int qib_register_ib_device(struct qib_devdata *);
+
+void qib_unregister_ib_device(struct qib_devdata *);
+
+void qib_ib_rcv(struct qib_ctxtdata *, void *, void *, u32);
+
+void qib_ib_piobufavail(struct qib_devdata *);
+
+unsigned qib_get_npkeys(struct qib_devdata *);
+
+unsigned qib_get_pkey(struct qib_ibport *, unsigned);
+
+extern const enum ib_wc_opcode ib_qib_wc_opcode[];
+
+/*
+ * Below  HCA-independent IB PhysPortState values, returned
+ * by the f_ibphys_portstate() routine.
+ */
+#define IB_PHYSPORTSTATE_SLEEP 1
+#define IB_PHYSPORTSTATE_POLL 2
+#define IB_PHYSPORTSTATE_DISABLED 3
+#define IB_PHYSPORTSTATE_CFG_TRAIN 4
+#define IB_PHYSPORTSTATE_LINKUP 5
+#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6
+#define IB_PHYSPORTSTATE_CFG_DEBOUNCE 8
+#define IB_PHYSPORTSTATE_CFG_IDLE 0xB
+#define IB_PHYSPORTSTATE_RECOVERY_RETRAIN 0xC
+#define IB_PHYSPORTSTATE_RECOVERY_WAITRMT 0xE
+#define IB_PHYSPORTSTATE_RECOVERY_IDLE 0xF
+#define IB_PHYSPORTSTATE_CFG_ENH 0x10
+#define IB_PHYSPORTSTATE_CFG_WAIT_ENH 0x13
+
+extern const int ib_qib_state_ops[];
+
+extern __be64 ib_qib_sys_image_guid;    /* in network order */
+
+extern unsigned int ib_qib_lkey_table_size;
+
+extern unsigned int ib_qib_max_cqes;
+
+extern unsigned int ib_qib_max_cqs;
+
+extern unsigned int ib_qib_max_qp_wrs;
+
+extern unsigned int ib_qib_max_qps;
+
+extern unsigned int ib_qib_max_sges;
+
+extern unsigned int ib_qib_max_mcast_grps;
+
+extern unsigned int ib_qib_max_mcast_qp_attached;
+
+extern unsigned int ib_qib_max_srqs;
+
+extern unsigned int ib_qib_max_srq_sges;
+
+extern unsigned int ib_qib_max_srq_wrs;
+
+extern const u32 ib_qib_rnr_table[];
+
+extern struct ib_dma_mapping_ops qib_dma_mapping_ops;
+
+#endif                          /* QIB_VERBS_H */

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 47/52] IB/qib: Add qib_verbs_mcast.c
From: Ralph Campbell @ 2010-05-07  0:02 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_verbs_mcast.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_verbs_mcast.c |  368 +++++++++++++++++++++++++++
 1 files changed, 368 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_verbs_mcast.c

diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
new file mode 100644
index 0000000..dabb697
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/rculist.h>
+
+#include "qib.h"
+
+/**
+ * qib_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
+ * @qp: the QP to link
+ */
+static struct qib_mcast_qp *qib_mcast_qp_alloc(struct qib_qp *qp)
+{
+	struct qib_mcast_qp *mqp;
+
+	mqp = kmalloc(sizeof *mqp, GFP_KERNEL);
+	if (!mqp)
+		goto bail;
+
+	mqp->qp = qp;
+	atomic_inc(&qp->refcount);
+
+bail:
+	return mqp;
+}
+
+static void qib_mcast_qp_free(struct qib_mcast_qp *mqp)
+{
+	struct qib_qp *qp = mqp->qp;
+
+	/* Notify qib_destroy_qp() if it is waiting. */
+	if (atomic_dec_and_test(&qp->refcount))
+		wake_up(&qp->wait);
+
+	kfree(mqp);
+}
+
+/**
+ * qib_mcast_alloc - allocate the multicast GID structure
+ * @mgid: the multicast GID
+ *
+ * A list of QPs will be attached to this structure.
+ */
+static struct qib_mcast *qib_mcast_alloc(union ib_gid *mgid)
+{
+	struct qib_mcast *mcast;
+
+	mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
+	if (!mcast)
+		goto bail;
+
+	mcast->mgid = *mgid;
+	INIT_LIST_HEAD(&mcast->qp_list);
+	init_waitqueue_head(&mcast->wait);
+	atomic_set(&mcast->refcount, 0);
+	mcast->n_attached = 0;
+
+bail:
+	return mcast;
+}
+
+static void qib_mcast_free(struct qib_mcast *mcast)
+{
+	struct qib_mcast_qp *p, *tmp;
+
+	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
+		qib_mcast_qp_free(p);
+
+	kfree(mcast);
+}
+
+/**
+ * qib_mcast_find - search the global table for the given multicast GID
+ * @ibp: the IB port structure
+ * @mgid: the multicast GID to search for
+ *
+ * Returns NULL if not found.
+ *
+ * The caller is responsible for decrementing the reference count if found.
+ */
+struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid)
+{
+	struct rb_node *n;
+	unsigned long flags;
+	struct qib_mcast *mcast;
+
+	spin_lock_irqsave(&ibp->lock, flags);
+	n = ibp->mcast_tree.rb_node;
+	while (n) {
+		int ret;
+
+		mcast = rb_entry(n, struct qib_mcast, rb_node);
+
+		ret = memcmp(mgid->raw, mcast->mgid.raw,
+			     sizeof(union ib_gid));
+		if (ret < 0)
+			n = n->rb_left;
+		else if (ret > 0)
+			n = n->rb_right;
+		else {
+			atomic_inc(&mcast->refcount);
+			spin_unlock_irqrestore(&ibp->lock, flags);
+			goto bail;
+		}
+	}
+	spin_unlock_irqrestore(&ibp->lock, flags);
+
+	mcast = NULL;
+
+bail:
+	return mcast;
+}
+
+/**
+ * qib_mcast_add - insert mcast GID into table and attach QP struct
+ * @mcast: the mcast GID table
+ * @mqp: the QP to attach
+ *
+ * Return zero if both were added.  Return EEXIST if the GID was already in
+ * the table but the QP was added.  Return ESRCH if the QP was already
+ * attached and neither structure was added.
+ */
+static int qib_mcast_add(struct qib_ibdev *dev, struct qib_ibport *ibp,
+			 struct qib_mcast *mcast, struct qib_mcast_qp *mqp)
+{
+	struct rb_node **n = &ibp->mcast_tree.rb_node;
+	struct rb_node *pn = NULL;
+	int ret;
+
+	spin_lock_irq(&ibp->lock);
+
+	while (*n) {
+		struct qib_mcast *tmcast;
+		struct qib_mcast_qp *p;
+
+		pn = *n;
+		tmcast = rb_entry(pn, struct qib_mcast, rb_node);
+
+		ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
+			     sizeof(union ib_gid));
+		if (ret < 0) {
+			n = &pn->rb_left;
+			continue;
+		}
+		if (ret > 0) {
+			n = &pn->rb_right;
+			continue;
+		}
+
+		/* Search the QP list to see if this is already there. */
+		list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
+			if (p->qp == mqp->qp) {
+				ret = ESRCH;
+				goto bail;
+			}
+		}
+		if (tmcast->n_attached == ib_qib_max_mcast_qp_attached) {
+			ret = ENOMEM;
+			goto bail;
+		}
+
+		tmcast->n_attached++;
+
+		list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
+		ret = EEXIST;
+		goto bail;
+	}
+
+	spin_lock(&dev->n_mcast_grps_lock);
+	if (dev->n_mcast_grps_allocated == ib_qib_max_mcast_grps) {
+		spin_unlock(&dev->n_mcast_grps_lock);
+		ret = ENOMEM;
+		goto bail;
+	}
+
+	dev->n_mcast_grps_allocated++;
+	spin_unlock(&dev->n_mcast_grps_lock);
+
+	mcast->n_attached++;
+
+	list_add_tail_rcu(&mqp->list, &mcast->qp_list);
+
+	atomic_inc(&mcast->refcount);
+	rb_link_node(&mcast->rb_node, pn, n);
+	rb_insert_color(&mcast->rb_node, &ibp->mcast_tree);
+
+	ret = 0;
+
+bail:
+	spin_unlock_irq(&ibp->lock);
+
+	return ret;
+}
+
+int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct qib_qp *qp = to_iqp(ibqp);
+	struct qib_ibdev *dev = to_idev(ibqp->device);
+	struct qib_ibport *ibp;
+	struct qib_mcast *mcast;
+	struct qib_mcast_qp *mqp;
+	int ret;
+
+	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	/*
+	 * Allocate data structures since its better to do this outside of
+	 * spin locks and it will most likely be needed.
+	 */
+	mcast = qib_mcast_alloc(gid);
+	if (mcast == NULL) {
+		ret = -ENOMEM;
+		goto bail;
+	}
+	mqp = qib_mcast_qp_alloc(qp);
+	if (mqp == NULL) {
+		qib_mcast_free(mcast);
+		ret = -ENOMEM;
+		goto bail;
+	}
+	ibp = to_iport(ibqp->device, qp->port_num);
+	switch (qib_mcast_add(dev, ibp, mcast, mqp)) {
+	case ESRCH:
+		/* Neither was used: OK to attach the same QP twice. */
+		qib_mcast_qp_free(mqp);
+		qib_mcast_free(mcast);
+		break;
+
+	case EEXIST:            /* The mcast wasn't used */
+		qib_mcast_free(mcast);
+		break;
+
+	case ENOMEM:
+		/* Exceeded the maximum number of mcast groups. */
+		qib_mcast_qp_free(mqp);
+		qib_mcast_free(mcast);
+		ret = -ENOMEM;
+		goto bail;
+
+	default:
+		break;
+	}
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+	struct qib_qp *qp = to_iqp(ibqp);
+	struct qib_ibdev *dev = to_idev(ibqp->device);
+	struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num);
+	struct qib_mcast *mcast = NULL;
+	struct qib_mcast_qp *p, *tmp;
+	struct rb_node *n;
+	int last = 0;
+	int ret;
+
+	if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
+		ret = -EINVAL;
+		goto bail;
+	}
+
+	spin_lock_irq(&ibp->lock);
+
+	/* Find the GID in the mcast table. */
+	n = ibp->mcast_tree.rb_node;
+	while (1) {
+		if (n == NULL) {
+			spin_unlock_irq(&ibp->lock);
+			ret = -EINVAL;
+			goto bail;
+		}
+
+		mcast = rb_entry(n, struct qib_mcast, rb_node);
+		ret = memcmp(gid->raw, mcast->mgid.raw,
+			     sizeof(union ib_gid));
+		if (ret < 0)
+			n = n->rb_left;
+		else if (ret > 0)
+			n = n->rb_right;
+		else
+			break;
+	}
+
+	/* Search the QP list. */
+	list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
+		if (p->qp != qp)
+			continue;
+		/*
+		 * We found it, so remove it, but don't poison the forward
+		 * link until we are sure there are no list walkers.
+		 */
+		list_del_rcu(&p->list);
+		mcast->n_attached--;
+
+		/* If this was the last attached QP, remove the GID too. */
+		if (list_empty(&mcast->qp_list)) {
+			rb_erase(&mcast->rb_node, &ibp->mcast_tree);
+			last = 1;
+		}
+		break;
+	}
+
+	spin_unlock_irq(&ibp->lock);
+
+	if (p) {
+		/*
+		 * Wait for any list walkers to finish before freeing the
+		 * list element.
+		 */
+		wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
+		qib_mcast_qp_free(p);
+	}
+	if (last) {
+		atomic_dec(&mcast->refcount);
+		wait_event(mcast->wait, !atomic_read(&mcast->refcount));
+		qib_mcast_free(mcast);
+		spin_lock_irq(&dev->n_mcast_grps_lock);
+		dev->n_mcast_grps_allocated--;
+		spin_unlock_irq(&dev->n_mcast_grps_lock);
+	}
+
+	ret = 0;
+
+bail:
+	return ret;
+}
+
+int qib_mcast_tree_empty(struct qib_ibport *ibp)
+{
+	return ibp->mcast_tree.rb_node == NULL;
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 48/52] IB/qib: Add qib_wc_ppc64.c
From: Ralph Campbell @ 2010-05-07  0:03 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_wc_ppc64.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_wc_ppc64.c |   62 ++++++++++++++++++++++++++++++
 1 files changed, 62 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_wc_ppc64.c

diff --git a/drivers/infiniband/hw/qib/qib_wc_ppc64.c b/drivers/infiniband/hw/qib/qib_wc_ppc64.c
new file mode 100644
index 0000000..673cf4c
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_wc_ppc64.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file is conditionally built on PowerPC only.  Otherwise weak symbol
+ * versions of the functions exported from here are used.
+ */
+
+#include "qib.h"
+
+/**
+ * qib_enable_wc - enable write combining for MMIO writes to the device
+ * @dd: qlogic_ib device
+ *
+ * Nothing to do on PowerPC, so just return without error.
+ */
+int qib_enable_wc(struct qib_devdata *dd)
+{
+	return 0;
+}
+
+/**
+ * qib_unordered_wc - indicate whether write combining is unordered
+ *
+ * Because our performance depends on our ability to do write
+ * combining mmio writes in the most efficient way, we need to
+ * know if we are on a processor that may reorder stores when
+ * write combining.
+ */
+int qib_unordered_wc(void)
+{
+	return 1;
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 49/52] IB/qib: Add qib_wc_x86_64.c
From: Ralph Campbell @ 2010-05-07  0:03 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

creates the qib_wc_x86_64.c file.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/hw/qib/qib_wc_x86_64.c |  171 +++++++++++++++++++++++++++++
 1 files changed, 171 insertions(+), 0 deletions(-)
 create mode 100644 drivers/infiniband/hw/qib/qib_wc_x86_64.c

diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
new file mode 100644
index 0000000..561b8bc
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file is conditionally built on x86_64 only.  Otherwise weak symbol
+ * versions of the functions exported from here are used.
+ */
+
+#include <linux/pci.h>
+#include <asm/mtrr.h>
+#include <asm/processor.h>
+
+#include "qib.h"
+
+/**
+ * qib_enable_wc - enable write combining for MMIO writes to the device
+ * @dd: qlogic_ib device
+ *
+ * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
+ * write combining.
+ */
+int qib_enable_wc(struct qib_devdata *dd)
+{
+	int ret = 0;
+	u64 pioaddr, piolen;
+	unsigned bits;
+	const unsigned long addr = pci_resource_start(dd->pcidev, 0);
+	const size_t len = pci_resource_len(dd->pcidev, 0);
+
+	/*
+	 * Set the PIO buffers to be WCCOMB, so we get HT bursts to the
+	 * chip.  Linux (possibly the hardware) requires it to be on a power
+	 * of 2 address matching the length (which has to be a power of 2).
+	 * For rev1, that means the base address, for rev2, it will be just
+	 * the PIO buffers themselves.
+	 * For chips with two sets of buffers, the calculations are
+	 * somewhat more complicated; we need to sum, and the piobufbase
+	 * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
+	 * The buffers are still packed, so a single range covers both.
+	 */
+	if (dd->piobcnt2k && dd->piobcnt4k) {
+		/* 2 sizes for chip */
+		unsigned long pio2kbase, pio4kbase;
+		pio2kbase = dd->piobufbase & 0xffffffffUL;
+		pio4kbase = (dd->piobufbase >> 32) & 0xffffffffUL;
+		if (pio2kbase < pio4kbase) {
+			/* all current chips */
+			pioaddr = addr + pio2kbase;
+			piolen = pio4kbase - pio2kbase +
+				dd->piobcnt4k * dd->align4k;
+		} else {
+			pioaddr = addr + pio4kbase;
+			piolen = pio2kbase - pio4kbase +
+				dd->piobcnt2k * dd->palign;
+		}
+	} else {  /* single buffer size (2K, currently) */
+		pioaddr = addr + dd->piobufbase;
+		piolen = dd->piobcnt2k * dd->palign +
+			dd->piobcnt4k * dd->align4k;
+	}
+
+	for (bits = 0; !(piolen & (1ULL << bits)); bits++)
+		/* do nothing */ ;
+
+	if (piolen != (1ULL << bits)) {
+		piolen >>= bits;
+		while (piolen >>= 1)
+			bits++;
+		piolen = 1ULL << (bits + 1);
+	}
+	if (pioaddr & (piolen - 1)) {
+		u64 atmp;
+		atmp = pioaddr & ~(piolen - 1);
+		if (atmp < addr || (atmp + piolen) > (addr + len)) {
+			qib_dev_err(dd, "No way to align address/size "
+				    "(%llx/%llx), no WC mtrr\n",
+				    (unsigned long long) atmp,
+				    (unsigned long long) piolen << 1);
+			ret = -ENODEV;
+		} else {
+			pioaddr = atmp;
+			piolen <<= 1;
+		}
+	}
+
+	if (!ret) {
+		int cookie;
+
+		cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0);
+		if (cookie < 0) {
+			{
+				qib_devinfo(dd->pcidev,
+					 "mtrr_add()  WC for PIO bufs "
+					 "failed (%d)\n",
+					 cookie);
+				ret = -EINVAL;
+			}
+		} else {
+			dd->wc_cookie = cookie;
+			dd->wc_base = (unsigned long) pioaddr;
+			dd->wc_len = (unsigned long) piolen;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * qib_disable_wc - disable write combining for MMIO writes to the device
+ * @dd: qlogic_ib device
+ */
+void qib_disable_wc(struct qib_devdata *dd)
+{
+	if (dd->wc_cookie) {
+		int r;
+
+		r = mtrr_del(dd->wc_cookie, dd->wc_base,
+			     dd->wc_len);
+		if (r < 0)
+			qib_devinfo(dd->pcidev,
+				 "mtrr_del(%lx, %lx, %lx) failed: %d\n",
+				 dd->wc_cookie, dd->wc_base,
+				 dd->wc_len, r);
+		dd->wc_cookie = 0; /* even on failure */
+	}
+}
+
+/**
+ * qib_unordered_wc - indicate whether write combining is ordered
+ *
+ * Because our performance depends on our ability to do write combining mmio
+ * writes in the most efficient way, we need to know if we are on an Intel
+ * or AMD x86_64 processor.  AMD x86_64 processors flush WC buffers out in
+ * the order completed, and so no special flushing is required to get
+ * correct ordering.  Intel processors, however, will flush write buffers
+ * out in "random" orders, and so explicit ordering is needed at times.
+ */
+int qib_unordered_wc(void)
+{
+	return boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
+}

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 50/52] IB/qib: Hooks for adding the QIB driver into the framework
From: Ralph Campbell @ 2010-05-07  0:03 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/Kconfig  |    1 +
 drivers/infiniband/Makefile |    1 +
 2 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 975adce..3a04822 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -43,6 +43,7 @@ config INFINIBAND_ADDR_TRANS
 
 source "drivers/infiniband/hw/mthca/Kconfig"
 source "drivers/infiniband/hw/ipath/Kconfig"
+source "drivers/infiniband/hw/qib/Kconfig"
 source "drivers/infiniband/hw/ehca/Kconfig"
 source "drivers/infiniband/hw/amso1100/Kconfig"
 source "drivers/infiniband/hw/cxgb3/Kconfig"
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index ed35e44..27b66b1 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_INFINIBAND)		+= core/
 obj-$(CONFIG_INFINIBAND_MTHCA)		+= hw/mthca/
 obj-$(CONFIG_INFINIBAND_IPATH)		+= hw/ipath/
+obj-$(CONFIG_INFINIBAND_QIB)		+= hw/qib/
 obj-$(CONFIG_INFINIBAND_EHCA)		+= hw/ehca/
 obj-$(CONFIG_INFINIBAND_AMSO1100)	+= hw/amso1100/
 obj-$(CONFIG_INFINIBAND_CXGB3)		+= hw/cxgb3/

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH v3 52/52] IB/core: allow HCAs to create IB port sysfs files
From: Ralph Campbell @ 2010-05-07  0:03 UTC (permalink / raw)
  To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>

This patch adds a new parameter to ib_register_device() so that
HCAs can create files in /sys/class/infiniband/<hca>/ports/<N>/.
There is no need for an unregister function since the kobject
reference will go to zero when ib_unregister_device() is called.

Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---

 drivers/infiniband/core/core_priv.h          |    2 +-
 drivers/infiniband/core/device.c             |    4 ++--
 drivers/infiniband/core/sysfs.c              |   24 +++++++++++++++++++++++-
 drivers/infiniband/hw/amso1100/c2_provider.c |    2 +-
 drivers/infiniband/hw/cxgb3/iwch_provider.c  |    2 +-
 drivers/infiniband/hw/ehca/ehca_main.c       |    2 +-
 drivers/infiniband/hw/ipath/ipath_verbs.c    |    2 +-
 drivers/infiniband/hw/mlx4/main.c            |    2 +-
 drivers/infiniband/hw/mthca/mthca_provider.c |    2 +-
 drivers/infiniband/hw/nes/nes_verbs.c        |    2 +-
 include/rdma/ib_verbs.h                      |    5 ++++-
 11 files changed, 37 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 05ac36e..e70c809 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -38,7 +38,7 @@
 
 #include <rdma/ib_verbs.h>
 
-int  ib_device_register_sysfs(struct ib_device *device);
+int  ib_device_register_sysfs(struct ib_device *device, sysfs_cb cb);
 void ib_device_unregister_sysfs(struct ib_device *device);
 
 int  ib_sysfs_setup(void);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index d1fba41..9ef8093 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -267,7 +267,7 @@ out:
  * callback for each device that is added. @device must be allocated
  * with ib_alloc_device().
  */
-int ib_register_device(struct ib_device *device)
+int ib_register_device(struct ib_device *device, sysfs_cb cb)
 {
 	int ret;
 
@@ -296,7 +296,7 @@ int ib_register_device(struct ib_device *device)
 		goto out;
 	}
 
-	ret = ib_device_register_sysfs(device);
+	ret = ib_device_register_sysfs(device, cb);
 	if (ret) {
 		printk(KERN_WARNING "Couldn't register device %s with driver model\n",
 		       device->name);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index f901957..a47bac8 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -754,7 +754,23 @@ static struct attribute_group iw_stats_group = {
 	.attrs	= iw_proto_stats_attrs,
 };
 
-int ib_device_register_sysfs(struct ib_device *device)
+static int sysfs_create_port_files(struct ib_device *device, sysfs_cb cb)
+{
+	struct kobject *p;
+	struct ib_port *port;
+	int ret = 0;
+
+	list_for_each_entry(p, &device->port_list, entry) {
+		port = container_of(p, struct ib_port, kobj);
+		ret = cb(device, port->port_num, &port->kobj);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+int ib_device_register_sysfs(struct ib_device *device, sysfs_cb cb)
 {
 	struct device *class_dev = &device->dev;
 	int ret;
@@ -802,6 +818,12 @@ int ib_device_register_sysfs(struct ib_device *device)
 			goto err_put;
 	}
 
+	if (cb) {
+		ret = sysfs_create_port_files(device, cb);
+		if (ret)
+			goto err_put;
+	}
+
 	return 0;
 
 err_put:
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index ad723bd..d128386 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -864,7 +864,7 @@ int c2_register_device(struct c2_dev *dev)
 	dev->ibdev.iwcm->create_listen = c2_service_create;
 	dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
 
-	ret = ib_register_device(&dev->ibdev);
+	ret = ib_register_device(&dev->ibdev, NULL);
 	if (ret)
 		goto out_free_iwcm;
 
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 47b35c6..26f22ec 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1427,7 +1427,7 @@ int iwch_register_device(struct iwch_dev *dev)
 	dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
 	dev->ibdev.iwcm->get_qp = iwch_get_qp;
 
-	ret = ib_register_device(&dev->ibdev);
+	ret = ib_register_device(&dev->ibdev, NULL);
 	if (ret)
 		goto bail1;
 
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 129a6be..d1a9278 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -798,7 +798,7 @@ static int __devinit ehca_probe(struct of_device *dev,
 		goto probe5;
 	}
 
-	ret = ib_register_device(&shca->ib_device);
+	ret = ib_register_device(&shca->ib_device, NULL);
 	if (ret) {
 		ehca_err(&shca->ib_device,
 			 "ib_register_device() failed ret=%i", ret);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 9289ab4..055e7fd 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -2181,7 +2181,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
 	snprintf(dev->node_desc, sizeof(dev->node_desc),
 		 IPATH_IDSTR " %s", init_utsname()->nodename);
 
-	ret = ib_register_device(dev);
+	ret = ib_register_device(dev, NULL);
 	if (ret)
 		goto err_reg;
 
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index e596537..76dcd81 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -660,7 +660,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
 	spin_lock_init(&ibdev->sm_lock);
 	mutex_init(&ibdev->cap_mask_mutex);
 
-	if (ib_register_device(&ibdev->ib_dev))
+	if (ib_register_device(&ibdev->ib_dev, NULL))
 		goto err_map;
 
 	if (mlx4_ib_mad_init(ibdev))
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index bcf7a40..e53a773 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1402,7 +1402,7 @@ int mthca_register_device(struct mthca_dev *dev)
 
 	mutex_init(&dev->cap_mask_mutex);
 
-	ret = ib_register_device(&dev->ib_dev);
+	ret = ib_register_device(&dev->ib_dev, NULL);
 	if (ret)
 		return ret;
 
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 36348bf..a45c749 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -3961,7 +3961,7 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
 	struct nes_adapter *nesadapter = nesdev->nesadapter;
 	int i, ret;
 
-	ret = ib_register_device(&nesvnic->nesibdev->ibdev);
+	ret = ib_register_device(&nesvnic->nesibdev->ibdev, NULL);
 	if (ret) {
 		return ret;
 	}
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6ecd85c..c36a0bd 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1189,7 +1189,10 @@ struct ib_client {
 struct ib_device *ib_alloc_device(size_t size);
 void ib_dealloc_device(struct ib_device *device);
 
-int ib_register_device   (struct ib_device *device);
+typedef int (*sysfs_cb)(struct ib_device *dev, u8 port_num,
+			struct kobject *kobj);
+
+int ib_register_device(struct ib_device *device, sysfs_cb cb);
 void ib_unregister_device(struct ib_device *device);
 
 int ib_register_client   (struct ib_client *client);

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: [RFC] libibverbs: ibv_fork_init() and libhugetlbfs
From: Alexander Schmidt @ 2010-05-07 10:19 UTC (permalink / raw)
  To: Roland Dreier
  Cc: of-ewg, Linux RDMA, Hoang-Nam Nguyen, Stefan Roscher,
	Joachim Fenkes, Christoph Raisch, Alex Vainman
In-Reply-To: <adavdb092d8.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>

On Thu, 06 May 2010 13:55:31 -0700
Roland Dreier <rdreier-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:
> I think that we cannot assume huge pages only come from libhugetlbfs --
> we should support an application directly enabling huge pages (possibly
> via another library too, so we can't assume that an application knows
> the page size for a memory range it is about to register).
> 
> And also the 16 MB page size constant is of course not feasible -- with
> all due respect, the x86 page size of 2 MB is much more likely in
> practice :)  (Although perhaps the much slower PowerPC TLB refill makes
> users more likely to try and use hugetlb pages ;)
> 
> Alex suggested parsing files in the same way as libhugetlbfs does to get
> the page size, and that seems to be the best solution, since I don't
> think the libhugetlbfs license is compatible with the BSD license for
> libibverbs.
> 
> But your trick of using /proc/*/maps looks nice.  Does that only work
> for libhugetlbfs or can we recognize direct mmap of hugetlb pages?

Hi Roland, thanks for your comments!

I've reworked my patch:
 * added get_huge_page_size() to read the huge page size from
   /proc/meminfo. This is done at ibv_fork_init() time.
 * I noticed that some applications like ibv_rc_pingpong already
   get memory from libhugetlbfs when running ibv_fork_init(). So
   I changed the code for testing madvise() to allocate a huge page
   if the huge page size is set in the system.

I have not tested this code with different libraries providing huge
pages / mmaped pages yet, but I hope this can be added later on when
we have agreed on an approach to handle huge pages.

Signed-off-by: Alexander Schmidt <alexs-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
 src/memory.c |  103 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 95 insertions(+), 8 deletions(-)

--- libibverbs-1.1.2.orig/src/memory.c
+++ libibverbs-1.1.2/src/memory.c
@@ -40,6 +40,8 @@
 #include <unistd.h>
 #include <stdlib.h>
 #include <stdint.h>
+#include <stdio.h>
+#include <string.h>
 
 #include "ibverbs.h"
 
@@ -68,12 +70,45 @@ struct ibv_mem_node {
 static struct ibv_mem_node *mm_root;
 static pthread_mutex_t mm_mutex = PTHREAD_MUTEX_INITIALIZER;
 static int page_size;
+static int huge_page_size;
 static int too_late;
 
+static int get_huge_page_size(void)
+{
+	int ret = -1;
+	FILE *file;
+	char *path = "/proc/meminfo";
+	char buf[1024], type[128];
+
+	file = fopen(path, "r");
+	if (!file)
+		goto out;
+
+	while (fgets(buf, sizeof(buf), file) != NULL) {
+		int n;
+		unsigned long size;
+
+		n = sscanf(buf, "%127s %lu %*s", &type, &size);
+
+		if (n < 2)
+			continue;
+
+		if (!strcmp(type, "Hugepagesize:")) {
+			/* huge page size is printed in Kb */
+			ret = size * 1024;
+			break;
+		}
+	}
+	fclose(file);
+
+out:
+	return ret;
+}
+
 int ibv_fork_init(void)
 {
 	void *tmp;
-	int ret;
+	int ret, size;
 
 	if (mm_root)
 		return 0;
@@ -85,11 +120,18 @@ int ibv_fork_init(void)
 	if (page_size < 0)
 		return errno;
 
-	if (posix_memalign(&tmp, page_size, page_size))
+	huge_page_size = get_huge_page_size();
+
+	if (huge_page_size > page_size)
+		size = huge_page_size;
+	else
+		size = page_size;
+
+	if (posix_memalign(&tmp, size, size))
 		return ENOMEM;
 
-	ret = madvise(tmp, page_size, MADV_DONTFORK) ||
-	      madvise(tmp, page_size, MADV_DOFORK);
+	ret = madvise(tmp, size, MADV_DONTFORK) ||
+	      madvise(tmp, size, MADV_DOFORK);
 
 	free(tmp);
 
@@ -446,11 +488,51 @@ static struct ibv_mem_node *__mm_find_st
 	return node;
 }
 
+static int is_huge_page(void *base)
+{
+	int ret = 0;
+	pid_t pid;
+	FILE *file;
+	char buf[1024], lib[128];
+
+	pid = getpid();
+	snprintf(buf, sizeof(buf), "/proc/%d/maps", pid);
+
+	file = fopen(buf, "r");
+	if (!file)
+		goto out;
+
+	while (fgets(buf, sizeof(buf), file) != NULL) {
+		int n;
+		char *substr;
+		uintptr_t range_start, range_end;
+
+		n = sscanf(buf, "%lx-%lx %*s %*x %*s %*u %127s",
+				&range_start, &range_end, &lib);
+
+		if (n < 3)
+			continue;
+
+		substr = strstr(lib, "libhugetlbfs");
+		if (substr) {
+			if ((uintptr_t) base >= range_start &&
+					(uintptr_t) base < range_end) {
+				ret = 1;
+				break;
+			}
+		}
+	}
+	fclose(file);
+
+out:
+	return ret;
+}
+
 static int ibv_madvise_range(void *base, size_t size, int advice)
 {
 	uintptr_t start, end;
 	struct ibv_mem_node *node, *tmp;
-	int inc;
+	int inc, range_page_size;
 	int ret = 0;
 
 	if (!size)
@@ -458,9 +540,14 @@ static int ibv_madvise_range(void *base,
 
 	inc = advice == MADV_DONTFORK ? 1 : -1;
 
-	start = (uintptr_t) base & ~(page_size - 1);
-	end   = ((uintptr_t) (base + size + page_size - 1) &
-		 ~(page_size - 1)) - 1;
+	if (huge_page_size > page_size && is_huge_page(base))
+		range_page_size = huge_page_size;
+	else
+		range_page_size = page_size;
+
+	start = (uintptr_t) base & ~(range_page_size - 1);
+	end   = ((uintptr_t) (base + size + range_page_size - 1) &
+		 ~(range_page_size - 1)) - 1;
 
 	pthread_mutex_lock(&mm_mutex);
 
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH] ummunotify: Userspace support for MMU notifications V2
From: Jeff Squyres @ 2010-05-07 12:01 UTC (permalink / raw)
  To: Eric B Munson
  Cc: akpm, linux-kernel, linux-rdma, linux-mm, rolandd, peterz, mingo,
	pavel, randy.dunlap
In-Reply-To: <1271943493-12120-1-git-send-email-ebmunson@us.ibm.com>

On Apr 22, 2010, at 9:38 AM, Eric B Munson wrote:

> From: Roland Dreier <rolandd@cisco.com>
> 
> As discussed in <http://article.gmane.org/gmane.linux.drivers.openib/61925>
> and follow-up messages, libraries using RDMA would like to track
> precisely when application code changes memory mapping via free(),
> munmap(), etc.  Current pure-userspace solutions using malloc hooks
> and other tricks are not robust, and the feeling among experts is that
> the issue is unfixable without kernel help.

Sorry for not replying earlier -- just to throw in my $0.02 here: the MPI community is *very interested* in having this stuff in upstream kernels.  It solves a fairly major problem for us. 

Open MPI (www.open-mpi.org) is ready to pretty much immediately take advantage of these capabilities.  The code to use ummunotify is in a Mercurial branch; we're only waiting for ummunotify to go upstream before committing our support for it to our main SVN development trunk.

> We solve this not by implementing the full API proposed in the email
> linked above but rather with a simpler and more generic interface,
> which may be useful in other contexts.  Specifically, we implement a
> new character device driver, ummunotify, that creates a /dev/ummunotify
> node.  A userspace process can open this node read-only and use the fd
> as follows:
> 
>  1. ioctl() to register/unregister an address range to watch in the
>     kernel (cf struct ummunotify_register_ioctl in <linux/ummunotify.h>).
> 
>  2. read() to retrieve events generated when a mapping in a watched
>     address range is invalidated (cf struct ummunotify_event in
>     <linux/ummunotify.h>).  select()/poll()/epoll() and SIGIO are
>     handled for this IO.
> 
>  3. mmap() one page at offset 0 to map a kernel page that contains a
>     generation counter that is incremented each time an event is
>     generated.  This allows userspace to have a fast path that checks
>     that no events have occurred without a system call.
> 
> Thanks to Jason Gunthorpe <jgunthorpe <at> obsidianresearch.com> for
> suggestions on the interface design.  Also thanks to Jeff Squyres
> <jsquyres <at> cisco.com> for prototyping support for this in Open MPI, which
> helped find several bugs during development.
> 
> Signed-off-by: Roland Dreier <rolandd@cisco.com>
> Signed-off-by: Eric B Munson <ebmunson@us.ibm.com>

Acked-by: Jeff Squyers <jsquyres@cisco.com>

> ---
> 
> Changes from V1:
> - Update Kbuild to handle test program build properly
> - Update documentation to cover questions not addressed in previous
>   thread
> ---
>  Documentation/Makefile                  |    3 +-
>  Documentation/ummunotify/Makefile       |    7 +
>  Documentation/ummunotify/ummunotify.txt |  162 +++++++++
>  Documentation/ummunotify/umn-test.c     |  200 +++++++++++
>  drivers/char/Kconfig                    |   12 +
>  drivers/char/Makefile                   |    1 +
>  drivers/char/ummunotify.c               |  567 +++++++++++++++++++++++++++++++
>  include/linux/Kbuild                    |    1 +
>  include/linux/ummunotify.h              |  121 +++++++
>  9 files changed, 1073 insertions(+), 1 deletions(-)
>  create mode 100644 Documentation/ummunotify/Makefile
>  create mode 100644 Documentation/ummunotify/ummunotify.txt
>  create mode 100644 Documentation/ummunotify/umn-test.c
>  create mode 100644 drivers/char/ummunotify.c
>  create mode 100644 include/linux/ummunotify.h
> 
> diff --git a/Documentation/Makefile b/Documentation/Makefile
> index 6fc7ea1..27ba76a 100644
> --- a/Documentation/Makefile
> +++ b/Documentation/Makefile
> @@ -1,3 +1,4 @@
>  obj-m := DocBook/ accounting/ auxdisplay/ connector/ \
>         filesystems/ filesystems/configfs/ ia64/ laptops/ networking/ \
> -       pcmcia/ spi/ timers/ video4linux/ vm/ watchdog/src/
> +       pcmcia/ spi/ timers/ video4linux/ vm/ ummunotify/ \
> +       watchdog/src/
> diff --git a/Documentation/ummunotify/Makefile b/Documentation/ummunotify/Makefile
> new file mode 100644
> index 0000000..89f31a0
> --- /dev/null
> +++ b/Documentation/ummunotify/Makefile
> @@ -0,0 +1,7 @@
> +# List of programs to build
> +hostprogs-y := umn-test
> +
> +# Tell kbuild to always build the programs
> +always := $(hostprogs-y)
> +
> +HOSTCFLAGS_umn-test.o += -I$(objtree)/usr/include
> diff --git a/Documentation/ummunotify/ummunotify.txt b/Documentation/ummunotify/ummunotify.txt
> new file mode 100644
> index 0000000..d6c2ccc
> --- /dev/null
> +++ b/Documentation/ummunotify/ummunotify.txt
> @@ -0,0 +1,162 @@
> +UMMUNOTIFY
> +
> +  Ummunotify relays MMU notifier events to userspace.  This is useful
> +  for libraries that need to track the memory mapping of applications;
> +  for example, MPI implementations using RDMA want to cache memory
> +  registrations for performance, but tracking all possible crazy cases
> +  such as when, say, the FORTRAN runtime frees memory is impossible
> +  without kernel help.
> +
> +Basic Model
> +
> +  A userspace process uses it by opening /dev/ummunotify, which
> +  returns a file descriptor.  Interest in address ranges is registered
> +  using ioctl() and MMU notifier events are retrieved using read(), as
> +  described in more detail below.  Userspace can register multiple
> +  address ranges to watch, and can unregister individual ranges.
> +
> +  Userspace can also mmap() a single read-only page at offset 0 on
> +  this file descriptor.  This page contains (at offest 0) a single
> +  64-bit generation counter that the kernel increments each time an
> +  MMU notifier event occurs.  Userspace can use this to very quickly
> +  check if there are any events to retrieve without needing to do a
> +  system call.
> +
> +Control
> +
> +  To start using ummunotify, a process opens /dev/ummunotify in
> +  read-only mode.  This will attach to current->mm because the current
> +  consumers of this functionality do all monitoring in the process
> +  being monitored.  It is currently not possible to use this device to
> +  monitor other processes.  Control from userspace is done via ioctl().
> +  An ioctl was chosen because the number of files required to register
> +  a new address range in sysfs would be unwieldy and new procfs entries
> +  are discouraged.  The defined ioctls are:
> +
> +    UMMUNOTIFY_EXCHANGE_FEATURES: This ioctl takes a single 32-bit
> +      word of feature flags as input, and the kernel updates the
> +      features flags word to contain only features requested by
> +      userspace and also supported by the kernel.
> +
> +      This ioctl is only included for forward compatibility; no
> +      feature flags are currently defined, and the kernel will simply
> +      update any requested feature mask to 0.  The kernel will always
> +      default to a feature mask of 0 if this ioctl is not used, so
> +      current userspace does not need to perform this ioctl.
> +
> +    UMMUNOTIFY_REGISTER_REGION: Userspace uses this ioctl to tell the
> +      kernel to start delivering events for an address range.  The
> +      range is described using struct ummunotify_register_ioctl:
> +
> +       struct ummunotify_register_ioctl {
> +               __u64   start;
> +               __u64   end;
> +               __u64   user_cookie;
> +               __u32   flags;
> +               __u32   reserved;
> +       };
> +
> +      start and end give the range of userspace virtual addresses;
> +      start is included in the range and end is not, so an example of
> +      a 4 KB range would be start=0x1000, end=0x2000.
> +
> +      user_cookie is an opaque 64-bit quantity that is returned by the
> +      kernel in events involving the range, and used by userspace to
> +      stop watching the range.  Each registered address range must
> +      have a distinct user_cookie.
> +
> +      It is fine with the kernel if userspace registers multiple
> +      overlapping or even duplicate address ranges, as long as a
> +      different cookie is used for each registration.
> +
> +      flags and reserved are included for forward compatibility;
> +      userspace should simply set them to 0 for the current interface.
> +
> +    UMMUNOTIFY_UNREGISTER_REGION: Userspace passes in the 64-bit
> +      user_cookie used to register a range to tell the kernel to stop
> +      watching an address range.  Once this ioctl completes, the
> +      kernel will not deliver any further events for the range that is
> +      unregistered.
> +
> +Events
> +
> +  When an event occurs that invalidates some of a process's memory
> +  mapping in an address range being watched, ummunotify queues an
> +  event report for that address range.  If more than one event
> +  invalidates parts of the same address range before userspace
> +  retrieves the queued report, then further reports for the same range
> +  will not be queued -- when userspace does read the queue, only a
> +  single report for a given range will be returned.
> +
> +  If multiple ranges being watched are invalidated by a single event
> +  (which is especially likely if userspace registers overlapping
> +  ranges), then an event report structure will be queued for each
> +  address range registration.
> +
> +  It is possible, if a large enough number of overlapping ranges are
> +  registered and the list of invalidated events is busy enough and
> +  ignored long enough, to cause the kernel to run out of memory.
> +  Because this situation is unlikely to occur, the event queue size
> +  is not bounded in order to avoid dropping events if the queue grows
> +  beyond set bounds.
> +
> +  Userspace retrieves queued events via read() on the ummunotify file
> +  descriptor; a buffer that is at least as big as struct
> +  ummunotify_event should be used to retrieve event reports, and if a
> +  larger buffer is passed to read(), multiple reports will be returned
> +  (if available).
> +
> +  If the ummunotify file descriptor is in blocking mode, a read() call
> +  will wait for an event report to be available.  Userspace may also
> +  set the ummunotify file descriptor to non-blocking mode and use all
> +  standard ways of waiting for data to be available on the ummunotify
> +  file descriptor, including epoll/poll()/select() and SIGIO.
> +
> +  The format of event reports is:
> +
> +       struct ummunotify_event {
> +               __u32   type;
> +               __u32   flags;
> +               __u64   hint_start;
> +               __u64   hint_end;
> +               __u64   user_cookie_counter;
> +       };
> +
> +  where the type field is either UMMUNOTIFY_EVENT_TYPE_INVAL or
> +  UMMUNOTIFY_EVENT_TYPE_LAST.  Events of type INVAL describe
> +  invalidation events as follows: user_cookie_counter contains the
> +  cookie passed in when userspace registered the range that the event
> +  is for.  hint_start and hint_end contain the start address and end
> +  address that were invalidated.
> +
> +  The flags word contains bit flags, with only UMMUNOTIFY_EVENT_FLAG_HINT
> +  defined at the moment.  If HINT is set, then the invalidation event
> +  invalidated less than the full address range and the kernel returns
> +  the exact range invalidated; if HINT is not sent then hint_start and
> +  hint_end are set to the original range registered by userspace.
> +  (HINT will not be set if, for example, multiple events invalidated
> +  disjoint parts of the range and so a single start/end pair cannot
> +  represent the parts of the range that were invalidated)
> +
> +  If the event type is LAST, then the read operation has emptied the
> +  list of invalidated regions, and the flags, hint_start and hint_end
> +  fields are not used.  user_cookie_counter holds the value of the
> +  kernel's generation counter (see below of more details) when the
> +  empty list occurred.
> +
> +Generation Count
> +
> +  Userspace may mmap() a page on a ummunotify file descriptor via
> +
> +       mmap(NULL, sizeof (__u64), PROT_READ, MAP_SHARED, ummunotify_fd, 0);
> +
> +  to get a read-only mapping of the kernel's 64-bit generation
> +  counter.  The kernel will increment this generation counter each
> +  time an event report is queued.
> +
> +  Userspace can use the generation counter as a quick check to avoid
> +  system calls; if the value read from the mapped kernel counter is
> +  still equal to the value returned in user_cookie_counter for the
> +  most recent LAST event retrieved, then no further events have been
> +  queued and there is no need to try a read() on the ummunotify file
> +  descriptor.
> diff --git a/Documentation/ummunotify/umn-test.c b/Documentation/ummunotify/umn-test.c
> new file mode 100644
> index 0000000..143db2c
> --- /dev/null
> +++ b/Documentation/ummunotify/umn-test.c
> @@ -0,0 +1,200 @@
> +/*
> + * Copyright (c) 2009 Cisco Systems.  All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */
> +
> +#include <stdint.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <unistd.h>
> +
> +#include <linux/ummunotify.h>
> +
> +#include <sys/mman.h>
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +#include <sys/ioctl.h>
> +
> +#define UMN_TEST_COOKIE 123
> +
> +static int             umn_fd;
> +static volatile __u64  *umn_counter;
> +
> +static int umn_init(void)
> +{
> +       __u32 flags;
> +
> +       umn_fd = open("/dev/ummunotify", O_RDONLY);
> +       if (umn_fd < 0) {
> +               perror("open");
> +               return 1;
> +       }
> +
> +       if (ioctl(umn_fd, UMMUNOTIFY_EXCHANGE_FEATURES, &flags)) {
> +               perror("exchange ioctl");
> +               return 1;
> +       }
> +
> +       printf("kernel feature flags: 0x%08x\n", flags);
> +
> +       umn_counter = mmap(NULL, sizeof *umn_counter, PROT_READ,
> +                          MAP_SHARED, umn_fd, 0);
> +       if (umn_counter == MAP_FAILED) {
> +               perror("mmap");
> +               return 1;
> +       }
> +
> +       return 0;
> +}
> +
> +static int umn_register(void *buf, size_t size, __u64 cookie)
> +{
> +       struct ummunotify_register_ioctl r = {
> +               .start          = (unsigned long) buf,
> +               .end            = (unsigned long) buf + size,
> +               .user_cookie    = cookie,
> +       };
> +
> +       if (ioctl(umn_fd, UMMUNOTIFY_REGISTER_REGION, &r)) {
> +               perror("register ioctl");
> +               return 1;
> +       }
> +
> +       return 0;
> +}
> +
> +static int umn_unregister(__u64 cookie)
> +{
> +       if (ioctl(umn_fd, UMMUNOTIFY_UNREGISTER_REGION, &cookie)) {
> +               perror("unregister ioctl");
> +               return 1;
> +       }
> +
> +       return 0;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> +       int                     page_size;
> +       __u64                   old_counter;
> +       void                   *t;
> +       int                     got_it;
> +
> +       if (umn_init())
> +               return 1;
> +
> +       printf("\n");
> +
> +       old_counter = *umn_counter;
> +       if (old_counter != 0) {
> +               fprintf(stderr, "counter = %lld (expected 0)\n", old_counter);
> +               return 1;
> +       }
> +
> +       page_size = sysconf(_SC_PAGESIZE);
> +       t = mmap(NULL, 3 * page_size, PROT_READ,
> +                MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
> +
> +       if (umn_register(t, 3 * page_size, UMN_TEST_COOKIE))
> +               return 1;
> +
> +       munmap(t + page_size, page_size);
> +
> +       old_counter = *umn_counter;
> +       if (old_counter != 1) {
> +               fprintf(stderr, "counter = %lld (expected 1)\n", old_counter);
> +               return 1;
> +       }
> +
> +       got_it = 0;
> +       while (1) {
> +               struct ummunotify_event ev;
> +               int                     len;
> +
> +               len = read(umn_fd, &ev, sizeof ev);
> +               if (len < 0) {
> +                       perror("read event");
> +                       return 1;
> +               }
> +               if (len != sizeof ev) {
> +                       fprintf(stderr, "Read gave %d bytes (!= event size %zd)\n",
> +                               len, sizeof ev);
> +                       return 1;
> +               }
> +
> +               switch (ev.type) {
> +               case UMMUNOTIFY_EVENT_TYPE_INVAL:
> +                       if (got_it) {
> +                               fprintf(stderr, "Extra invalidate event\n");
> +                               return 1;
> +                       }
> +                       if (ev.user_cookie_counter != UMN_TEST_COOKIE) {
> +                               fprintf(stderr, "Invalidate event for cookie %lld (expected %d)\n",
> +                                       ev.user_cookie_counter,
> +                                       UMN_TEST_COOKIE);
> +                               return 1;
> +                       }
> +
> +                       printf("Invalidate event:\tcookie %lld\n",
> +                              ev.user_cookie_counter);
> +
> +                       if (!(ev.flags & UMMUNOTIFY_EVENT_FLAG_HINT)) {
> +                               fprintf(stderr, "Hint flag not set\n");
> +                               return 1;
> +                       }
> +
> +                       if (ev.hint_start != (uintptr_t) t + page_size ||
> +                           ev.hint_end != (uintptr_t) t + page_size * 2) {
> +                               fprintf(stderr, "Got hint %llx..%llx, expected %p..%p\n",
> +                                       ev.hint_start, ev.hint_end,
> +                                       t + page_size, t + page_size * 2);
> +                               return 1;
> +                       }
> +
> +                       printf("\t\t\thint %llx...%llx\n",
> +                              ev.hint_start, ev.hint_end);
> +
> +                       got_it = 1;
> +                       break;
> +
> +               case UMMUNOTIFY_EVENT_TYPE_LAST:
> +                       if (!got_it) {
> +                               fprintf(stderr, "Last event without invalidate event\n");
> +                               return 1;
> +                       }
> +
> +                       printf("Empty event:\t\tcounter %lld\n",
> +                              ev.user_cookie_counter);
> +                       goto done;
> +
> +               default:
> +                       fprintf(stderr, "unknown event type %d\n",
> +                               ev.type);
> +                       return 1;
> +               }
> +       }
> +
> +done:
> +       umn_unregister(123);
> +       munmap(t, page_size);
> +
> +       old_counter = *umn_counter;
> +       if (old_counter != 1) {
> +               fprintf(stderr, "counter = %lld (expected 1)\n", old_counter);
> +               return 1;
> +       }
> +
> +       return 0;
> +}
> diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
> index 3141dd3..cf26019 100644
> --- a/drivers/char/Kconfig
> +++ b/drivers/char/Kconfig
> @@ -1111,6 +1111,18 @@ config DEVPORT
>         depends on ISA || PCI
>         default y
> 
> +config UMMUNOTIFY
> +       tristate "Userspace MMU notifications"
> +       select MMU_NOTIFIER
> +       help
> +         The ummunotify (userspace MMU notification) driver creates a
> +         character device that can be used by userspace libraries to
> +         get notifications when an application's memory mapping
> +         changed.  This is used, for example, by RDMA libraries to
> +         improve the reliability of memory registration caching, since
> +         the kernel's MMU notifications can be used to know precisely
> +         when to shoot down a cached registration.
> +
>  source "drivers/s390/char/Kconfig"
> 
>  endmenu
> diff --git a/drivers/char/Makefile b/drivers/char/Makefile
> index f957edf..521e5de 100644
> --- a/drivers/char/Makefile
> +++ b/drivers/char/Makefile
> @@ -97,6 +97,7 @@ obj-$(CONFIG_NSC_GPIO)                += nsc_gpio.o
>  obj-$(CONFIG_CS5535_GPIO)      += cs5535_gpio.o
>  obj-$(CONFIG_GPIO_TB0219)      += tb0219.o
>  obj-$(CONFIG_TELCLOCK)         += tlclk.o
> +obj-$(CONFIG_UMMUNOTIFY)       += ummunotify.o
> 
>  obj-$(CONFIG_MWAVE)            += mwave/
>  obj-$(CONFIG_AGP)              += agp/
> diff --git a/drivers/char/ummunotify.c b/drivers/char/ummunotify.c
> new file mode 100644
> index 0000000..c14df3f
> --- /dev/null
> +++ b/drivers/char/ummunotify.c
> @@ -0,0 +1,567 @@
> +/*
> + * Copyright (c) 2009 Cisco Systems.  All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */
> +
> +#include <linux/fs.h>
> +#include <linux/init.h>
> +#include <linux/list.h>
> +#include <linux/miscdevice.h>
> +#include <linux/mm.h>
> +#include <linux/mmu_notifier.h>
> +#include <linux/module.h>
> +#include <linux/poll.h>
> +#include <linux/rbtree.h>
> +#include <linux/sched.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +#include <linux/uaccess.h>
> +#include <linux/ummunotify.h>
> +
> +#include <asm/cacheflush.h>
> +
> +MODULE_AUTHOR("Roland Dreier");
> +MODULE_DESCRIPTION("Userspace MMU notifiers");
> +MODULE_LICENSE("GPL v2");
> +
> +/*
> + * Information about an address range userspace has asked us to watch.
> + *
> + * user_cookie: Opaque cookie given to us when userspace registers the
> + *   address range.
> + *
> + * start, end: Address range; start is inclusive, end is exclusive.
> + *
> + * hint_start, hint_end: If a single MMU notification event
> + *   invalidates the address range, we hold the actual range of
> + *   addresses that were invalidated (and set UMMUNOTIFY_FLAG_HINT).
> + *   If another event hits this range before userspace reads the
> + *   event, we give up and don't try to keep track of which subsets
> + *   got invalidated.
> + *
> + * flags: Holds the INVALID flag for ranges that are on the invalid
> + *   list and/or the HINT flag for ranges where the hint range holds
> + *   good information.
> + *
> + * node: Used to put the range into an rbtree we use to be able to
> + *   scan address ranges in order.
> + *
> + * list: Used to put the range on the invalid list when an MMU
> + *   notification event hits the range.
> + */
> +enum {
> +       UMMUNOTIFY_FLAG_INVALID = 1,
> +       UMMUNOTIFY_FLAG_HINT    = 2,
> +};
> +
> +struct ummunotify_reg {
> +       u64                     user_cookie;
> +       unsigned long           start;
> +       unsigned long           end;
> +       unsigned long           hint_start;
> +       unsigned long           hint_end;
> +       unsigned long           flags;
> +       struct rb_node          node;
> +       struct list_head        list;
> +};
> +
> +/*
> + * Context attached to each file that userspace opens.
> + *
> + * mmu_notifier: MMU notifier registered for this context.
> + *
> + * mm: mm_struct for process that created the context; we use this to
> + *   hold a reference to the mm to make sure it doesn't go away until
> + *   we're done with it.
> + *
> + * reg_tree: RB tree of address ranges being watched, sorted by start
> + *   address.
> + *
> + * invalid_list: List of address ranges that have been invalidated by
> + *   MMU notification events; as userspace reads events, the address
> + *   range corresponding to the event is removed from the list.
> + *
> + * counter: Page that can be mapped read-only by userspace, which
> + *   holds a generation count that is incremented each time an event
> + *   occurs.
> + *
> + * lock: Spinlock used to protect all context.
> + *
> + * read_wait: Wait queue used to wait for data to become available in
> + *   blocking read()s.
> + *
> + * async_queue: Used to implement fasync().
> + *
> + * need_empty: Set when userspace reads an invalidation event, so that
> + *   read() knows it must generate an "empty" event when userspace
> + *   drains the invalid_list.
> + *
> + * used: Set after userspace does anything with the file, so that the
> + *   "exchange flags" ioctl() knows it's too late to change anything.
> + */
> +struct ummunotify_file {
> +       struct mmu_notifier     mmu_notifier;
> +       struct mm_struct       *mm;
> +       struct rb_root          reg_tree;
> +       struct list_head        invalid_list;
> +       u64                    *counter;
> +       spinlock_t              lock;
> +       wait_queue_head_t       read_wait;
> +       struct fasync_struct   *async_queue;
> +       int                     need_empty;
> +       int                     used;
> +};
> +
> +static void ummunotify_handle_notify(struct mmu_notifier *mn,
> +                                    unsigned long start, unsigned long end)
> +{
> +       struct ummunotify_file *priv =
> +               container_of(mn, struct ummunotify_file, mmu_notifier);
> +       struct rb_node *n;
> +       struct ummunotify_reg *reg;
> +       unsigned long flags;
> +       int hit = 0;
> +
> +       spin_lock_irqsave(&priv->lock, flags);
> +
> +       for (n = rb_first(&priv->reg_tree); n; n = rb_next(n)) {
> +               reg = rb_entry(n, struct ummunotify_reg, node);
> +
> +               /*
> +                * Ranges overlap if they're not disjoint; and they're
> +                * disjoint if the end of one is before the start of
> +                * the other one.  So if both disjointness comparisons
> +                * fail then the ranges overlap.
> +                *
> +                * Since we keep the tree of regions we're watching
> +                * sorted by start address, we can end this loop as
> +                * soon as we hit a region that starts past the end of
> +                * the range for the event we're handling.
> +                */
> +               if (reg->start >= end)
> +                       break;
> +
> +               /*
> +                * Just go to the next region if the start of the
> +                * range is after the end of the region -- there
> +                * might still be more overlapping ranges that have a
> +                * greater start.
> +                */
> +               if (start >= reg->end)
> +                       continue;
> +
> +               hit = 1;
> +
> +               if (test_and_set_bit(UMMUNOTIFY_FLAG_INVALID, &reg->flags)) {
> +                       /* Already on invalid list */
> +                       clear_bit(UMMUNOTIFY_FLAG_HINT, &reg->flags);
> +               } else {
> +                       list_add_tail(&reg->list, &priv->invalid_list);
> +                       set_bit(UMMUNOTIFY_FLAG_HINT, &reg->flags);
> +                       reg->hint_start = start;
> +                       reg->hint_end   = end;
> +               }
> +       }
> +
> +       if (hit) {
> +               ++(*priv->counter);
> +               flush_dcache_page(virt_to_page(priv->counter));
> +               wake_up_interruptible(&priv->read_wait);
> +               kill_fasync(&priv->async_queue, SIGIO, POLL_IN);
> +       }
> +
> +       spin_unlock_irqrestore(&priv->lock, flags);
> +}
> +
> +static void ummunotify_invalidate_page(struct mmu_notifier *mn,
> +                                      struct mm_struct *mm,
> +                                      unsigned long addr)
> +{
> +       ummunotify_handle_notify(mn, addr, addr + PAGE_SIZE);
> +}
> +
> +static void ummunotify_invalidate_range_start(struct mmu_notifier *mn,
> +                                             struct mm_struct *mm,
> +                                             unsigned long start,
> +                                             unsigned long end)
> +{
> +       ummunotify_handle_notify(mn, start, end);
> +}
> +
> +static const struct mmu_notifier_ops ummunotify_mmu_notifier_ops = {
> +       .invalidate_page        = ummunotify_invalidate_page,
> +       .invalidate_range_start = ummunotify_invalidate_range_start,
> +};
> +
> +static int ummunotify_open(struct inode *inode, struct file *filp)
> +{
> +       struct ummunotify_file *priv;
> +       int ret;
> +
> +       if (filp->f_mode & FMODE_WRITE)
> +               return -EINVAL;
> +
> +       priv = kmalloc(sizeof *priv, GFP_KERNEL);
> +       if (!priv)
> +               return -ENOMEM;
> +
> +       priv->counter = (void *) get_zeroed_page(GFP_KERNEL);
> +       if (!priv->counter) {
> +               ret = -ENOMEM;
> +               goto err;
> +       }
> +
> +       priv->reg_tree = RB_ROOT;
> +       INIT_LIST_HEAD(&priv->invalid_list);
> +       spin_lock_init(&priv->lock);
> +       init_waitqueue_head(&priv->read_wait);
> +       priv->async_queue = NULL;
> +       priv->need_empty  = 0;
> +       priv->used        = 0;
> +
> +       priv->mmu_notifier.ops = &ummunotify_mmu_notifier_ops;
> +       /*
> +        * Register notifier last, since notifications can occur as
> +        * soon as we register....
> +        */
> +       ret = mmu_notifier_register(&priv->mmu_notifier, current->mm);
> +       if (ret)
> +               goto err_page;
> +
> +       priv->mm = current->mm;
> +       atomic_inc(&priv->mm->mm_count);
> +
> +       filp->private_data = priv;
> +
> +       return 0;
> +
> +err_page:
> +       free_page((unsigned long) priv->counter);
> +
> +err:
> +       kfree(priv);
> +       return ret;
> +}
> +
> +static int ummunotify_close(struct inode *inode, struct file *filp)
> +{
> +       struct ummunotify_file *priv = filp->private_data;
> +       struct rb_node *n;
> +       struct ummunotify_reg *reg;
> +
> +       mmu_notifier_unregister(&priv->mmu_notifier, priv->mm);
> +       mmdrop(priv->mm);
> +       free_page((unsigned long) priv->counter);
> +
> +       for (n = rb_first(&priv->reg_tree); n; n = rb_next(n)) {
> +               reg = rb_entry(n, struct ummunotify_reg, node);
> +               kfree(reg);
> +       }
> +
> +       kfree(priv);
> +
> +       return 0;
> +}
> +
> +static bool ummunotify_readable(struct ummunotify_file *priv)
> +{
> +       return priv->need_empty || !list_empty(&priv->invalid_list);
> +}
> +
> +static ssize_t ummunotify_read(struct file *filp, char __user *buf,
> +                              size_t count, loff_t *pos)
> +{
> +       struct ummunotify_file *priv = filp->private_data;
> +       struct ummunotify_reg *reg;
> +       ssize_t ret;
> +       struct ummunotify_event *events;
> +       int max;
> +       int n;
> +
> +       priv->used = 1;
> +
> +       events = (void *) get_zeroed_page(GFP_KERNEL);
> +       if (!events) {
> +               ret = -ENOMEM;
> +               goto out;
> +       }
> +
> +       spin_lock_irq(&priv->lock);
> +
> +       while (!ummunotify_readable(priv)) {
> +               spin_unlock_irq(&priv->lock);
> +
> +               if (filp->f_flags & O_NONBLOCK) {
> +                       ret = -EAGAIN;
> +                       goto out;
> +               }
> +
> +               if (wait_event_interruptible(priv->read_wait,
> +                                            ummunotify_readable(priv))) {
> +                       ret = -ERESTARTSYS;
> +                       goto out;
> +               }
> +
> +               spin_lock_irq(&priv->lock);
> +       }
> +
> +       max = min_t(size_t, PAGE_SIZE, count) / sizeof *events;
> +
> +       for (n = 0; n < max; ++n) {
> +               if (list_empty(&priv->invalid_list)) {
> +                       events[n].type = UMMUNOTIFY_EVENT_TYPE_LAST;
> +                       events[n].user_cookie_counter = *priv->counter;
> +                       ++n;
> +                       priv->need_empty = 0;
> +                       break;
> +               }
> +
> +               reg = list_first_entry(&priv->invalid_list,
> +                                      struct ummunotify_reg, list);
> +
> +               events[n].type = UMMUNOTIFY_EVENT_TYPE_INVAL;
> +               if (test_bit(UMMUNOTIFY_FLAG_HINT, &reg->flags)) {
> +                       events[n].flags      = UMMUNOTIFY_EVENT_FLAG_HINT;
> +                       events[n].hint_start = max(reg->start, reg->hint_start);
> +                       events[n].hint_end   = min(reg->end, reg->hint_end);
> +               } else {
> +                       events[n].hint_start = reg->start;
> +                       events[n].hint_end   = reg->end;
> +               }
> +               events[n].user_cookie_counter = reg->user_cookie;
> +
> +               list_del(&reg->list);
> +               reg->flags = 0;
> +               priv->need_empty = 1;
> +       }
> +
> +       spin_unlock_irq(&priv->lock);
> +
> +       if (copy_to_user(buf, events, n * sizeof *events))
> +               ret = -EFAULT;
> +       else
> +               ret = n * sizeof *events;
> +
> +out:
> +       free_page((unsigned long) events);
> +       return ret;
> +}
> +
> +static unsigned int ummunotify_poll(struct file *filp,
> +                                   struct poll_table_struct *wait)
> +{
> +       struct ummunotify_file *priv = filp->private_data;
> +
> +       poll_wait(filp, &priv->read_wait, wait);
> +
> +       return ummunotify_readable(priv) ? (POLLIN | POLLRDNORM) : 0;
> +}
> +
> +static long ummunotify_exchange_features(struct ummunotify_file *priv,
> +                                        __u32 __user *arg)
> +{
> +       u32 feature_mask;
> +
> +       if (priv->used)
> +               return -EINVAL;
> +
> +       priv->used = 1;
> +
> +       if (copy_from_user(&feature_mask, arg, sizeof(feature_mask)))
> +               return -EFAULT;
> +
> +       /* No extensions defined at present. */
> +       feature_mask = 0;
> +
> +       if (copy_to_user(arg, &feature_mask, sizeof(feature_mask)))
> +               return -EFAULT;
> +
> +       return 0;
> +}
> +
> +static long ummunotify_register_region(struct ummunotify_file *priv,
> +                                      void __user *arg)
> +{
> +       struct ummunotify_register_ioctl parm;
> +       struct ummunotify_reg *reg, *treg;
> +       struct rb_node **n = &priv->reg_tree.rb_node;
> +       struct rb_node *pn;
> +       int ret = 0;
> +
> +       if (copy_from_user(&parm, arg, sizeof parm))
> +               return -EFAULT;
> +
> +       priv->used = 1;
> +
> +       reg = kmalloc(sizeof *reg, GFP_KERNEL);
> +       if (!reg)
> +               return -ENOMEM;
> +
> +       reg->user_cookie        = parm.user_cookie;
> +       reg->start              = parm.start;
> +       reg->end                = parm.end;
> +       reg->flags              = 0;
> +
> +       spin_lock_irq(&priv->lock);
> +
> +       for (pn = rb_first(&priv->reg_tree); pn; pn = rb_next(pn)) {
> +               treg = rb_entry(pn, struct ummunotify_reg, node);
> +
> +               if (treg->user_cookie == parm.user_cookie) {
> +                       kfree(reg);
> +                       ret = -EINVAL;
> +                       goto out;
> +               }
> +       }
> +
> +       pn = NULL;
> +       while (*n) {
> +               pn = *n;
> +               treg = rb_entry(pn, struct ummunotify_reg, node);
> +
> +               if (reg->start <= treg->start)
> +                       n = &pn->rb_left;
> +               else
> +                       n = &pn->rb_right;
> +       }
> +
> +       rb_link_node(&reg->node, pn, n);
> +       rb_insert_color(&reg->node, &priv->reg_tree);
> +
> +out:
> +       spin_unlock_irq(&priv->lock);
> +
> +       return ret;
> +}
> +
> +static long ummunotify_unregister_region(struct ummunotify_file *priv,
> +                                        __u64 __user *arg)
> +{
> +       u64 user_cookie;
> +       struct rb_node *n;
> +       struct ummunotify_reg *reg;
> +       int ret = -EINVAL;
> +
> +       if (copy_from_user(&user_cookie, arg, sizeof(user_cookie)))
> +               return -EFAULT;
> +
> +       spin_lock_irq(&priv->lock);
> +
> +       for (n = rb_first(&priv->reg_tree); n; n = rb_next(n)) {
> +               reg = rb_entry(n, struct ummunotify_reg, node);
> +
> +               if (reg->user_cookie == user_cookie) {
> +                       rb_erase(n, &priv->reg_tree);
> +                       if (test_bit(UMMUNOTIFY_FLAG_INVALID, &reg->flags))
> +                               list_del(&reg->list);
> +                       kfree(reg);
> +                       ret = 0;
> +                       break;
> +               }
> +       }
> +
> +       spin_unlock_irq(&priv->lock);
> +
> +       return ret;
> +}
> +
> +static long ummunotify_ioctl(struct file *filp, unsigned int cmd,
> +                            unsigned long arg)
> +{
> +       struct ummunotify_file *priv = filp->private_data;
> +       void __user *argp = (void __user *) arg;
> +
> +       switch (cmd) {
> +       case UMMUNOTIFY_EXCHANGE_FEATURES:
> +               return ummunotify_exchange_features(priv, argp);
> +       case UMMUNOTIFY_REGISTER_REGION:
> +               return ummunotify_register_region(priv, argp);
> +       case UMMUNOTIFY_UNREGISTER_REGION:
> +               return ummunotify_unregister_region(priv, argp);
> +       default:
> +               return -ENOIOCTLCMD;
> +       }
> +}
> +
> +static int ummunotify_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> +       struct ummunotify_file *priv = vma->vm_private_data;
> +
> +       if (vmf->pgoff != 0)
> +               return VM_FAULT_SIGBUS;
> +
> +       vmf->page = virt_to_page(priv->counter);
> +       get_page(vmf->page);
> +
> +       return 0;
> +
> +}
> +
> +static struct vm_operations_struct ummunotify_vm_ops = {
> +       .fault          = ummunotify_fault,
> +};
> +
> +static int ummunotify_mmap(struct file *filp, struct vm_area_struct *vma)
> +{
> +       struct ummunotify_file *priv = filp->private_data;
> +
> +       if (vma->vm_end - vma->vm_start != PAGE_SIZE || vma->vm_pgoff != 0)
> +               return -EINVAL;
> +
> +       vma->vm_ops             = &ummunotify_vm_ops;
> +       vma->vm_private_data    = priv;
> +
> +       return 0;
> +}
> +
> +static int ummunotify_fasync(int fd, struct file *filp, int on)
> +{
> +       struct ummunotify_file *priv = filp->private_data;
> +
> +       return fasync_helper(fd, filp, on, &priv->async_queue);
> +}
> +
> +static const struct file_operations ummunotify_fops = {
> +       .owner          = THIS_MODULE,
> +       .open           = ummunotify_open,
> +       .release        = ummunotify_close,
> +       .read           = ummunotify_read,
> +       .poll           = ummunotify_poll,
> +       .unlocked_ioctl = ummunotify_ioctl,
> +#ifdef CONFIG_COMPAT
> +       .compat_ioctl   = ummunotify_ioctl,
> +#endif
> +       .mmap           = ummunotify_mmap,
> +       .fasync         = ummunotify_fasync,
> +};
> +
> +static struct miscdevice ummunotify_misc = {
> +       .minor  = MISC_DYNAMIC_MINOR,
> +       .name   = "ummunotify",
> +       .fops   = &ummunotify_fops,
> +};
> +
> +static int __init ummunotify_init(void)
> +{
> +       return misc_register(&ummunotify_misc);
> +}
> +
> +static void __exit ummunotify_cleanup(void)
> +{
> +       misc_deregister(&ummunotify_misc);
> +}
> +
> +module_init(ummunotify_init);
> +module_exit(ummunotify_cleanup);
> diff --git a/include/linux/Kbuild b/include/linux/Kbuild
> index e2ea0b2..e086b39 100644
> --- a/include/linux/Kbuild
> +++ b/include/linux/Kbuild
> @@ -163,6 +163,7 @@ header-y += tipc_config.h
>  header-y += toshiba.h
>  header-y += udf_fs_i.h
>  header-y += ultrasound.h
> +header-y += ummunotify.h
>  header-y += un.h
>  header-y += utime.h
>  header-y += veth.h
> diff --git a/include/linux/ummunotify.h b/include/linux/ummunotify.h
> new file mode 100644
> index 0000000..21b0d03
> --- /dev/null
> +++ b/include/linux/ummunotify.h
> @@ -0,0 +1,121 @@
> +/*
> + * Copyright (c) 2009 Cisco Systems.  All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */
> +
> +#ifndef _LINUX_UMMUNOTIFY_H
> +#define _LINUX_UMMUNOTIFY_H
> +
> +#include <linux/types.h>
> +#include <linux/ioctl.h>
> +
> +/*
> + * Ummunotify relays MMU notifier events to userspace.  A userspace
> + * process uses it by opening /dev/ummunotify, which returns a file
> + * descriptor.  Interest in address ranges is registered using ioctl()
> + * and MMU notifier events are retrieved using read(), as described in
> + * more detail below.
> + *
> + * Userspace can also mmap() a single read-only page at offset 0 on
> + * this file descriptor.  This page contains (at offest 0) a single
> + * 64-bit generation counter that the kernel increments each time an
> + * MMU notifier event occurs.  Userspace can use this to very quickly
> + * check if there are any events to retrieve without needing to do a
> + * system call.
> + */
> +
> +/*
> + * struct ummunotify_register_ioctl describes an address range from
> + * start to end (including start but not including end) to be
> + * monitored.  user_cookie is an opaque handle that userspace assigns,
> + * and which is used to unregister.  flags and reserved are currently
> + * unused and should be set to 0 for forward compatibility.
> + */
> +struct ummunotify_register_ioctl {
> +       __u64   start;
> +       __u64   end;
> +       __u64   user_cookie;
> +       __u32   flags;
> +       __u32   reserved;
> +};
> +
> +#define UMMUNOTIFY_MAGIC               'U'
> +
> +/*
> + * Forward compatibility: Userspace passes in a 32-bit feature mask
> + * with feature flags set indicating which extensions it wishes to
> + * use.  The kernel will return a feature mask with the bits of
> + * userspace's mask that the kernel implements; from that point on
> + * both userspace and the kernel should behave as described by the
> + * kernel's feature mask.
> + *
> + * If userspace does not perform a UMMUNOTIFY_EXCHANGE_FEATURES ioctl,
> + * then the kernel will use a feature mask of 0.
> + *
> + * No feature flags are currently defined, so the kernel will always
> + * return a feature mask of 0 at present.
> + */
> +#define UMMUNOTIFY_EXCHANGE_FEATURES   _IOWR(UMMUNOTIFY_MAGIC, 1, __u32)
> +
> +/*
> + * Register interest in an address range; userspace should pass in a
> + * struct ummunotify_register_ioctl describing the region.
> + */
> +#define UMMUNOTIFY_REGISTER_REGION     _IOW(UMMUNOTIFY_MAGIC, 2, \
> +                                            struct ummunotify_register_ioctl)
> +/*
> + * Unregister interest in an address range; userspace should pass in
> + * the user_cookie value that was used to register the address range.
> + * No events for the address range will be reported once it is
> + * unregistered.
> + */
> +#define UMMUNOTIFY_UNREGISTER_REGION   _IOW(UMMUNOTIFY_MAGIC, 3, __u64)
> +
> +/*
> + * Invalidation events are returned whenever the kernel changes the
> + * mapping for a monitored address.  These events are retrieved by
> + * read() on the ummunotify file descriptor, which will fill the
> + * read() buffer with struct ummunotify_event.
> + *
> + * If type field is INVAL, then user_cookie_counter holds the
> + * user_cookie for the region being reported; if the HINT flag is set
> + * then hint_start/hint_end hold the start and end of the mapping that
> + * was invalidated.  (If HINT is not set, then multiple events
> + * invalidated parts of the registered range and hint_start/hint_end
> + * and set to the start/end of the whole registered range)
> + *
> + * If type is LAST, then the read operation has emptied the list of
> + * invalidated regions, and user_cookie_counter holds the value of the
> + * kernel's generation counter when the empty list occurred.  The
> + * other fields are not filled in for this event.
> + */
> +enum {
> +       UMMUNOTIFY_EVENT_TYPE_INVAL     = 0,
> +       UMMUNOTIFY_EVENT_TYPE_LAST      = 1,
> +};
> +
> +enum {
> +       UMMUNOTIFY_EVENT_FLAG_HINT      = 1 << 0,
> +};
> +
> +struct ummunotify_event {
> +       __u32   type;
> +       __u32   flags;
> +       __u64   hint_start;
> +       __u64   hint_end;
> +       __u64   user_cookie_counter;
> +};
> +
> +#endif /* _LINUX_UMMUNOTIFY_H */
> --
> 1.6.3.3
> 
> 


-- 
Jeff Squyres
jsquyres@cisco.com
For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox