* [PATCH v3 29/52] IB/qib: Add qib_qsfp.c
From: Ralph Campbell @ 2010-05-07 0:01 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_qsfp.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_qsfp.c | 564 ++++++++++++++++++++++++++++++++++
1 files changed, 564 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_qsfp.c
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.c b/drivers/infiniband/hw/qib/qib_qsfp.c
new file mode 100644
index 0000000..35b3604
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_qsfp.c
@@ -0,0 +1,564 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+
+#include "qib.h"
+#include "qib_qsfp.h"
+
+/*
+ * QSFP support for ib_qib driver, using "Two Wire Serial Interface" driver
+ * in qib_twsi.c
+ */
+#define QSFP_MAX_RETRY 4
+
+static int qsfp_read(struct qib_pportdata *ppd, int addr, void *bp, int len)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u32 out, mask;
+ int ret, cnt, pass = 0;
+ int stuck = 0;
+ u8 *buff = bp;
+
+ ret = mutex_lock_interruptible(&dd->eep_lock);
+ if (ret)
+ goto no_unlock;
+
+ if (dd->twsi_eeprom_dev == QIB_TWSI_NO_DEV) {
+ ret = -ENXIO;
+ goto bail;
+ }
+
+ /*
+ * We presume, if we are called at all, that this board has
+ * QSFP. This is on the same i2c chain as the legacy parts,
+ * but only responds if the module is selected via GPIO pins.
+ * Further, there are very long setup and hold requirements
+ * on MODSEL.
+ */
+ mask = QSFP_GPIO_MOD_SEL_N | QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE;
+ out = QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE;
+ if (ppd->hw_pidx) {
+ mask <<= QSFP_GPIO_PORT2_SHIFT;
+ out <<= QSFP_GPIO_PORT2_SHIFT;
+ }
+
+ dd->f_gpio_mod(dd, out, mask, mask);
+
+ /*
+ * Module could take up to 2 Msec to respond to MOD_SEL, and there
+ * is no way to tell if it is ready, so we must wait.
+ */
+ msleep(2);
+
+ /* Make sure TWSI bus is in sane state. */
+ ret = qib_twsi_reset(dd);
+ if (ret) {
+ qib_dev_porterr(dd, ppd->port,
+ "QSFP interface Reset for read failed\n");
+ ret = -EIO;
+ stuck = 1;
+ goto deselect;
+ }
+
+ /* All QSFP modules are at A0 */
+
+ cnt = 0;
+ while (cnt < len) {
+ unsigned in_page;
+ int wlen = len - cnt;
+ in_page = addr % QSFP_PAGESIZE;
+ if ((in_page + wlen) > QSFP_PAGESIZE)
+ wlen = QSFP_PAGESIZE - in_page;
+ ret = qib_twsi_blk_rd(dd, QSFP_DEV, addr, buff + cnt, wlen);
+ /* Some QSFP's fail first try. Retry as experiment */
+ if (ret && cnt == 0 && ++pass < QSFP_MAX_RETRY)
+ continue;
+ if (ret) {
+ /* qib_twsi_blk_rd() 1 for error, else 0 */
+ ret = -EIO;
+ goto deselect;
+ }
+ addr += wlen;
+ cnt += wlen;
+ }
+ ret = cnt;
+
+deselect:
+ /*
+ * Module could take up to 10 uSec after transfer before
+ * ready to respond to MOD_SEL negation, and there is no way
+ * to tell if it is ready, so we must wait.
+ */
+ udelay(10);
+ /* set QSFP MODSEL, RST. LP all high */
+ dd->f_gpio_mod(dd, mask, mask, mask);
+
+ /*
+ * Module could take up to 2 Msec to respond to MOD_SEL
+ * going away, and there is no way to tell if it is ready.
+ * so we must wait.
+ */
+ if (stuck)
+ qib_dev_err(dd, "QSFP interface bus stuck non-idle\n");
+
+ if (pass >= QSFP_MAX_RETRY && ret)
+ qib_dev_porterr(dd, ppd->port, "QSFP failed even retrying\n");
+ else if (pass)
+ qib_dev_porterr(dd, ppd->port, "QSFP retries: %d\n", pass);
+
+ msleep(2);
+
+bail:
+ mutex_unlock(&dd->eep_lock);
+
+no_unlock:
+ return ret;
+}
+
+/*
+ * qsfp_write
+ * We do not ordinarily write the QSFP, but this is needed to select
+ * the page on non-flat QSFPs, and possibly later unusual cases
+ */
+static int qib_qsfp_write(struct qib_pportdata *ppd, int addr, void *bp,
+ int len)
+{
+ struct qib_devdata *dd = ppd->dd;
+ u32 out, mask;
+ int ret, cnt;
+ u8 *buff = bp;
+
+ ret = mutex_lock_interruptible(&dd->eep_lock);
+ if (ret)
+ goto no_unlock;
+
+ if (dd->twsi_eeprom_dev == QIB_TWSI_NO_DEV) {
+ ret = -ENXIO;
+ goto bail;
+ }
+
+ /*
+ * We presume, if we are called at all, that this board has
+ * QSFP. This is on the same i2c chain as the legacy parts,
+ * but only responds if the module is selected via GPIO pins.
+ * Further, there are very long setup and hold requirements
+ * on MODSEL.
+ */
+ mask = QSFP_GPIO_MOD_SEL_N | QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE;
+ out = QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE;
+ if (ppd->hw_pidx) {
+ mask <<= QSFP_GPIO_PORT2_SHIFT;
+ out <<= QSFP_GPIO_PORT2_SHIFT;
+ }
+ dd->f_gpio_mod(dd, out, mask, mask);
+
+ /*
+ * Module could take up to 2 Msec to respond to MOD_SEL,
+ * and there is no way to tell if it is ready, so we must wait.
+ */
+ msleep(2);
+
+ /* Make sure TWSI bus is in sane state. */
+ ret = qib_twsi_reset(dd);
+ if (ret) {
+ qib_dev_porterr(dd, ppd->port,
+ "QSFP interface Reset for write failed\n");
+ ret = -EIO;
+ goto deselect;
+ }
+
+ /* All QSFP modules are at A0 */
+
+ cnt = 0;
+ while (cnt < len) {
+ unsigned in_page;
+ int wlen = len - cnt;
+ in_page = addr % QSFP_PAGESIZE;
+ if ((in_page + wlen) > QSFP_PAGESIZE)
+ wlen = QSFP_PAGESIZE - in_page;
+ ret = qib_twsi_blk_wr(dd, QSFP_DEV, addr, buff + cnt, wlen);
+ if (ret) {
+ /* qib_twsi_blk_wr() 1 for error, else 0 */
+ ret = -EIO;
+ goto deselect;
+ }
+ addr += wlen;
+ cnt += wlen;
+ }
+ ret = cnt;
+
+deselect:
+ /*
+ * Module could take up to 10 uSec after transfer before
+ * ready to respond to MOD_SEL negation, and there is no way
+ * to tell if it is ready, so we must wait.
+ */
+ udelay(10);
+ /* set QSFP MODSEL, RST, LP high */
+ dd->f_gpio_mod(dd, mask, mask, mask);
+ /*
+ * Module could take up to 2 Msec to respond to MOD_SEL
+ * going away, and there is no way to tell if it is ready.
+ * so we must wait.
+ */
+ msleep(2);
+
+bail:
+ mutex_unlock(&dd->eep_lock);
+
+no_unlock:
+ return ret;
+}
+
+/*
+ * For validation, we want to check the checksums, even of the
+ * fields we do not otherwise use. This function reads the bytes from
+ * <first> to <next-1> and returns the 8lsbs of the sum, or <0 for errors
+ */
+static int qsfp_cks(struct qib_pportdata *ppd, int first, int next)
+{
+ int ret;
+ u16 cks;
+ u8 bval;
+
+ cks = 0;
+ while (first < next) {
+ ret = qsfp_read(ppd, first, &bval, 1);
+ if (ret < 0)
+ goto bail;
+ cks += bval;
+ ++first;
+ }
+ ret = cks & 0xFF;
+bail:
+ return ret;
+
+}
+
+int qib_refresh_qsfp_cache(struct qib_pportdata *ppd, struct qib_qsfp_cache *cp)
+{
+ int ret;
+ int idx;
+ u16 cks;
+ u32 mask;
+ u8 peek[4];
+
+ /* ensure sane contents on invalid reads, for cable swaps */
+ memset(cp, 0, sizeof(*cp));
+
+ mask = QSFP_GPIO_MOD_PRS_N;
+ if (ppd->hw_pidx)
+ mask <<= QSFP_GPIO_PORT2_SHIFT;
+
+ ret = ppd->dd->f_gpio_mod(ppd->dd, 0, 0, 0);
+ if (ret & mask) {
+ ret = -ENODEV;
+ goto bail;
+ }
+
+ ret = qsfp_read(ppd, 0, peek, 3);
+ if (ret < 0)
+ goto bail;
+ if ((peek[0] & 0xFE) != 0x0C)
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "QSFP byte0 is 0x%02X, S/B 0x0C/D\n", peek[0]);
+
+ if ((peek[2] & 2) == 0) {
+ /*
+ * If cable is paged, rather than "flat memory", we need to
+ * set the page to zero, Even if it already appears to be zero.
+ */
+ u8 poke = 0;
+ ret = qib_qsfp_write(ppd, 127, &poke, 1);
+ udelay(50);
+ if (ret != 1) {
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "Failed QSFP Page set\n");
+ goto bail;
+ }
+ }
+
+ ret = qsfp_read(ppd, QSFP_MOD_ID_OFFS, &cp->id, 1);
+ if (ret < 0)
+ goto bail;
+ if ((cp->id & 0xFE) != 0x0C)
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "QSFP ID byte is 0x%02X, S/B 0x0C/D\n", cp->id);
+ cks = cp->id;
+
+ ret = qsfp_read(ppd, QSFP_MOD_PWR_OFFS, &cp->pwr, 1);
+ if (ret < 0)
+ goto bail;
+ cks += cp->pwr;
+
+ ret = qsfp_cks(ppd, QSFP_MOD_PWR_OFFS + 1, QSFP_MOD_LEN_OFFS);
+ if (ret < 0)
+ goto bail;
+ cks += ret;
+
+ ret = qsfp_read(ppd, QSFP_MOD_LEN_OFFS, &cp->len, 1);
+ if (ret < 0)
+ goto bail;
+ cks += cp->len;
+
+ ret = qsfp_read(ppd, QSFP_MOD_TECH_OFFS, &cp->tech, 1);
+ if (ret < 0)
+ goto bail;
+ cks += cp->tech;
+
+ ret = qsfp_read(ppd, QSFP_VEND_OFFS, &cp->vendor, QSFP_VEND_LEN);
+ if (ret < 0)
+ goto bail;
+ for (idx = 0; idx < QSFP_VEND_LEN; ++idx)
+ cks += cp->vendor[idx];
+
+ ret = qsfp_read(ppd, QSFP_IBXCV_OFFS, &cp->xt_xcv, 1);
+ if (ret < 0)
+ goto bail;
+ cks += cp->xt_xcv;
+
+ ret = qsfp_read(ppd, QSFP_VOUI_OFFS, &cp->oui, QSFP_VOUI_LEN);
+ if (ret < 0)
+ goto bail;
+ for (idx = 0; idx < QSFP_VOUI_LEN; ++idx)
+ cks += cp->oui[idx];
+
+ ret = qsfp_read(ppd, QSFP_PN_OFFS, &cp->partnum, QSFP_PN_LEN);
+ if (ret < 0)
+ goto bail;
+ for (idx = 0; idx < QSFP_PN_LEN; ++idx)
+ cks += cp->partnum[idx];
+
+ ret = qsfp_read(ppd, QSFP_REV_OFFS, &cp->rev, QSFP_REV_LEN);
+ if (ret < 0)
+ goto bail;
+ for (idx = 0; idx < QSFP_REV_LEN; ++idx)
+ cks += cp->rev[idx];
+
+ ret = qsfp_read(ppd, QSFP_ATTEN_OFFS, &cp->atten, QSFP_ATTEN_LEN);
+ if (ret < 0)
+ goto bail;
+ for (idx = 0; idx < QSFP_ATTEN_LEN; ++idx)
+ cks += cp->atten[idx];
+
+ ret = qsfp_cks(ppd, QSFP_ATTEN_OFFS + QSFP_ATTEN_LEN, QSFP_CC_OFFS);
+ if (ret < 0)
+ goto bail;
+ cks += ret;
+
+ cks &= 0xFF;
+ ret = qsfp_read(ppd, QSFP_CC_OFFS, &cp->cks1, 1);
+ if (ret < 0)
+ goto bail;
+ if (cks != cp->cks1)
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "QSFP cks1 is %02X, computed %02X\n", cp->cks1,
+ cks);
+
+ /* Second checksum covers 192 to (serial, date, lot) */
+ ret = qsfp_cks(ppd, QSFP_CC_OFFS + 1, QSFP_SN_OFFS);
+ if (ret < 0)
+ goto bail;
+ cks = ret;
+
+ ret = qsfp_read(ppd, QSFP_SN_OFFS, &cp->serial, QSFP_SN_LEN);
+ if (ret < 0)
+ goto bail;
+ for (idx = 0; idx < QSFP_SN_LEN; ++idx)
+ cks += cp->serial[idx];
+
+ ret = qsfp_read(ppd, QSFP_DATE_OFFS, &cp->date, QSFP_DATE_LEN);
+ if (ret < 0)
+ goto bail;
+ for (idx = 0; idx < QSFP_DATE_LEN; ++idx)
+ cks += cp->date[idx];
+
+ ret = qsfp_read(ppd, QSFP_LOT_OFFS, &cp->lot, QSFP_LOT_LEN);
+ if (ret < 0)
+ goto bail;
+ for (idx = 0; idx < QSFP_LOT_LEN; ++idx)
+ cks += cp->lot[idx];
+
+ ret = qsfp_cks(ppd, QSFP_LOT_OFFS + QSFP_LOT_LEN, QSFP_CC_EXT_OFFS);
+ if (ret < 0)
+ goto bail;
+ cks += ret;
+
+ ret = qsfp_read(ppd, QSFP_CC_EXT_OFFS, &cp->cks2, 1);
+ if (ret < 0)
+ goto bail;
+ cks &= 0xFF;
+ if (cks != cp->cks2)
+ qib_dev_porterr(ppd->dd, ppd->port,
+ "QSFP cks2 is %02X, computed %02X\n", cp->cks2,
+ cks);
+ return 0;
+
+bail:
+ cp->id = 0;
+ return ret;
+}
+
+const char * const qib_qsfp_devtech[16] = {
+ "850nm VCSEL", "1310nm VCSEL", "1550nm VCSEL", "1310nm FP",
+ "1310nm DFB", "1550nm DFB", "1310nm EML", "1550nm EML",
+ "Cu Misc", "1490nm DFB", "Cu NoEq", "Cu Eq",
+ "Undef", "Cu Active BothEq", "Cu FarEq", "Cu NearEq"
+};
+
+#define QSFP_DUMP_CHUNK 16 /* Holds longest string */
+#define QSFP_DEFAULT_HDR_CNT 224
+
+static const char *pwr_codes = "1.5W2.0W2.5W3.5W";
+
+/*
+ * Initialize structures that control access to QSFP. Called once per port
+ * on cards that support QSFP.
+ */
+void qib_qsfp_init(struct qib_qsfp_data *qd,
+ void (*fevent)(struct work_struct *))
+{
+ u32 mask, highs;
+ int pins;
+
+ struct qib_devdata *dd = qd->ppd->dd;
+
+ /* Initialize work struct for later QSFP events */
+ INIT_WORK(&qd->work, fevent);
+
+ /*
+ * Later, we may want more validation. For now, just set up pins and
+ * blip reset. If module is present, call qib_refresh_qsfp_cache(),
+ * to do further init.
+ */
+ mask = QSFP_GPIO_MOD_SEL_N | QSFP_GPIO_MOD_RST_N | QSFP_GPIO_LP_MODE;
+ highs = mask - QSFP_GPIO_MOD_RST_N;
+ if (qd->ppd->hw_pidx) {
+ mask <<= QSFP_GPIO_PORT2_SHIFT;
+ highs <<= QSFP_GPIO_PORT2_SHIFT;
+ }
+ dd->f_gpio_mod(dd, highs, mask, mask);
+ udelay(20); /* Generous RST dwell */
+
+ dd->f_gpio_mod(dd, mask, mask, mask);
+ /* Spec says module can take up to two seconds! */
+ mask = QSFP_GPIO_MOD_PRS_N;
+ if (qd->ppd->hw_pidx)
+ mask <<= QSFP_GPIO_PORT2_SHIFT;
+
+ /* Do not try to wait here. Better to let event handle it */
+ pins = dd->f_gpio_mod(dd, 0, 0, 0);
+ if (pins & mask)
+ goto bail;
+ /* We see a module, but it may be unwise to look yet. Just schedule */
+ qd->t_insert = get_jiffies_64();
+ schedule_work(&qd->work);
+bail:
+ return;
+}
+
+void qib_qsfp_deinit(struct qib_qsfp_data *qd)
+{
+ /*
+ * There is nothing to do here for now. our
+ * work is scheduled with schedule_work(), and
+ * flush_scheduled_work() from remove_one will
+ * block until all work ssetup with schedule_work()
+ * completes.
+ */
+}
+
+int qib_qsfp_dump(struct qib_pportdata *ppd, char *buf, int len)
+{
+ struct qib_qsfp_cache cd;
+ u8 bin_buff[QSFP_DUMP_CHUNK];
+ char lenstr[6];
+ int sofar, ret;
+ int bidx = 0;
+
+ sofar = 0;
+ ret = qib_refresh_qsfp_cache(ppd, &cd);
+ if (ret < 0)
+ goto bail;
+
+ lenstr[0] = ' ';
+ lenstr[1] = '\0';
+ if (QSFP_IS_CU(cd.tech))
+ sprintf(lenstr, "%dM ", cd.len);
+
+ sofar += scnprintf(buf + sofar, len - sofar, "PWR:%.3sW\n", pwr_codes +
+ (QSFP_PWR(cd.pwr) * 4));
+
+ sofar += scnprintf(buf + sofar, len - sofar, "TECH:%s%s\n", lenstr,
+ qib_qsfp_devtech[cd.tech >> 4]);
+
+ sofar += scnprintf(buf + sofar, len - sofar, "Vendor:%.*s\n",
+ QSFP_VEND_LEN, cd.vendor);
+
+ sofar += scnprintf(buf + sofar, len - sofar, "OUI:%06X\n",
+ QSFP_OUI(cd.oui));
+
+ sofar += scnprintf(buf + sofar, len - sofar, "Part#:%.*s\n",
+ QSFP_PN_LEN, cd.partnum);
+ sofar += scnprintf(buf + sofar, len - sofar, "Rev:%.*s\n",
+ QSFP_REV_LEN, cd.rev);
+ if (QSFP_IS_CU(cd.tech))
+ sofar += scnprintf(buf + sofar, len - sofar, "Atten:%d, %d\n",
+ QSFP_ATTEN_SDR(cd.atten),
+ QSFP_ATTEN_DDR(cd.atten));
+ sofar += scnprintf(buf + sofar, len - sofar, "Serial:%.*s\n",
+ QSFP_SN_LEN, cd.serial);
+ sofar += scnprintf(buf + sofar, len - sofar, "Date:%.*s\n",
+ QSFP_DATE_LEN, cd.date);
+ sofar += scnprintf(buf + sofar, len - sofar, "Lot:%.*s\n",
+ QSFP_LOT_LEN, cd.date);
+
+ while (bidx < QSFP_DEFAULT_HDR_CNT) {
+ int iidx;
+ ret = qsfp_read(ppd, bidx, bin_buff, QSFP_DUMP_CHUNK);
+ if (ret < 0)
+ goto bail;
+ for (iidx = 0; iidx < ret; ++iidx) {
+ sofar += scnprintf(buf + sofar, len-sofar, " %02X",
+ bin_buff[iidx]);
+ }
+ sofar += scnprintf(buf + sofar, len - sofar, "\n");
+ bidx += QSFP_DUMP_CHUNK;
+ }
+ ret = sofar;
+bail:
+ return ret;
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 30/52] IB/qib: Add qib_qsfp.h
From: Ralph Campbell @ 2010-05-07 0:01 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_qsfp.h file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_qsfp.h | 184 ++++++++++++++++++++++++++++++++++
1 files changed, 184 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_qsfp.h
diff --git a/drivers/infiniband/hw/qib/qib_qsfp.h b/drivers/infiniband/hw/qib/qib_qsfp.h
new file mode 100644
index 0000000..19b527b
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_qsfp.h
@@ -0,0 +1,184 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/* QSFP support common definitions, for ib_qib driver */
+
+#define QSFP_DEV 0xA0
+#define QSFP_PWR_LAG_MSEC 2000
+
+/*
+ * Below are masks for various QSFP signals, for Port 1.
+ * Port2 equivalents are shifted by QSFP_GPIO_PORT2_SHIFT.
+ * _N means asserted low
+ */
+#define QSFP_GPIO_MOD_SEL_N (4)
+#define QSFP_GPIO_MOD_PRS_N (8)
+#define QSFP_GPIO_INT_N (0x10)
+#define QSFP_GPIO_MOD_RST_N (0x20)
+#define QSFP_GPIO_LP_MODE (0x40)
+#define QSFP_GPIO_PORT2_SHIFT 5
+
+#define QSFP_PAGESIZE 128
+/* Defined fields that QLogic requires of qualified cables */
+/* Byte 0 is Identifier, not checked */
+/* Byte 1 is reserved "status MSB" */
+/* Byte 2 is "status LSB" We only care that D2 "Flat Mem" is set. */
+/*
+ * Rest of first 128 not used, although 127 is reserved for page select
+ * if module is not "Flat memory".
+ */
+/* Byte 128 is Identifier: must be 0x0c for QSFP, or 0x0d for QSFP+ */
+#define QSFP_MOD_ID_OFFS 128
+/*
+ * Byte 129 is "Extended Identifier". We only care about D7,D6: Power class
+ * 0:1.5W, 1:2.0W, 2:2.5W, 3:3.5W
+ */
+#define QSFP_MOD_PWR_OFFS 129
+/* Byte 130 is Connector type. Not QLogic req'd */
+/* Bytes 131..138 are Transceiver types, bit maps for various tech, none IB */
+/* Byte 139 is encoding. code 0x01 is 8b10b. Not QLogic req'd */
+/* byte 140 is nominal bit-rate, in units of 100Mbits/sec Not QLogic req'd */
+/* Byte 141 is Extended Rate Select. Not QLogic req'd */
+/* Bytes 142..145 are lengths for various fiber types. Not QLogic req'd */
+/* Byte 146 is length for Copper. Units of 1 meter */
+#define QSFP_MOD_LEN_OFFS 146
+/*
+ * Byte 147 is Device technology. D0..3 not Qlogc req'd
+ * D4..7 select from 15 choices, translated by table:
+ */
+#define QSFP_MOD_TECH_OFFS 147
+extern const char *const qib_qsfp_devtech[16];
+/* Active Equalization includes fiber, copper full EQ, and copper near Eq */
+#define QSFP_IS_ACTIVE(tech) ((0xA2FF >> ((tech) >> 4)) & 1)
+/* Attenuation should be valid for copper other than full/near Eq */
+#define QSFP_HAS_ATTEN(tech) ((0x4D00 >> ((tech) >> 4)) & 1)
+/* Length is only valid if technology is "copper" */
+#define QSFP_IS_CU(tech) ((0xED00 >> ((tech) >> 4)) & 1)
+#define QSFP_TECH_1490 9
+
+#define QSFP_OUI(oui) (((unsigned)oui[0] << 16) | ((unsigned)oui[1] << 8) | \
+ oui[2])
+#define QSFP_OUI_AMPHENOL 0x415048
+#define QSFP_OUI_FINISAR 0x009065
+#define QSFP_OUI_GORE 0x002177
+
+/* Bytes 148..163 are Vendor Name, Left-justified Blank-filled */
+#define QSFP_VEND_OFFS 148
+#define QSFP_VEND_LEN 16
+/* Byte 164 is IB Extended tranceiver codes Bits D0..3 are SDR,DDR,QDR,EDR */
+#define QSFP_IBXCV_OFFS 164
+/* Bytes 165..167 are Vendor OUI number */
+#define QSFP_VOUI_OFFS 165
+#define QSFP_VOUI_LEN 3
+/* Bytes 168..183 are Vendor Part Number, string */
+#define QSFP_PN_OFFS 168
+#define QSFP_PN_LEN 16
+/* Bytes 184,185 are Vendor Rev. Left Justified, Blank-filled */
+#define QSFP_REV_OFFS 184
+#define QSFP_REV_LEN 2
+/*
+ * Bytes 186,187 are Wavelength, if Optical. Not Qlogic req'd
+ * If copper, they are attenuation in dB:
+ * Byte 186 is at 2.5Gb/sec (SDR), Byte 187 at 5.0Gb/sec (DDR)
+ */
+#define QSFP_ATTEN_OFFS 186
+#define QSFP_ATTEN_LEN 2
+/* Bytes 188,189 are Wavelength tolerance, not QLogic req'd */
+/* Byte 190 is Max Case Temp. Not QLogic req'd */
+/* Byte 191 is LSB of sum of bytes 128..190. Not QLogic req'd */
+#define QSFP_CC_OFFS 191
+/* Bytes 192..195 are Options implemented in qsfp. Not Qlogic req'd */
+/* Bytes 196..211 are Serial Number, String */
+#define QSFP_SN_OFFS 196
+#define QSFP_SN_LEN 16
+/* Bytes 212..219 are date-code YYMMDD (MM==1 for Jan) */
+#define QSFP_DATE_OFFS 212
+#define QSFP_DATE_LEN 6
+/* Bytes 218,219 are optional lot-code, string */
+#define QSFP_LOT_OFFS 218
+#define QSFP_LOT_LEN 2
+/* Bytes 220, 221 indicate monitoring options, Not QLogic req'd */
+/* Byte 223 is LSB of sum of bytes 192..222 */
+#define QSFP_CC_EXT_OFFS 223
+
+/*
+ * struct qib_qsfp_data encapsulates state of QSFP device for one port.
+ * it will be part of port-chip-specific data if a board supports QSFP.
+ *
+ * Since multiple board-types use QSFP, and their pport_data structs
+ * differ (in the chip-specific section), we need a pointer to its head.
+ *
+ * Avoiding premature optimization, we will have one work_struct per port,
+ * and let the (increasingly inaccurately named) eep_lock arbitrate
+ * access to common resources.
+ *
+ */
+
+/*
+ * Hold the parts of the onboard EEPROM that we care about, so we aren't
+ * coonstantly bit-boffing
+ */
+struct qib_qsfp_cache {
+ u8 id; /* must be 0x0C or 0x0D; 0 indicates invalid EEPROM read */
+ u8 pwr; /* in D6,7 */
+ u8 len; /* in meters, Cu only */
+ u8 tech;
+ char vendor[QSFP_VEND_LEN];
+ u8 xt_xcv; /* Ext. tranceiver codes, 4 lsbs are IB speed supported */
+ u8 oui[QSFP_VOUI_LEN];
+ u8 partnum[QSFP_PN_LEN];
+ u8 rev[QSFP_REV_LEN];
+ u8 atten[QSFP_ATTEN_LEN];
+ u8 cks1; /* Checksum of bytes 128..190 */
+ u8 serial[QSFP_SN_LEN];
+ u8 date[QSFP_DATE_LEN];
+ u8 lot[QSFP_LOT_LEN];
+ u8 cks2; /* Checsum of bytes 192..222 */
+};
+
+#define QSFP_PWR(pbyte) (((pbyte) >> 6) & 3)
+#define QSFP_ATTEN_SDR(attenarray) (attenarray[0])
+#define QSFP_ATTEN_DDR(attenarray) (attenarray[1])
+
+struct qib_qsfp_data {
+ /* Helps to find our way */
+ struct qib_pportdata *ppd;
+ struct work_struct work;
+ struct qib_qsfp_cache cache;
+ u64 t_insert;
+};
+
+extern int qib_refresh_qsfp_cache(struct qib_pportdata *ppd,
+ struct qib_qsfp_cache *cp);
+extern void qib_qsfp_init(struct qib_qsfp_data *qd,
+ void (*fevent)(struct work_struct *));
+extern void qib_qsfp_deinit(struct qib_qsfp_data *qd);
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 31/52] IB/qib: Add qib_rc.c
From: Ralph Campbell @ 2010-05-07 0:01 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_rc.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_rc.c | 2288 ++++++++++++++++++++++++++++++++++++
1 files changed, 2288 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_rc.c
diff --git a/drivers/infiniband/hw/qib/qib_rc.c b/drivers/infiniband/hw/qib/qib_rc.c
new file mode 100644
index 0000000..40c0a37
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_rc.c
@@ -0,0 +1,2288 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/io.h>
+
+#include "qib.h"
+
+/* cut down ridiculously long IB macro names */
+#define OP(x) IB_OPCODE_RC_##x
+
+static void rc_timeout(unsigned long arg);
+
+static u32 restart_sge(struct qib_sge_state *ss, struct qib_swqe *wqe,
+ u32 psn, u32 pmtu)
+{
+ u32 len;
+
+ len = ((psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
+ ss->sge = wqe->sg_list[0];
+ ss->sg_list = wqe->sg_list + 1;
+ ss->num_sge = wqe->wr.num_sge;
+ ss->total_len = wqe->length;
+ qib_skip_sge(ss, len, 0);
+ return wqe->length - len;
+}
+
+static void start_timer(struct qib_qp *qp)
+{
+ qp->s_flags |= QIB_S_TIMER;
+ qp->s_timer.function = rc_timeout;
+ /* 4.096 usec. * (1 << qp->timeout) */
+ qp->s_timer.expires = jiffies +
+ usecs_to_jiffies((4096UL * (1UL << qp->timeout)) / 1000UL);
+ add_timer(&qp->s_timer);
+}
+
+/**
+ * qib_make_rc_ack - construct a response packet (ACK, NAK, or RDMA read)
+ * @dev: the device for this QP
+ * @qp: a pointer to the QP
+ * @ohdr: a pointer to the IB header being constructed
+ * @pmtu: the path MTU
+ *
+ * Return 1 if constructed; otherwise, return 0.
+ * Note that we are in the responder's side of the QP context.
+ * Note the QP s_lock must be held.
+ */
+static int qib_make_rc_ack(struct qib_ibdev *dev, struct qib_qp *qp,
+ struct qib_other_headers *ohdr, u32 pmtu)
+{
+ struct qib_ack_entry *e;
+ u32 hwords;
+ u32 len;
+ u32 bth0;
+ u32 bth2;
+
+ /* Don't send an ACK if we aren't supposed to. */
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+ goto bail;
+
+ /* header size in 32-bit words LRH+BTH = (8+12)/4. */
+ hwords = 5;
+
+ switch (qp->s_ack_state) {
+ case OP(RDMA_READ_RESPONSE_LAST):
+ case OP(RDMA_READ_RESPONSE_ONLY):
+ e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+ if (e->rdma_sge.mr) {
+ atomic_dec(&e->rdma_sge.mr->refcount);
+ e->rdma_sge.mr = NULL;
+ }
+ /* FALLTHROUGH */
+ case OP(ATOMIC_ACKNOWLEDGE):
+ /*
+ * We can increment the tail pointer now that the last
+ * response has been sent instead of only being
+ * constructed.
+ */
+ if (++qp->s_tail_ack_queue > QIB_MAX_RDMA_ATOMIC)
+ qp->s_tail_ack_queue = 0;
+ /* FALLTHROUGH */
+ case OP(SEND_ONLY):
+ case OP(ACKNOWLEDGE):
+ /* Check for no next entry in the queue. */
+ if (qp->r_head_ack_queue == qp->s_tail_ack_queue) {
+ if (qp->s_flags & QIB_S_ACK_PENDING)
+ goto normal;
+ goto bail;
+ }
+
+ e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+ if (e->opcode == OP(RDMA_READ_REQUEST)) {
+ /*
+ * If a RDMA read response is being resent and
+ * we haven't seen the duplicate request yet,
+ * then stop sending the remaining responses the
+ * responder has seen until the requester resends it.
+ */
+ len = e->rdma_sge.sge_length;
+ if (len && !e->rdma_sge.mr) {
+ qp->s_tail_ack_queue = qp->r_head_ack_queue;
+ goto bail;
+ }
+ /* Copy SGE state in case we need to resend */
+ qp->s_rdma_mr = e->rdma_sge.mr;
+ if (qp->s_rdma_mr)
+ atomic_inc(&qp->s_rdma_mr->refcount);
+ qp->s_ack_rdma_sge.sge = e->rdma_sge;
+ qp->s_ack_rdma_sge.num_sge = 1;
+ qp->s_cur_sge = &qp->s_ack_rdma_sge;
+ if (len > pmtu) {
+ len = pmtu;
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_FIRST);
+ } else {
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_ONLY);
+ e->sent = 1;
+ }
+ ohdr->u.aeth = qib_compute_aeth(qp);
+ hwords++;
+ qp->s_ack_rdma_psn = e->psn;
+ bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
+ } else {
+ /* COMPARE_SWAP or FETCH_ADD */
+ qp->s_cur_sge = NULL;
+ len = 0;
+ qp->s_ack_state = OP(ATOMIC_ACKNOWLEDGE);
+ ohdr->u.at.aeth = qib_compute_aeth(qp);
+ ohdr->u.at.atomic_ack_eth[0] =
+ cpu_to_be32(e->atomic_data >> 32);
+ ohdr->u.at.atomic_ack_eth[1] =
+ cpu_to_be32(e->atomic_data);
+ hwords += sizeof(ohdr->u.at) / sizeof(u32);
+ bth2 = e->psn & QIB_PSN_MASK;
+ e->sent = 1;
+ }
+ bth0 = qp->s_ack_state << 24;
+ break;
+
+ case OP(RDMA_READ_RESPONSE_FIRST):
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_MIDDLE);
+ /* FALLTHROUGH */
+ case OP(RDMA_READ_RESPONSE_MIDDLE):
+ qp->s_cur_sge = &qp->s_ack_rdma_sge;
+ qp->s_rdma_mr = qp->s_ack_rdma_sge.sge.mr;
+ if (qp->s_rdma_mr)
+ atomic_inc(&qp->s_rdma_mr->refcount);
+ len = qp->s_ack_rdma_sge.sge.sge_length;
+ if (len > pmtu)
+ len = pmtu;
+ else {
+ ohdr->u.aeth = qib_compute_aeth(qp);
+ hwords++;
+ qp->s_ack_state = OP(RDMA_READ_RESPONSE_LAST);
+ e = &qp->s_ack_queue[qp->s_tail_ack_queue];
+ e->sent = 1;
+ }
+ bth0 = qp->s_ack_state << 24;
+ bth2 = qp->s_ack_rdma_psn++ & QIB_PSN_MASK;
+ break;
+
+ default:
+normal:
+ /*
+ * Send a regular ACK.
+ * Set the s_ack_state so we wait until after sending
+ * the ACK before setting s_ack_state to ACKNOWLEDGE
+ * (see above).
+ */
+ qp->s_ack_state = OP(SEND_ONLY);
+ qp->s_flags &= ~QIB_S_ACK_PENDING;
+ qp->s_cur_sge = NULL;
+ if (qp->s_nak_state)
+ ohdr->u.aeth =
+ cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
+ (qp->s_nak_state <<
+ QIB_AETH_CREDIT_SHIFT));
+ else
+ ohdr->u.aeth = qib_compute_aeth(qp);
+ hwords++;
+ len = 0;
+ bth0 = OP(ACKNOWLEDGE) << 24;
+ bth2 = qp->s_ack_psn & QIB_PSN_MASK;
+ }
+ qp->s_rdma_ack_cnt++;
+ qp->s_hdrwords = hwords;
+ qp->s_cur_size = len;
+ qib_make_ruc_header(qp, ohdr, bth0, bth2);
+ return 1;
+
+bail:
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ qp->s_flags &= ~(QIB_S_RESP_PENDING | QIB_S_ACK_PENDING);
+ return 0;
+}
+
+/**
+ * qib_make_rc_req - construct a request packet (SEND, RDMA r/w, ATOMIC)
+ * @qp: a pointer to the QP
+ *
+ * Return 1 if constructed; otherwise, return 0.
+ */
+int qib_make_rc_req(struct qib_qp *qp)
+{
+ struct qib_ibdev *dev = to_idev(qp->ibqp.device);
+ struct qib_other_headers *ohdr;
+ struct qib_sge_state *ss;
+ struct qib_swqe *wqe;
+ u32 hwords;
+ u32 len;
+ u32 bth0;
+ u32 bth2;
+ u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+ char newreq;
+ unsigned long flags;
+ int ret = 0;
+ int delta;
+
+ ohdr = &qp->s_hdr.u.oth;
+ if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ ohdr = &qp->s_hdr.u.l.oth;
+
+ /*
+ * The lock is needed to synchronize between the sending tasklet,
+ * the receive interrupt handler, and timeout resends.
+ */
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Sending responses has higher priority over sending requests. */
+ if ((qp->s_flags & QIB_S_RESP_PENDING) &&
+ qib_make_rc_ack(dev, qp, ohdr, pmtu))
+ goto done;
+
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) {
+ if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= QIB_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ while (qp->s_last != qp->s_acked) {
+ qib_send_complete(qp, wqe, IB_WC_SUCCESS);
+ if (++qp->s_last >= qp->s_size)
+ qp->s_last = 0;
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ }
+ qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ goto done;
+ }
+
+ if (qp->s_flags & (QIB_S_WAIT_RNR | QIB_S_WAIT_ACK))
+ goto bail;
+
+ if (qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) {
+ if (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0) {
+ qp->s_flags |= QIB_S_WAIT_PSN;
+ goto bail;
+ }
+ qp->s_sending_psn = qp->s_psn;
+ qp->s_sending_hpsn = qp->s_psn - 1;
+ }
+
+ /* header size in 32-bit words LRH+BTH = (8+12)/4. */
+ hwords = 5;
+ bth0 = 0;
+
+ /* Send a request. */
+ wqe = get_swqe_ptr(qp, qp->s_cur);
+ switch (qp->s_state) {
+ default:
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK))
+ goto bail;
+ /*
+ * Resend an old request or start a new one.
+ *
+ * We keep track of the current SWQE so that
+ * we don't reset the "furthest progress" state
+ * if we need to back up.
+ */
+ newreq = 0;
+ if (qp->s_cur == qp->s_tail) {
+ /* Check if send work queue is empty. */
+ if (qp->s_tail == qp->s_head)
+ goto bail;
+ /*
+ * If a fence is requested, wait for previous
+ * RDMA read and atomic operations to finish.
+ */
+ if ((wqe->wr.send_flags & IB_SEND_FENCE) &&
+ qp->s_num_rd_atomic) {
+ qp->s_flags |= QIB_S_WAIT_FENCE;
+ goto bail;
+ }
+ wqe->psn = qp->s_next_psn;
+ newreq = 1;
+ }
+ /*
+ * Note that we have to be careful not to modify the
+ * original work request since we may need to resend
+ * it.
+ */
+ len = wqe->length;
+ ss = &qp->s_sge;
+ bth2 = qp->s_psn & QIB_PSN_MASK;
+ switch (wqe->wr.opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ /* If no credit, return. */
+ if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) &&
+ qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
+ goto bail;
+ }
+ wqe->lpsn = wqe->psn;
+ if (len > pmtu) {
+ wqe->lpsn += (len - 1) / pmtu;
+ qp->s_state = OP(SEND_FIRST);
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_SEND)
+ qp->s_state = OP(SEND_ONLY);
+ else {
+ qp->s_state = OP(SEND_ONLY_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ }
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ bth2 |= IB_BTH_REQ_ACK;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case IB_WR_RDMA_WRITE:
+ if (newreq && !(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+ qp->s_lsn++;
+ /* FALLTHROUGH */
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ /* If no credit, return. */
+ if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT) &&
+ qib_cmp24(wqe->ssn, qp->s_lsn + 1) > 0) {
+ qp->s_flags |= QIB_S_WAIT_SSN_CREDIT;
+ goto bail;
+ }
+ ohdr->u.rc.reth.vaddr =
+ cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ ohdr->u.rc.reth.rkey =
+ cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ ohdr->u.rc.reth.length = cpu_to_be32(len);
+ hwords += sizeof(struct ib_reth) / sizeof(u32);
+ wqe->lpsn = wqe->psn;
+ if (len > pmtu) {
+ wqe->lpsn += (len - 1) / pmtu;
+ qp->s_state = OP(RDMA_WRITE_FIRST);
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ qp->s_state = OP(RDMA_WRITE_ONLY);
+ else {
+ qp->s_state =
+ OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
+ /* Immediate data comes after RETH */
+ ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ }
+ bth2 |= IB_BTH_REQ_ACK;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case IB_WR_RDMA_READ:
+ /*
+ * Don't allow more operations to be started
+ * than the QP limits allow.
+ */
+ if (newreq) {
+ if (qp->s_num_rd_atomic >=
+ qp->s_max_rd_atomic) {
+ qp->s_flags |= QIB_S_WAIT_RDMAR;
+ goto bail;
+ }
+ qp->s_num_rd_atomic++;
+ if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+ qp->s_lsn++;
+ /*
+ * Adjust s_next_psn to count the
+ * expected number of responses.
+ */
+ if (len > pmtu)
+ qp->s_next_psn += (len - 1) / pmtu;
+ wqe->lpsn = qp->s_next_psn++;
+ }
+ ohdr->u.rc.reth.vaddr =
+ cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ ohdr->u.rc.reth.rkey =
+ cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ ohdr->u.rc.reth.length = cpu_to_be32(len);
+ qp->s_state = OP(RDMA_READ_REQUEST);
+ hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
+ ss = NULL;
+ len = 0;
+ bth2 |= IB_BTH_REQ_ACK;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ /*
+ * Don't allow more operations to be started
+ * than the QP limits allow.
+ */
+ if (newreq) {
+ if (qp->s_num_rd_atomic >=
+ qp->s_max_rd_atomic) {
+ qp->s_flags |= QIB_S_WAIT_RDMAR;
+ goto bail;
+ }
+ qp->s_num_rd_atomic++;
+ if (!(qp->s_flags & QIB_S_UNLIMITED_CREDIT))
+ qp->s_lsn++;
+ wqe->lpsn = wqe->psn;
+ }
+ if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
+ qp->s_state = OP(COMPARE_SWAP);
+ ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+ wqe->wr.wr.atomic.swap);
+ ohdr->u.atomic_eth.compare_data = cpu_to_be64(
+ wqe->wr.wr.atomic.compare_add);
+ } else {
+ qp->s_state = OP(FETCH_ADD);
+ ohdr->u.atomic_eth.swap_data = cpu_to_be64(
+ wqe->wr.wr.atomic.compare_add);
+ ohdr->u.atomic_eth.compare_data = 0;
+ }
+ ohdr->u.atomic_eth.vaddr[0] = cpu_to_be32(
+ wqe->wr.wr.atomic.remote_addr >> 32);
+ ohdr->u.atomic_eth.vaddr[1] = cpu_to_be32(
+ wqe->wr.wr.atomic.remote_addr);
+ ohdr->u.atomic_eth.rkey = cpu_to_be32(
+ wqe->wr.wr.atomic.rkey);
+ hwords += sizeof(struct ib_atomic_eth) / sizeof(u32);
+ ss = NULL;
+ len = 0;
+ bth2 |= IB_BTH_REQ_ACK;
+ if (++qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ default:
+ goto bail;
+ }
+ qp->s_sge.sge = wqe->sg_list[0];
+ qp->s_sge.sg_list = wqe->sg_list + 1;
+ qp->s_sge.num_sge = wqe->wr.num_sge;
+ qp->s_sge.total_len = wqe->length;
+ qp->s_len = wqe->length;
+ if (newreq) {
+ qp->s_tail++;
+ if (qp->s_tail >= qp->s_size)
+ qp->s_tail = 0;
+ }
+ if (wqe->wr.opcode == IB_WR_RDMA_READ)
+ qp->s_psn = wqe->lpsn + 1;
+ else {
+ qp->s_psn++;
+ if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
+ qp->s_next_psn = qp->s_psn;
+ }
+ break;
+
+ case OP(RDMA_READ_RESPONSE_FIRST):
+ /*
+ * qp->s_state is normally set to the opcode of the
+ * last packet constructed for new requests and therefore
+ * is never set to RDMA read response.
+ * RDMA_READ_RESPONSE_FIRST is used by the ACK processing
+ * thread to indicate a SEND needs to be restarted from an
+ * earlier PSN without interferring with the sending thread.
+ * See qib_restart_rc().
+ */
+ qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
+ /* FALLTHROUGH */
+ case OP(SEND_FIRST):
+ qp->s_state = OP(SEND_MIDDLE);
+ /* FALLTHROUGH */
+ case OP(SEND_MIDDLE):
+ bth2 = qp->s_psn++ & QIB_PSN_MASK;
+ if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
+ qp->s_next_psn = qp->s_psn;
+ ss = &qp->s_sge;
+ len = qp->s_len;
+ if (len > pmtu) {
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_SEND)
+ qp->s_state = OP(SEND_LAST);
+ else {
+ qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ }
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ bth2 |= IB_BTH_REQ_ACK;
+ qp->s_cur++;
+ if (qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case OP(RDMA_READ_RESPONSE_LAST):
+ /*
+ * qp->s_state is normally set to the opcode of the
+ * last packet constructed for new requests and therefore
+ * is never set to RDMA read response.
+ * RDMA_READ_RESPONSE_LAST is used by the ACK processing
+ * thread to indicate a RDMA write needs to be restarted from
+ * an earlier PSN without interferring with the sending thread.
+ * See qib_restart_rc().
+ */
+ qp->s_len = restart_sge(&qp->s_sge, wqe, qp->s_psn, pmtu);
+ /* FALLTHROUGH */
+ case OP(RDMA_WRITE_FIRST):
+ qp->s_state = OP(RDMA_WRITE_MIDDLE);
+ /* FALLTHROUGH */
+ case OP(RDMA_WRITE_MIDDLE):
+ bth2 = qp->s_psn++ & QIB_PSN_MASK;
+ if (qib_cmp24(qp->s_psn, qp->s_next_psn) > 0)
+ qp->s_next_psn = qp->s_psn;
+ ss = &qp->s_sge;
+ len = qp->s_len;
+ if (len > pmtu) {
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ qp->s_state = OP(RDMA_WRITE_LAST);
+ else {
+ qp->s_state = OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ }
+ bth2 |= IB_BTH_REQ_ACK;
+ qp->s_cur++;
+ if (qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case OP(RDMA_READ_RESPONSE_MIDDLE):
+ /*
+ * qp->s_state is normally set to the opcode of the
+ * last packet constructed for new requests and therefore
+ * is never set to RDMA read response.
+ * RDMA_READ_RESPONSE_MIDDLE is used by the ACK processing
+ * thread to indicate a RDMA read needs to be restarted from
+ * an earlier PSN without interferring with the sending thread.
+ * See qib_restart_rc().
+ */
+ len = ((qp->s_psn - wqe->psn) & QIB_PSN_MASK) * pmtu;
+ ohdr->u.rc.reth.vaddr =
+ cpu_to_be64(wqe->wr.wr.rdma.remote_addr + len);
+ ohdr->u.rc.reth.rkey =
+ cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ ohdr->u.rc.reth.length = cpu_to_be32(wqe->length - len);
+ qp->s_state = OP(RDMA_READ_REQUEST);
+ hwords += sizeof(ohdr->u.rc.reth) / sizeof(u32);
+ bth2 = (qp->s_psn & QIB_PSN_MASK) | IB_BTH_REQ_ACK;
+ qp->s_psn = wqe->lpsn + 1;
+ ss = NULL;
+ len = 0;
+ qp->s_cur++;
+ if (qp->s_cur == qp->s_size)
+ qp->s_cur = 0;
+ break;
+ }
+ qp->s_sending_hpsn = bth2;
+ delta = (((int) bth2 - (int) wqe->psn) << 8) >> 8;
+ if (delta && delta % QIB_PSN_CREDIT == 0)
+ bth2 |= IB_BTH_REQ_ACK;
+ if (qp->s_flags & QIB_S_SEND_ONE) {
+ qp->s_flags &= ~QIB_S_SEND_ONE;
+ qp->s_flags |= QIB_S_WAIT_ACK;
+ bth2 |= IB_BTH_REQ_ACK;
+ }
+ qp->s_len -= len;
+ qp->s_hdrwords = hwords;
+ qp->s_cur_sge = ss;
+ qp->s_cur_size = len;
+ qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24), bth2);
+done:
+ ret = 1;
+ goto unlock;
+
+bail:
+ qp->s_flags &= ~QIB_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
+}
+
+/**
+ * qib_send_rc_ack - Construct an ACK packet and send it
+ * @qp: a pointer to the QP
+ *
+ * This is called from qib_rc_rcv() and qib_kreceive().
+ * Note that RDMA reads and atomics are handled in the
+ * send side QP state and tasklet.
+ */
+void qib_send_rc_ack(struct qib_qp *qp)
+{
+ struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+ struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ u64 pbc;
+ u16 lrh0;
+ u32 bth0;
+ u32 hwords;
+ u32 pbufn;
+ u32 __iomem *piobuf;
+ struct qib_ib_header hdr;
+ struct qib_other_headers *ohdr;
+ u32 control;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+ goto unlock;
+
+ /* Don't send ACK or NAK if a RDMA read or atomic is pending. */
+ if ((qp->s_flags & QIB_S_RESP_PENDING) || qp->s_rdma_ack_cnt)
+ goto queue_ack;
+
+ /* Construct the header with s_lock held so APM doesn't change it. */
+ ohdr = &hdr.u.oth;
+ lrh0 = QIB_LRH_BTH;
+ /* header size in 32-bit words LRH+BTH+AETH = (8+12+4)/4. */
+ hwords = 6;
+ if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+ hwords += qib_make_grh(ibp, &hdr.u.l.grh,
+ &qp->remote_ah_attr.grh, hwords, 0);
+ ohdr = &hdr.u.l.oth;
+ lrh0 = QIB_LRH_GRH;
+ }
+ /* read pkey_index w/o lock (its atomic) */
+ bth0 = qib_get_pkey(ibp, qp->s_pkey_index) | (OP(ACKNOWLEDGE) << 24);
+ if (qp->s_mig_state == IB_MIG_MIGRATED)
+ bth0 |= IB_BTH_MIG_REQ;
+ if (qp->r_nak_state)
+ ohdr->u.aeth = cpu_to_be32((qp->r_msn & QIB_MSN_MASK) |
+ (qp->r_nak_state <<
+ QIB_AETH_CREDIT_SHIFT));
+ else
+ ohdr->u.aeth = qib_compute_aeth(qp);
+ lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
+ qp->remote_ah_attr.sl << 4;
+ hdr.lrh[0] = cpu_to_be16(lrh0);
+ hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ hdr.lrh[2] = cpu_to_be16(hwords + SIZE_OF_CRC);
+ hdr.lrh[3] = cpu_to_be16(ppd->lid | qp->remote_ah_attr.src_path_bits);
+ ohdr->bth[0] = cpu_to_be32(bth0);
+ ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
+ ohdr->bth[2] = cpu_to_be32(qp->r_ack_psn & QIB_PSN_MASK);
+
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Don't try to send ACKs if the link isn't ACTIVE */
+ if (!(ppd->lflags & QIBL_LINKACTIVE))
+ goto done;
+
+ control = dd->f_setpbc_control(ppd, hwords + SIZE_OF_CRC,
+ qp->s_srate, lrh0 >> 12);
+ /* length is + 1 for the control dword */
+ pbc = ((u64) control << 32) | (hwords + 1);
+
+ piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
+ if (!piobuf) {
+ /*
+ * We are out of PIO buffers at the moment.
+ * Pass responsibility for sending the ACK to the
+ * send tasklet so that when a PIO buffer becomes
+ * available, the ACK is sent ahead of other outgoing
+ * packets.
+ */
+ spin_lock_irqsave(&qp->s_lock, flags);
+ goto queue_ack;
+ }
+
+ /*
+ * Write the pbc.
+ * We have to flush after the PBC for correctness
+ * on some cpus or WC buffer can be written out of order.
+ */
+ writeq(pbc, piobuf);
+
+ if (dd->flags & QIB_PIO_FLUSH_WC) {
+ u32 *hdrp = (u32 *) &hdr;
+
+ qib_flush_wc();
+ qib_pio_copy(piobuf + 2, hdrp, hwords - 1);
+ qib_flush_wc();
+ __raw_writel(hdrp[hwords - 1], piobuf + hwords + 1);
+ } else
+ qib_pio_copy(piobuf + 2, (u32 *) &hdr, hwords);
+
+ if (dd->flags & QIB_USE_SPCL_TRIG) {
+ u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
+
+ qib_flush_wc();
+ __raw_writel(0xaebecede, piobuf + spcl_off);
+ }
+
+ qib_flush_wc();
+ qib_sendbuf_done(dd, pbufn);
+
+ ibp->n_unicast_xmit++;
+ goto done;
+
+queue_ack:
+ if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
+ ibp->n_rc_qacks++;
+ qp->s_flags |= QIB_S_ACK_PENDING | QIB_S_RESP_PENDING;
+ qp->s_nak_state = qp->r_nak_state;
+ qp->s_ack_psn = qp->r_ack_psn;
+
+ /* Schedule the send tasklet. */
+ qib_schedule_send(qp);
+ }
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+done:
+ return;
+}
+
+/**
+ * reset_psn - reset the QP state to send starting from PSN
+ * @qp: the QP
+ * @psn: the packet sequence number to restart at
+ *
+ * This is called from qib_rc_rcv() to process an incoming RC ACK
+ * for the given QP.
+ * Called at interrupt level with the QP s_lock held.
+ */
+static void reset_psn(struct qib_qp *qp, u32 psn)
+{
+ u32 n = qp->s_acked;
+ struct qib_swqe *wqe = get_swqe_ptr(qp, n);
+ u32 opcode;
+
+ qp->s_cur = n;
+
+ /*
+ * If we are starting the request from the beginning,
+ * let the normal send code handle initialization.
+ */
+ if (qib_cmp24(psn, wqe->psn) <= 0) {
+ qp->s_state = OP(SEND_LAST);
+ goto done;
+ }
+
+ /* Find the work request opcode corresponding to the given PSN. */
+ opcode = wqe->wr.opcode;
+ for (;;) {
+ int diff;
+
+ if (++n == qp->s_size)
+ n = 0;
+ if (n == qp->s_tail)
+ break;
+ wqe = get_swqe_ptr(qp, n);
+ diff = qib_cmp24(psn, wqe->psn);
+ if (diff < 0)
+ break;
+ qp->s_cur = n;
+ /*
+ * If we are starting the request from the beginning,
+ * let the normal send code handle initialization.
+ */
+ if (diff == 0) {
+ qp->s_state = OP(SEND_LAST);
+ goto done;
+ }
+ opcode = wqe->wr.opcode;
+ }
+
+ /*
+ * Set the state to restart in the middle of a request.
+ * Don't change the s_sge, s_cur_sge, or s_cur_size.
+ * See qib_make_rc_req().
+ */
+ switch (opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ qp->s_state = OP(RDMA_READ_RESPONSE_FIRST);
+ break;
+
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ qp->s_state = OP(RDMA_READ_RESPONSE_LAST);
+ break;
+
+ case IB_WR_RDMA_READ:
+ qp->s_state = OP(RDMA_READ_RESPONSE_MIDDLE);
+ break;
+
+ default:
+ /*
+ * This case shouldn't happen since its only
+ * one PSN per req.
+ */
+ qp->s_state = OP(SEND_LAST);
+ }
+done:
+ qp->s_psn = psn;
+ /*
+ * Set QIB_S_WAIT_PSN as qib_rc_complete() may start the timer
+ * asynchronously before the send tasklet can get scheduled.
+ * Doing it in qib_make_rc_req() is too late.
+ */
+ if ((qib_cmp24(qp->s_psn, qp->s_sending_hpsn) <= 0) &&
+ (qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0))
+ qp->s_flags |= QIB_S_WAIT_PSN;
+}
+
+/*
+ * Back up requester to resend the last un-ACKed request.
+ * The QP s_lock should be held and interrupts disabled.
+ */
+static void qib_restart_rc(struct qib_qp *qp, u32 psn, int wait)
+{
+ struct qib_swqe *wqe = get_swqe_ptr(qp, qp->s_acked);
+ struct qib_ibport *ibp;
+
+ if (qp->s_retry == 0) {
+ if (qp->s_mig_state == IB_MIG_ARMED) {
+ qib_migrate_qp(qp);
+ qp->s_retry = qp->s_retry_cnt;
+ } else if (qp->s_last == qp->s_acked) {
+ qib_send_complete(qp, wqe, IB_WC_RETRY_EXC_ERR);
+ qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ return;
+ } else /* XXX need to handle delayed completion */
+ return;
+ } else
+ qp->s_retry--;
+
+ ibp = to_iport(qp->ibqp.device, qp->port_num);
+ if (wqe->wr.opcode == IB_WR_RDMA_READ)
+ ibp->n_rc_resends++;
+ else
+ ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
+
+ qp->s_flags &= ~(QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR |
+ QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_PSN |
+ QIB_S_WAIT_ACK);
+ if (wait)
+ qp->s_flags |= QIB_S_SEND_ONE;
+ reset_psn(qp, psn);
+}
+
+/*
+ * This is called from s_timer for missing responses.
+ */
+static void rc_timeout(unsigned long arg)
+{
+ struct qib_qp *qp = (struct qib_qp *)arg;
+ struct qib_ibport *ibp;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (qp->s_flags & QIB_S_TIMER) {
+ ibp = to_iport(qp->ibqp.device, qp->port_num);
+ ibp->n_rc_timeouts++;
+ qp->s_flags &= ~QIB_S_TIMER;
+ del_timer(&qp->s_timer);
+ qib_restart_rc(qp, qp->s_last_psn + 1, 1);
+ qib_schedule_send(qp);
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
+/*
+ * This is called from s_timer for RNR timeouts.
+ */
+void qib_rc_rnr_retry(unsigned long arg)
+{
+ struct qib_qp *qp = (struct qib_qp *)arg;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (qp->s_flags & QIB_S_WAIT_RNR) {
+ qp->s_flags &= ~QIB_S_WAIT_RNR;
+ del_timer(&qp->s_timer);
+ qib_schedule_send(qp);
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+}
+
+/*
+ * Set qp->s_sending_psn to the next PSN after the given one.
+ * This would be psn+1 except when RDMA reads are present.
+ */
+static void reset_sending_psn(struct qib_qp *qp, u32 psn)
+{
+ struct qib_swqe *wqe;
+ u32 n = qp->s_last;
+
+ /* Find the work request corresponding to the given PSN. */
+ for (;;) {
+ wqe = get_swqe_ptr(qp, n);
+ if (qib_cmp24(psn, wqe->lpsn) <= 0) {
+ if (wqe->wr.opcode == IB_WR_RDMA_READ)
+ qp->s_sending_psn = wqe->lpsn + 1;
+ else
+ qp->s_sending_psn = psn + 1;
+ break;
+ }
+ if (++n == qp->s_size)
+ n = 0;
+ if (n == qp->s_tail)
+ break;
+ }
+}
+
+/*
+ * This should be called with the QP s_lock held and interrupts disabled.
+ */
+void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr)
+{
+ struct qib_other_headers *ohdr;
+ struct qib_swqe *wqe;
+ struct ib_wc wc;
+ unsigned i;
+ u32 opcode;
+ u32 psn;
+
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+ return;
+
+ /* Find out where the BTH is */
+ if ((be16_to_cpu(hdr->lrh[0]) & 3) == QIB_LRH_BTH)
+ ohdr = &hdr->u.oth;
+ else
+ ohdr = &hdr->u.l.oth;
+
+ opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
+ opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
+ WARN_ON(!qp->s_rdma_ack_cnt);
+ qp->s_rdma_ack_cnt--;
+ return;
+ }
+
+ psn = be32_to_cpu(ohdr->bth[2]);
+ reset_sending_psn(qp, psn);
+
+ /*
+ * Start timer after a packet requesting an ACK has been sent and
+ * there are still requests that haven't been acked.
+ */
+ if ((psn & IB_BTH_REQ_ACK) && qp->s_acked != qp->s_tail &&
+ !(qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR | QIB_S_WAIT_PSN)))
+ start_timer(qp);
+
+ while (qp->s_last != qp->s_acked) {
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) >= 0 &&
+ qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) <= 0)
+ break;
+ for (i = 0; i < wqe->wr.num_sge; i++) {
+ struct qib_sge *sge = &wqe->sg_list[i];
+
+ atomic_dec(&sge->mr->refcount);
+ }
+ /* Post a send completion queue entry if requested. */
+ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+ (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
+ memset(&wc, 0, sizeof wc);
+ wc.wr_id = wqe->wr.wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
+ wc.byte_len = wqe->length;
+ wc.qp = &qp->ibqp;
+ qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+ }
+ if (++qp->s_last >= qp->s_size)
+ qp->s_last = 0;
+ }
+ /*
+ * If we were waiting for sends to complete before resending,
+ * and they are now complete, restart sending.
+ */
+ if (qp->s_flags & QIB_S_WAIT_PSN &&
+ qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
+ qp->s_flags &= ~QIB_S_WAIT_PSN;
+ qp->s_sending_psn = qp->s_psn;
+ qp->s_sending_hpsn = qp->s_psn - 1;
+ qib_schedule_send(qp);
+ }
+}
+
+static inline void update_last_psn(struct qib_qp *qp, u32 psn)
+{
+ qp->s_last_psn = psn;
+}
+
+/*
+ * Generate a SWQE completion.
+ * This is similar to qib_send_complete but has to check to be sure
+ * that the SGEs are not being referenced if the SWQE is being resent.
+ */
+static struct qib_swqe *do_rc_completion(struct qib_qp *qp,
+ struct qib_swqe *wqe,
+ struct qib_ibport *ibp)
+{
+ struct ib_wc wc;
+ unsigned i;
+
+ /*
+ * Don't decrement refcount and don't generate a
+ * completion if the SWQE is being resent until the send
+ * is finished.
+ */
+ if (qib_cmp24(wqe->lpsn, qp->s_sending_psn) < 0 ||
+ qib_cmp24(qp->s_sending_psn, qp->s_sending_hpsn) > 0) {
+ for (i = 0; i < wqe->wr.num_sge; i++) {
+ struct qib_sge *sge = &wqe->sg_list[i];
+
+ atomic_dec(&sge->mr->refcount);
+ }
+ /* Post a send completion queue entry if requested. */
+ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+ (wqe->wr.send_flags & IB_SEND_SIGNALED)) {
+ memset(&wc, 0, sizeof wc);
+ wc.wr_id = wqe->wr.wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
+ wc.byte_len = wqe->length;
+ wc.qp = &qp->ibqp;
+ qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc, 0);
+ }
+ if (++qp->s_last >= qp->s_size)
+ qp->s_last = 0;
+ } else
+ ibp->n_rc_delayed_comp++;
+
+ qp->s_retry = qp->s_retry_cnt;
+ update_last_psn(qp, wqe->lpsn);
+
+ /*
+ * If we are completing a request which is in the process of
+ * being resent, we can stop resending it since we know the
+ * responder has already seen it.
+ */
+ if (qp->s_acked == qp->s_cur) {
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ qp->s_acked = qp->s_cur;
+ wqe = get_swqe_ptr(qp, qp->s_cur);
+ if (qp->s_acked != qp->s_tail) {
+ qp->s_state = OP(SEND_LAST);
+ qp->s_psn = wqe->psn;
+ }
+ } else {
+ if (++qp->s_acked >= qp->s_size)
+ qp->s_acked = 0;
+ if (qp->state == IB_QPS_SQD && qp->s_acked == qp->s_cur)
+ qp->s_draining = 0;
+ wqe = get_swqe_ptr(qp, qp->s_acked);
+ }
+ return wqe;
+}
+
+/**
+ * do_rc_ack - process an incoming RC ACK
+ * @qp: the QP the ACK came in on
+ * @psn: the packet sequence number of the ACK
+ * @opcode: the opcode of the request that resulted in the ACK
+ *
+ * This is called from qib_rc_rcv_resp() to process an incoming RC ACK
+ * for the given QP.
+ * Called at interrupt level with the QP s_lock held.
+ * Returns 1 if OK, 0 if current operation should be aborted (NAK).
+ */
+static int do_rc_ack(struct qib_qp *qp, u32 aeth, u32 psn, int opcode,
+ u64 val, struct qib_ctxtdata *rcd)
+{
+ struct qib_ibport *ibp;
+ enum ib_wc_status status;
+ struct qib_swqe *wqe;
+ int ret = 0;
+ u32 ack_psn;
+ int diff;
+
+ /* Remove QP from retry timer */
+ if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
+ qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
+ del_timer(&qp->s_timer);
+ }
+
+ /*
+ * Note that NAKs implicitly ACK outstanding SEND and RDMA write
+ * requests and implicitly NAK RDMA read and atomic requests issued
+ * before the NAK'ed request. The MSN won't include the NAK'ed
+ * request but will include an ACK'ed request(s).
+ */
+ ack_psn = psn;
+ if (aeth >> 29)
+ ack_psn--;
+ wqe = get_swqe_ptr(qp, qp->s_acked);
+ ibp = to_iport(qp->ibqp.device, qp->port_num);
+
+ /*
+ * The MSN might be for a later WQE than the PSN indicates so
+ * only complete WQEs that the PSN finishes.
+ */
+ while ((diff = qib_cmp24(ack_psn, wqe->lpsn)) >= 0) {
+ /*
+ * RDMA_READ_RESPONSE_ONLY is a special case since
+ * we want to generate completion events for everything
+ * before the RDMA read, copy the data, then generate
+ * the completion for the read.
+ */
+ if (wqe->wr.opcode == IB_WR_RDMA_READ &&
+ opcode == OP(RDMA_READ_RESPONSE_ONLY) &&
+ diff == 0) {
+ ret = 1;
+ goto bail;
+ }
+ /*
+ * If this request is a RDMA read or atomic, and the ACK is
+ * for a later operation, this ACK NAKs the RDMA read or
+ * atomic. In other words, only a RDMA_READ_LAST or ONLY
+ * can ACK a RDMA read and likewise for atomic ops. Note
+ * that the NAK case can only happen if relaxed ordering is
+ * used and requests are sent after an RDMA read or atomic
+ * is sent but before the response is received.
+ */
+ if ((wqe->wr.opcode == IB_WR_RDMA_READ &&
+ (opcode != OP(RDMA_READ_RESPONSE_LAST) || diff != 0)) ||
+ ((wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) &&
+ (opcode != OP(ATOMIC_ACKNOWLEDGE) || diff != 0))) {
+ /* Retry this request. */
+ if (!(qp->r_flags & QIB_R_RDMAR_SEQ)) {
+ qp->r_flags |= QIB_R_RDMAR_SEQ;
+ qib_restart_rc(qp, qp->s_last_psn + 1, 0);
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |= QIB_R_RSP_SEND;
+ atomic_inc(&qp->refcount);
+ list_add_tail(&qp->rspwait,
+ &rcd->qp_wait_list);
+ }
+ }
+ /*
+ * No need to process the ACK/NAK since we are
+ * restarting an earlier request.
+ */
+ goto bail;
+ }
+ if (wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) {
+ u64 *vaddr = wqe->sg_list[0].vaddr;
+ *vaddr = val;
+ }
+ if (qp->s_num_rd_atomic &&
+ (wqe->wr.opcode == IB_WR_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)) {
+ qp->s_num_rd_atomic--;
+ /* Restart sending task if fence is complete */
+ if ((qp->s_flags & QIB_S_WAIT_FENCE) &&
+ !qp->s_num_rd_atomic) {
+ qp->s_flags &= ~(QIB_S_WAIT_FENCE |
+ QIB_S_WAIT_ACK);
+ qib_schedule_send(qp);
+ } else if (qp->s_flags & QIB_S_WAIT_RDMAR) {
+ qp->s_flags &= ~(QIB_S_WAIT_RDMAR |
+ QIB_S_WAIT_ACK);
+ qib_schedule_send(qp);
+ }
+ }
+ wqe = do_rc_completion(qp, wqe, ibp);
+ if (qp->s_acked == qp->s_tail)
+ break;
+ }
+
+ switch (aeth >> 29) {
+ case 0: /* ACK */
+ ibp->n_rc_acks++;
+ if (qp->s_acked != qp->s_tail) {
+ /*
+ * We are expecting more ACKs so
+ * reset the retransmit timer.
+ */
+ start_timer(qp);
+ /*
+ * We can stop resending the earlier packets and
+ * continue with the next packet the receiver wants.
+ */
+ if (qib_cmp24(qp->s_psn, psn) <= 0)
+ reset_psn(qp, psn + 1);
+ } else if (qib_cmp24(qp->s_psn, psn) <= 0) {
+ qp->s_state = OP(SEND_LAST);
+ qp->s_psn = psn + 1;
+ }
+ if (qp->s_flags & QIB_S_WAIT_ACK) {
+ qp->s_flags &= ~QIB_S_WAIT_ACK;
+ qib_schedule_send(qp);
+ }
+ qib_get_credit(qp, aeth);
+ qp->s_rnr_retry = qp->s_rnr_retry_cnt;
+ qp->s_retry = qp->s_retry_cnt;
+ update_last_psn(qp, psn);
+ ret = 1;
+ goto bail;
+
+ case 1: /* RNR NAK */
+ ibp->n_rnr_naks++;
+ if (qp->s_acked == qp->s_tail)
+ goto bail;
+ if (qp->s_flags & QIB_S_WAIT_RNR)
+ goto bail;
+ if (qp->s_rnr_retry == 0) {
+ status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto class_b;
+ }
+ if (qp->s_rnr_retry_cnt < 7)
+ qp->s_rnr_retry--;
+
+ /* The last valid PSN is the previous PSN. */
+ update_last_psn(qp, psn - 1);
+
+ ibp->n_rc_resends += (qp->s_psn - psn) & QIB_PSN_MASK;
+
+ reset_psn(qp, psn);
+
+ qp->s_flags &= ~(QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_ACK);
+ qp->s_flags |= QIB_S_WAIT_RNR;
+ qp->s_timer.function = qib_rc_rnr_retry;
+ qp->s_timer.expires = jiffies + usecs_to_jiffies(
+ ib_qib_rnr_table[(aeth >> QIB_AETH_CREDIT_SHIFT) &
+ QIB_AETH_CREDIT_MASK]);
+ add_timer(&qp->s_timer);
+ goto bail;
+
+ case 3: /* NAK */
+ if (qp->s_acked == qp->s_tail)
+ goto bail;
+ /* The last valid PSN is the previous PSN. */
+ update_last_psn(qp, psn - 1);
+ switch ((aeth >> QIB_AETH_CREDIT_SHIFT) &
+ QIB_AETH_CREDIT_MASK) {
+ case 0: /* PSN sequence error */
+ ibp->n_seq_naks++;
+ /*
+ * Back up to the responder's expected PSN.
+ * Note that we might get a NAK in the middle of an
+ * RDMA READ response which terminates the RDMA
+ * READ.
+ */
+ qib_restart_rc(qp, psn, 0);
+ qib_schedule_send(qp);
+ break;
+
+ case 1: /* Invalid Request */
+ status = IB_WC_REM_INV_REQ_ERR;
+ ibp->n_other_naks++;
+ goto class_b;
+
+ case 2: /* Remote Access Error */
+ status = IB_WC_REM_ACCESS_ERR;
+ ibp->n_other_naks++;
+ goto class_b;
+
+ case 3: /* Remote Operation Error */
+ status = IB_WC_REM_OP_ERR;
+ ibp->n_other_naks++;
+class_b:
+ if (qp->s_last == qp->s_acked) {
+ qib_send_complete(qp, wqe, status);
+ qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ }
+ break;
+
+ default:
+ /* Ignore other reserved NAK error codes */
+ goto reserved;
+ }
+ qp->s_retry = qp->s_retry_cnt;
+ qp->s_rnr_retry = qp->s_rnr_retry_cnt;
+ goto bail;
+
+ default: /* 2: reserved */
+reserved:
+ /* Ignore reserved NAK codes. */
+ goto bail;
+ }
+
+bail:
+ return ret;
+}
+
+/*
+ * We have seen an out of sequence RDMA read middle or last packet.
+ * This ACKs SENDs and RDMA writes up to the first RDMA read or atomic SWQE.
+ */
+static void rdma_seq_err(struct qib_qp *qp, struct qib_ibport *ibp, u32 psn,
+ struct qib_ctxtdata *rcd)
+{
+ struct qib_swqe *wqe;
+
+ /* Remove QP from retry timer */
+ if (qp->s_flags & (QIB_S_TIMER | QIB_S_WAIT_RNR)) {
+ qp->s_flags &= ~(QIB_S_TIMER | QIB_S_WAIT_RNR);
+ del_timer(&qp->s_timer);
+ }
+
+ wqe = get_swqe_ptr(qp, qp->s_acked);
+
+ while (qib_cmp24(psn, wqe->lpsn) > 0) {
+ if (wqe->wr.opcode == IB_WR_RDMA_READ ||
+ wqe->wr.opcode == IB_WR_ATOMIC_CMP_AND_SWP ||
+ wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD)
+ break;
+ wqe = do_rc_completion(qp, wqe, ibp);
+ }
+
+ ibp->n_rdma_seq++;
+ qp->r_flags |= QIB_R_RDMAR_SEQ;
+ qib_restart_rc(qp, qp->s_last_psn + 1, 0);
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |= QIB_R_RSP_SEND;
+ atomic_inc(&qp->refcount);
+ list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+ }
+}
+
+/**
+ * qib_rc_rcv_resp - process an incoming RC response packet
+ * @ibp: the port this packet came in on
+ * @ohdr: the other headers for this packet
+ * @data: the packet data
+ * @tlen: the packet length
+ * @qp: the QP for this packet
+ * @opcode: the opcode for this packet
+ * @psn: the packet sequence number for this packet
+ * @hdrsize: the header length
+ * @pmtu: the path MTU
+ *
+ * This is called from qib_rc_rcv() to process an incoming RC response
+ * packet for the given QP.
+ * Called at interrupt level.
+ */
+static void qib_rc_rcv_resp(struct qib_ibport *ibp,
+ struct qib_other_headers *ohdr,
+ void *data, u32 tlen,
+ struct qib_qp *qp,
+ u32 opcode,
+ u32 psn, u32 hdrsize, u32 pmtu,
+ struct qib_ctxtdata *rcd)
+{
+ struct qib_swqe *wqe;
+ enum ib_wc_status status;
+ unsigned long flags;
+ int diff;
+ u32 pad;
+ u32 aeth;
+ u64 val;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Double check we can process this now that we hold the s_lock. */
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+ goto ack_done;
+
+ /* Ignore invalid responses. */
+ if (qib_cmp24(psn, qp->s_next_psn) >= 0)
+ goto ack_done;
+
+ /* Ignore duplicate responses. */
+ diff = qib_cmp24(psn, qp->s_last_psn);
+ if (unlikely(diff <= 0)) {
+ /* Update credits for "ghost" ACKs */
+ if (diff == 0 && opcode == OP(ACKNOWLEDGE)) {
+ aeth = be32_to_cpu(ohdr->u.aeth);
+ if ((aeth >> 29) == 0)
+ qib_get_credit(qp, aeth);
+ }
+ goto ack_done;
+ }
+
+ /*
+ * Skip everything other than the PSN we expect, if we are waiting
+ * for a reply to a restarted RDMA read or atomic op.
+ */
+ if (qp->r_flags & QIB_R_RDMAR_SEQ) {
+ if (qib_cmp24(psn, qp->s_last_psn + 1) != 0)
+ goto ack_done;
+ qp->r_flags &= ~QIB_R_RDMAR_SEQ;
+ }
+
+ if (unlikely(qp->s_acked == qp->s_tail))
+ goto ack_done;
+ wqe = get_swqe_ptr(qp, qp->s_acked);
+ status = IB_WC_SUCCESS;
+
+ switch (opcode) {
+ case OP(ACKNOWLEDGE):
+ case OP(ATOMIC_ACKNOWLEDGE):
+ case OP(RDMA_READ_RESPONSE_FIRST):
+ aeth = be32_to_cpu(ohdr->u.aeth);
+ if (opcode == OP(ATOMIC_ACKNOWLEDGE)) {
+ __be32 *p = ohdr->u.at.atomic_ack_eth;
+
+ val = ((u64) be32_to_cpu(p[0]) << 32) |
+ be32_to_cpu(p[1]);
+ } else
+ val = 0;
+ if (!do_rc_ack(qp, aeth, psn, opcode, val, rcd) ||
+ opcode != OP(RDMA_READ_RESPONSE_FIRST))
+ goto ack_done;
+ hdrsize += 4;
+ wqe = get_swqe_ptr(qp, qp->s_acked);
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+ /*
+ * If this is a response to a resent RDMA read, we
+ * have to be careful to copy the data to the right
+ * location.
+ */
+ qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+ wqe, psn, pmtu);
+ goto read_middle;
+
+ case OP(RDMA_READ_RESPONSE_MIDDLE):
+ /* no AETH, no ACK */
+ if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1)))
+ goto ack_seq_err;
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+read_middle:
+ if (unlikely(tlen != (hdrsize + pmtu + 4)))
+ goto ack_len_err;
+ if (unlikely(pmtu >= qp->s_rdma_read_len))
+ goto ack_len_err;
+
+ /*
+ * We got a response so update the timeout.
+ * 4.096 usec. * (1 << qp->timeout)
+ */
+ qp->s_flags |= QIB_S_TIMER;
+ mod_timer(&qp->s_timer, jiffies +
+ usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
+ 1000UL));
+ if (qp->s_flags & QIB_S_WAIT_ACK) {
+ qp->s_flags &= ~QIB_S_WAIT_ACK;
+ qib_schedule_send(qp);
+ }
+
+ if (opcode == OP(RDMA_READ_RESPONSE_MIDDLE))
+ qp->s_retry = qp->s_retry_cnt;
+
+ /*
+ * Update the RDMA receive state but do the copy w/o
+ * holding the locks and blocking interrupts.
+ */
+ qp->s_rdma_read_len -= pmtu;
+ update_last_psn(qp, psn);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ qib_copy_sge(&qp->s_rdma_read_sge, data, pmtu, 0);
+ goto bail;
+
+ case OP(RDMA_READ_RESPONSE_ONLY):
+ aeth = be32_to_cpu(ohdr->u.aeth);
+ if (!do_rc_ack(qp, aeth, psn, opcode, 0, rcd))
+ goto ack_done;
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /*
+ * Check that the data size is >= 0 && <= pmtu.
+ * Remember to account for the AETH header (4) and
+ * ICRC (4).
+ */
+ if (unlikely(tlen < (hdrsize + pad + 8)))
+ goto ack_len_err;
+ /*
+ * If this is a response to a resent RDMA read, we
+ * have to be careful to copy the data to the right
+ * location.
+ */
+ wqe = get_swqe_ptr(qp, qp->s_acked);
+ qp->s_rdma_read_len = restart_sge(&qp->s_rdma_read_sge,
+ wqe, psn, pmtu);
+ goto read_last;
+
+ case OP(RDMA_READ_RESPONSE_LAST):
+ /* ACKs READ req. */
+ if (unlikely(qib_cmp24(psn, qp->s_last_psn + 1)))
+ goto ack_seq_err;
+ if (unlikely(wqe->wr.opcode != IB_WR_RDMA_READ))
+ goto ack_op_err;
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /*
+ * Check that the data size is >= 1 && <= pmtu.
+ * Remember to account for the AETH header (4) and
+ * ICRC (4).
+ */
+ if (unlikely(tlen <= (hdrsize + pad + 8)))
+ goto ack_len_err;
+read_last:
+ tlen -= hdrsize + pad + 8;
+ if (unlikely(tlen != qp->s_rdma_read_len))
+ goto ack_len_err;
+ aeth = be32_to_cpu(ohdr->u.aeth);
+ qib_copy_sge(&qp->s_rdma_read_sge, data, tlen, 0);
+ WARN_ON(qp->s_rdma_read_sge.num_sge);
+ (void) do_rc_ack(qp, aeth, psn,
+ OP(RDMA_READ_RESPONSE_LAST), 0, rcd);
+ goto ack_done;
+ }
+
+ack_op_err:
+ status = IB_WC_LOC_QP_OP_ERR;
+ goto ack_err;
+
+ack_seq_err:
+ rdma_seq_err(qp, ibp, psn, rcd);
+ goto ack_done;
+
+ack_len_err:
+ status = IB_WC_LOC_LEN_ERR;
+ack_err:
+ if (qp->s_last == qp->s_acked) {
+ qib_send_complete(qp, wqe, status);
+ qib_error_qp(qp, IB_WC_WR_FLUSH_ERR);
+ }
+ack_done:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+bail:
+ return;
+}
+
+/**
+ * qib_rc_rcv_error - process an incoming duplicate or error RC packet
+ * @ohdr: the other headers for this packet
+ * @data: the packet data
+ * @qp: the QP for this packet
+ * @opcode: the opcode for this packet
+ * @psn: the packet sequence number for this packet
+ * @diff: the difference between the PSN and the expected PSN
+ *
+ * This is called from qib_rc_rcv() to process an unexpected
+ * incoming RC packet for the given QP.
+ * Called at interrupt level.
+ * Return 1 if no more processing is needed; otherwise return 0 to
+ * schedule a response to be sent.
+ */
+static int qib_rc_rcv_error(struct qib_other_headers *ohdr,
+ void *data,
+ struct qib_qp *qp,
+ u32 opcode,
+ u32 psn,
+ int diff,
+ struct qib_ctxtdata *rcd)
+{
+ struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct qib_ack_entry *e;
+ unsigned long flags;
+ u8 i, prev;
+ int old_req;
+
+ if (diff > 0) {
+ /*
+ * Packet sequence error.
+ * A NAK will ACK earlier sends and RDMA writes.
+ * Don't queue the NAK if we already sent one.
+ */
+ if (!qp->r_nak_state) {
+ ibp->n_rc_seqnak++;
+ qp->r_nak_state = IB_NAK_PSN_ERROR;
+ /* Use the expected PSN. */
+ qp->r_ack_psn = qp->r_psn;
+ /*
+ * Wait to send the sequence NAK until all packets
+ * in the receive queue have been processed.
+ * Otherwise, we end up propagating congestion.
+ */
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |= QIB_R_RSP_NAK;
+ atomic_inc(&qp->refcount);
+ list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+ }
+ }
+ goto done;
+ }
+
+ /*
+ * Handle a duplicate request. Don't re-execute SEND, RDMA
+ * write or atomic op. Don't NAK errors, just silently drop
+ * the duplicate request. Note that r_sge, r_len, and
+ * r_rcv_len may be in use so don't modify them.
+ *
+ * We are supposed to ACK the earliest duplicate PSN but we
+ * can coalesce an outstanding duplicate ACK. We have to
+ * send the earliest so that RDMA reads can be restarted at
+ * the requester's expected PSN.
+ *
+ * First, find where this duplicate PSN falls within the
+ * ACKs previously sent.
+ * old_req is true if there is an older response that is scheduled
+ * to be sent before sending this one.
+ */
+ e = NULL;
+ old_req = 1;
+ ibp->n_rc_dupreq++;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this now that we hold the s_lock. */
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+ goto unlock_done;
+
+ for (i = qp->r_head_ack_queue; ; i = prev) {
+ if (i == qp->s_tail_ack_queue)
+ old_req = 0;
+ if (i)
+ prev = i - 1;
+ else
+ prev = QIB_MAX_RDMA_ATOMIC;
+ if (prev == qp->r_head_ack_queue) {
+ e = NULL;
+ break;
+ }
+ e = &qp->s_ack_queue[prev];
+ if (!e->opcode) {
+ e = NULL;
+ break;
+ }
+ if (qib_cmp24(psn, e->psn) >= 0) {
+ if (prev == qp->s_tail_ack_queue &&
+ qib_cmp24(psn, e->lpsn) <= 0)
+ old_req = 0;
+ break;
+ }
+ }
+ switch (opcode) {
+ case OP(RDMA_READ_REQUEST): {
+ struct ib_reth *reth;
+ u32 offset;
+ u32 len;
+
+ /*
+ * If we didn't find the RDMA read request in the ack queue,
+ * we can ignore this request.
+ */
+ if (!e || e->opcode != OP(RDMA_READ_REQUEST))
+ goto unlock_done;
+ /* RETH comes after BTH */
+ reth = &ohdr->u.rc.reth;
+ /*
+ * Address range must be a subset of the original
+ * request and start on pmtu boundaries.
+ * We reuse the old ack_queue slot since the requester
+ * should not back up and request an earlier PSN for the
+ * same request.
+ */
+ offset = ((psn - e->psn) & QIB_PSN_MASK) *
+ ib_mtu_enum_to_int(qp->path_mtu);
+ len = be32_to_cpu(reth->length);
+ if (unlikely(offset + len != e->rdma_sge.sge_length))
+ goto unlock_done;
+ if (e->rdma_sge.mr) {
+ atomic_dec(&e->rdma_sge.mr->refcount);
+ e->rdma_sge.mr = NULL;
+ }
+ if (len != 0) {
+ u32 rkey = be32_to_cpu(reth->rkey);
+ u64 vaddr = be64_to_cpu(reth->vaddr);
+ int ok;
+
+ ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr, rkey,
+ IB_ACCESS_REMOTE_READ);
+ if (unlikely(!ok))
+ goto unlock_done;
+ } else {
+ e->rdma_sge.vaddr = NULL;
+ e->rdma_sge.length = 0;
+ e->rdma_sge.sge_length = 0;
+ }
+ e->psn = psn;
+ if (old_req)
+ goto unlock_done;
+ qp->s_tail_ack_queue = prev;
+ break;
+ }
+
+ case OP(COMPARE_SWAP):
+ case OP(FETCH_ADD): {
+ /*
+ * If we didn't find the atomic request in the ack queue
+ * or the send tasklet is already backed up to send an
+ * earlier entry, we can ignore this request.
+ */
+ if (!e || e->opcode != (u8) opcode || old_req)
+ goto unlock_done;
+ qp->s_tail_ack_queue = prev;
+ break;
+ }
+
+ default:
+ /*
+ * Ignore this operation if it doesn't request an ACK
+ * or an earlier RDMA read or atomic is going to be resent.
+ */
+ if (!(psn & IB_BTH_REQ_ACK) || old_req)
+ goto unlock_done;
+ /*
+ * Resend the most recent ACK if this request is
+ * after all the previous RDMA reads and atomics.
+ */
+ if (i == qp->r_head_ack_queue) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ qp->r_nak_state = 0;
+ qp->r_ack_psn = qp->r_psn - 1;
+ goto send_ack;
+ }
+ /*
+ * Try to send a simple ACK to work around a Mellanox bug
+ * which doesn't accept a RDMA read response or atomic
+ * response as an ACK for earlier SENDs or RDMA writes.
+ */
+ if (!(qp->s_flags & QIB_S_RESP_PENDING)) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ qp->r_nak_state = 0;
+ qp->r_ack_psn = qp->s_ack_queue[i].psn - 1;
+ goto send_ack;
+ }
+ /*
+ * Resend the RDMA read or atomic op which
+ * ACKs this duplicate request.
+ */
+ qp->s_tail_ack_queue = i;
+ break;
+ }
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+ qp->s_flags |= QIB_S_RESP_PENDING;
+ qp->r_nak_state = 0;
+ qib_schedule_send(qp);
+
+unlock_done:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+done:
+ return 1;
+
+send_ack:
+ return 0;
+}
+
+void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err)
+{
+ unsigned long flags;
+ int lastwqe;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ lastwqe = qib_error_qp(qp, err);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ if (lastwqe) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+}
+
+static inline void qib_update_ack_queue(struct qib_qp *qp, unsigned n)
+{
+ unsigned next;
+
+ next = n + 1;
+ if (next > QIB_MAX_RDMA_ATOMIC)
+ next = 0;
+ qp->s_tail_ack_queue = next;
+ qp->s_ack_state = OP(ACKNOWLEDGE);
+}
+
+/**
+ * qib_rc_rcv - process an incoming RC packet
+ * @rcd: the context pointer
+ * @hdr: the header of this packet
+ * @has_grh: true if the header has a GRH
+ * @data: the packet data
+ * @tlen: the packet length
+ * @qp: the QP for this packet
+ *
+ * This is called from qib_qp_rcv() to process an incoming RC packet
+ * for the given QP.
+ * Called at interrupt level.
+ */
+void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+{
+ struct qib_ibport *ibp = &rcd->ppd->ibport_data;
+ struct qib_other_headers *ohdr;
+ u32 opcode;
+ u32 hdrsize;
+ u32 psn;
+ u32 pad;
+ struct ib_wc wc;
+ u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+ int diff;
+ struct ib_reth *reth;
+ unsigned long flags;
+ int ret;
+
+ /* Check for GRH */
+ if (!has_grh) {
+ ohdr = &hdr->u.oth;
+ hdrsize = 8 + 12; /* LRH + BTH */
+ } else {
+ ohdr = &hdr->u.l.oth;
+ hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */
+ }
+
+ opcode = be32_to_cpu(ohdr->bth[0]);
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
+ goto sunlock;
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ psn = be32_to_cpu(ohdr->bth[2]);
+ opcode >>= 24;
+
+ /* Prevent simultaneous processing after APM on different CPUs */
+ spin_lock(&qp->r_lock);
+
+ /*
+ * Process responses (ACKs) before anything else. Note that the
+ * packet sequence number will be for something in the send work
+ * queue rather than the expected receive packet sequence number.
+ * In other words, this QP is the requester.
+ */
+ if (opcode >= OP(RDMA_READ_RESPONSE_FIRST) &&
+ opcode <= OP(ATOMIC_ACKNOWLEDGE)) {
+ qib_rc_rcv_resp(ibp, ohdr, data, tlen, qp, opcode, psn,
+ hdrsize, pmtu, rcd);
+ goto runlock;
+ }
+
+ /* Compute 24 bits worth of difference. */
+ diff = qib_cmp24(psn, qp->r_psn);
+ if (unlikely(diff)) {
+ if (qib_rc_rcv_error(ohdr, data, qp, opcode, psn, diff, rcd))
+ goto runlock;
+ goto send_ack;
+ }
+
+ /* Check for opcode sequence errors. */
+ switch (qp->r_state) {
+ case OP(SEND_FIRST):
+ case OP(SEND_MIDDLE):
+ if (opcode == OP(SEND_MIDDLE) ||
+ opcode == OP(SEND_LAST) ||
+ opcode == OP(SEND_LAST_WITH_IMMEDIATE))
+ break;
+ goto nack_inv;
+
+ case OP(RDMA_WRITE_FIRST):
+ case OP(RDMA_WRITE_MIDDLE):
+ if (opcode == OP(RDMA_WRITE_MIDDLE) ||
+ opcode == OP(RDMA_WRITE_LAST) ||
+ opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
+ break;
+ goto nack_inv;
+
+ default:
+ if (opcode == OP(SEND_MIDDLE) ||
+ opcode == OP(SEND_LAST) ||
+ opcode == OP(SEND_LAST_WITH_IMMEDIATE) ||
+ opcode == OP(RDMA_WRITE_MIDDLE) ||
+ opcode == OP(RDMA_WRITE_LAST) ||
+ opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
+ goto nack_inv;
+ /*
+ * Note that it is up to the requester to not send a new
+ * RDMA read or atomic operation before receiving an ACK
+ * for the previous operation.
+ */
+ break;
+ }
+
+ memset(&wc, 0, sizeof wc);
+
+ if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
+ qp->r_flags |= QIB_R_COMM_EST;
+ if (qp->ibqp.event_handler) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_COMM_EST;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+ }
+
+ /* OK, process the packet. */
+ switch (opcode) {
+ case OP(SEND_FIRST):
+ ret = qib_get_rwqe(qp, 0);
+ if (ret < 0)
+ goto nack_op_err;
+ if (!ret)
+ goto rnr_nak;
+ qp->r_rcv_len = 0;
+ /* FALLTHROUGH */
+ case OP(SEND_MIDDLE):
+ case OP(RDMA_WRITE_MIDDLE):
+send_middle:
+ /* Check for invalid length PMTU or posted rwqe len. */
+ if (unlikely(tlen != (hdrsize + pmtu + 4)))
+ goto nack_inv;
+ qp->r_rcv_len += pmtu;
+ if (unlikely(qp->r_rcv_len > qp->r_len))
+ goto nack_inv;
+ qib_copy_sge(&qp->r_sge, data, pmtu, 1);
+ break;
+
+ case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
+ /* consume RWQE */
+ ret = qib_get_rwqe(qp, 1);
+ if (ret < 0)
+ goto nack_op_err;
+ if (!ret)
+ goto rnr_nak;
+ goto send_last_imm;
+
+ case OP(SEND_ONLY):
+ case OP(SEND_ONLY_WITH_IMMEDIATE):
+ ret = qib_get_rwqe(qp, 0);
+ if (ret < 0)
+ goto nack_op_err;
+ if (!ret)
+ goto rnr_nak;
+ qp->r_rcv_len = 0;
+ if (opcode == OP(SEND_ONLY))
+ goto send_last;
+ /* FALLTHROUGH */
+ case OP(SEND_LAST_WITH_IMMEDIATE):
+send_last_imm:
+ wc.ex.imm_data = ohdr->u.imm_data;
+ hdrsize += 4;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ /* FALLTHROUGH */
+ case OP(SEND_LAST):
+ case OP(RDMA_WRITE_LAST):
+send_last:
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /* Check for invalid length. */
+ /* XXX LAST len should be >= 1 */
+ if (unlikely(tlen < (hdrsize + pad + 4)))
+ goto nack_inv;
+ /* Don't count the CRC. */
+ tlen -= (hdrsize + pad + 4);
+ wc.byte_len = tlen + qp->r_rcv_len;
+ if (unlikely(wc.byte_len > qp->r_len))
+ goto nack_inv;
+ qib_copy_sge(&qp->r_sge, data, tlen, 1);
+ while (qp->r_sge.num_sge) {
+ atomic_dec(&qp->r_sge.sge.mr->refcount);
+ if (--qp->r_sge.num_sge)
+ qp->r_sge.sge = *qp->r_sge.sg_list++;
+ }
+ qp->r_msn++;
+ if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+ break;
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
+ opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ else
+ wc.opcode = IB_WC_RECV;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = qp->remote_qpn;
+ wc.slid = qp->remote_ah_attr.dlid;
+ wc.sl = qp->remote_ah_attr.sl;
+ /* Signal completion event if the solicited bit is set. */
+ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ (ohdr->bth[0] &
+ cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+ break;
+
+ case OP(RDMA_WRITE_FIRST):
+ case OP(RDMA_WRITE_ONLY):
+ case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto nack_inv;
+ /* consume RWQE */
+ reth = &ohdr->u.rc.reth;
+ hdrsize += sizeof(*reth);
+ qp->r_len = be32_to_cpu(reth->length);
+ qp->r_rcv_len = 0;
+ qp->r_sge.sg_list = NULL;
+ if (qp->r_len != 0) {
+ u32 rkey = be32_to_cpu(reth->rkey);
+ u64 vaddr = be64_to_cpu(reth->vaddr);
+ int ok;
+
+ /* Check rkey & NAK */
+ ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len, vaddr,
+ rkey, IB_ACCESS_REMOTE_WRITE);
+ if (unlikely(!ok))
+ goto nack_acc;
+ qp->r_sge.num_sge = 1;
+ } else {
+ qp->r_sge.num_sge = 0;
+ qp->r_sge.sge.mr = NULL;
+ qp->r_sge.sge.vaddr = NULL;
+ qp->r_sge.sge.length = 0;
+ qp->r_sge.sge.sge_length = 0;
+ }
+ if (opcode == OP(RDMA_WRITE_FIRST))
+ goto send_middle;
+ else if (opcode == OP(RDMA_WRITE_ONLY))
+ goto send_last;
+ ret = qib_get_rwqe(qp, 1);
+ if (ret < 0)
+ goto nack_op_err;
+ if (!ret)
+ goto rnr_nak;
+ goto send_last_imm;
+
+ case OP(RDMA_READ_REQUEST): {
+ struct qib_ack_entry *e;
+ u32 len;
+ u8 next;
+
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+ goto nack_inv;
+ next = qp->r_head_ack_queue + 1;
+ /* s_ack_queue is size QIB_MAX_RDMA_ATOMIC+1 so use > not >= */
+ if (next > QIB_MAX_RDMA_ATOMIC)
+ next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this while holding the s_lock. */
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+ goto srunlock;
+ if (unlikely(next == qp->s_tail_ack_queue)) {
+ if (!qp->s_ack_queue[next].sent)
+ goto nack_inv_unlck;
+ qib_update_ack_queue(qp, next);
+ }
+ e = &qp->s_ack_queue[qp->r_head_ack_queue];
+ if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
+ atomic_dec(&e->rdma_sge.mr->refcount);
+ e->rdma_sge.mr = NULL;
+ }
+ reth = &ohdr->u.rc.reth;
+ len = be32_to_cpu(reth->length);
+ if (len) {
+ u32 rkey = be32_to_cpu(reth->rkey);
+ u64 vaddr = be64_to_cpu(reth->vaddr);
+ int ok;
+
+ /* Check rkey & NAK */
+ ok = qib_rkey_ok(qp, &e->rdma_sge, len, vaddr,
+ rkey, IB_ACCESS_REMOTE_READ);
+ if (unlikely(!ok))
+ goto nack_acc_unlck;
+ /*
+ * Update the next expected PSN. We add 1 later
+ * below, so only add the remainder here.
+ */
+ if (len > pmtu)
+ qp->r_psn += (len - 1) / pmtu;
+ } else {
+ e->rdma_sge.mr = NULL;
+ e->rdma_sge.vaddr = NULL;
+ e->rdma_sge.length = 0;
+ e->rdma_sge.sge_length = 0;
+ }
+ e->opcode = opcode;
+ e->sent = 0;
+ e->psn = psn;
+ e->lpsn = qp->r_psn;
+ /*
+ * We need to increment the MSN here instead of when we
+ * finish sending the result since a duplicate request would
+ * increment it more than once.
+ */
+ qp->r_msn++;
+ qp->r_psn++;
+ qp->r_state = opcode;
+ qp->r_nak_state = 0;
+ qp->r_head_ack_queue = next;
+
+ /* Schedule the send tasklet. */
+ qp->s_flags |= QIB_S_RESP_PENDING;
+ qib_schedule_send(qp);
+
+ goto srunlock;
+ }
+
+ case OP(COMPARE_SWAP):
+ case OP(FETCH_ADD): {
+ struct ib_atomic_eth *ateth;
+ struct qib_ack_entry *e;
+ u64 vaddr;
+ atomic64_t *maddr;
+ u64 sdata;
+ u32 rkey;
+ u8 next;
+
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+ goto nack_inv;
+ next = qp->r_head_ack_queue + 1;
+ if (next > QIB_MAX_RDMA_ATOMIC)
+ next = 0;
+ spin_lock_irqsave(&qp->s_lock, flags);
+ /* Double check we can process this while holding the s_lock. */
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK))
+ goto srunlock;
+ if (unlikely(next == qp->s_tail_ack_queue)) {
+ if (!qp->s_ack_queue[next].sent)
+ goto nack_inv_unlck;
+ qib_update_ack_queue(qp, next);
+ }
+ e = &qp->s_ack_queue[qp->r_head_ack_queue];
+ if (e->opcode == OP(RDMA_READ_REQUEST) && e->rdma_sge.mr) {
+ atomic_dec(&e->rdma_sge.mr->refcount);
+ e->rdma_sge.mr = NULL;
+ }
+ ateth = &ohdr->u.atomic_eth;
+ vaddr = ((u64) be32_to_cpu(ateth->vaddr[0]) << 32) |
+ be32_to_cpu(ateth->vaddr[1]);
+ if (unlikely(vaddr & (sizeof(u64) - 1)))
+ goto nack_inv_unlck;
+ rkey = be32_to_cpu(ateth->rkey);
+ /* Check rkey & NAK */
+ if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+ vaddr, rkey,
+ IB_ACCESS_REMOTE_ATOMIC)))
+ goto nack_acc_unlck;
+ /* Perform atomic OP and save result. */
+ maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+ sdata = be64_to_cpu(ateth->swap_data);
+ e->atomic_data = (opcode == OP(FETCH_ADD)) ?
+ (u64) atomic64_add_return(sdata, maddr) - sdata :
+ (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+ be64_to_cpu(ateth->compare_data),
+ sdata);
+ atomic_dec(&qp->r_sge.sge.mr->refcount);
+ qp->r_sge.num_sge = 0;
+ e->opcode = opcode;
+ e->sent = 0;
+ e->psn = psn;
+ e->lpsn = psn;
+ qp->r_msn++;
+ qp->r_psn++;
+ qp->r_state = opcode;
+ qp->r_nak_state = 0;
+ qp->r_head_ack_queue = next;
+
+ /* Schedule the send tasklet. */
+ qp->s_flags |= QIB_S_RESP_PENDING;
+ qib_schedule_send(qp);
+
+ goto srunlock;
+ }
+
+ default:
+ /* NAK unknown opcodes. */
+ goto nack_inv;
+ }
+ qp->r_psn++;
+ qp->r_state = opcode;
+ qp->r_ack_psn = psn;
+ qp->r_nak_state = 0;
+ /* Send an ACK if requested or required. */
+ if (psn & (1 << 31))
+ goto send_ack;
+ goto runlock;
+
+rnr_nak:
+ qp->r_nak_state = IB_RNR_NAK | qp->r_min_rnr_timer;
+ qp->r_ack_psn = qp->r_psn;
+ /* Queue RNR NAK for later */
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |= QIB_R_RSP_NAK;
+ atomic_inc(&qp->refcount);
+ list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+ }
+ goto runlock;
+
+nack_op_err:
+ qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ qp->r_nak_state = IB_NAK_REMOTE_OPERATIONAL_ERROR;
+ qp->r_ack_psn = qp->r_psn;
+ /* Queue NAK for later */
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |= QIB_R_RSP_NAK;
+ atomic_inc(&qp->refcount);
+ list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+ }
+ goto runlock;
+
+nack_inv_unlck:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+nack_inv:
+ qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ qp->r_nak_state = IB_NAK_INVALID_REQUEST;
+ qp->r_ack_psn = qp->r_psn;
+ /* Queue NAK for later */
+ if (list_empty(&qp->rspwait)) {
+ qp->r_flags |= QIB_R_RSP_NAK;
+ atomic_inc(&qp->refcount);
+ list_add_tail(&qp->rspwait, &rcd->qp_wait_list);
+ }
+ goto runlock;
+
+nack_acc_unlck:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+nack_acc:
+ qib_rc_error(qp, IB_WC_LOC_PROT_ERR);
+ qp->r_nak_state = IB_NAK_REMOTE_ACCESS_ERROR;
+ qp->r_ack_psn = qp->r_psn;
+send_ack:
+ qib_send_rc_ack(qp);
+runlock:
+ spin_unlock(&qp->r_lock);
+ return;
+
+srunlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ spin_unlock(&qp->r_lock);
+ return;
+
+sunlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 32/52] IB/qib: Add qib_ruc.c
From: Ralph Campbell @ 2010-05-07 0:01 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_ruc.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_ruc.c | 817 +++++++++++++++++++++++++++++++++++
1 files changed, 817 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_ruc.c
diff --git a/drivers/infiniband/hw/qib/qib_ruc.c b/drivers/infiniband/hw/qib/qib_ruc.c
new file mode 100644
index 0000000..eb78d93
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_ruc.c
@@ -0,0 +1,817 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+
+#include "qib.h"
+#include "qib_mad.h"
+
+/*
+ * Convert the AETH RNR timeout code into the number of microseconds.
+ */
+const u32 ib_qib_rnr_table[32] = {
+ 655360, /* 00: 655.36 */
+ 10, /* 01: .01 */
+ 20, /* 02 .02 */
+ 30, /* 03: .03 */
+ 40, /* 04: .04 */
+ 60, /* 05: .06 */
+ 80, /* 06: .08 */
+ 120, /* 07: .12 */
+ 160, /* 08: .16 */
+ 240, /* 09: .24 */
+ 320, /* 0A: .32 */
+ 480, /* 0B: .48 */
+ 640, /* 0C: .64 */
+ 960, /* 0D: .96 */
+ 1280, /* 0E: 1.28 */
+ 1920, /* 0F: 1.92 */
+ 2560, /* 10: 2.56 */
+ 3840, /* 11: 3.84 */
+ 5120, /* 12: 5.12 */
+ 7680, /* 13: 7.68 */
+ 10240, /* 14: 10.24 */
+ 15360, /* 15: 15.36 */
+ 20480, /* 16: 20.48 */
+ 30720, /* 17: 30.72 */
+ 40960, /* 18: 40.96 */
+ 61440, /* 19: 61.44 */
+ 81920, /* 1A: 81.92 */
+ 122880, /* 1B: 122.88 */
+ 163840, /* 1C: 163.84 */
+ 245760, /* 1D: 245.76 */
+ 327680, /* 1E: 327.68 */
+ 491520 /* 1F: 491.52 */
+};
+
+/*
+ * Validate a RWQE and fill in the SGE state.
+ * Return 1 if OK.
+ */
+static int qib_init_sge(struct qib_qp *qp, struct qib_rwqe *wqe)
+{
+ int i, j, ret;
+ struct ib_wc wc;
+ struct qib_lkey_table *rkt;
+ struct qib_pd *pd;
+ struct qib_sge_state *ss;
+
+ rkt = &to_idev(qp->ibqp.device)->lk_table;
+ pd = to_ipd(qp->ibqp.srq ? qp->ibqp.srq->pd : qp->ibqp.pd);
+ ss = &qp->r_sge;
+ ss->sg_list = qp->r_sg_list;
+ qp->r_len = 0;
+ for (i = j = 0; i < wqe->num_sge; i++) {
+ if (wqe->sg_list[i].length == 0)
+ continue;
+ /* Check LKEY */
+ if (!qib_lkey_ok(rkt, pd, j ? &ss->sg_list[j - 1] : &ss->sge,
+ &wqe->sg_list[i], IB_ACCESS_LOCAL_WRITE))
+ goto bad_lkey;
+ qp->r_len += wqe->sg_list[i].length;
+ j++;
+ }
+ ss->num_sge = j;
+ ss->total_len = qp->r_len;
+ ret = 1;
+ goto bail;
+
+bad_lkey:
+ while (j) {
+ struct qib_sge *sge = --j ? &ss->sg_list[j - 1] : &ss->sge;
+
+ atomic_dec(&sge->mr->refcount);
+ }
+ ss->num_sge = 0;
+ memset(&wc, 0, sizeof(wc));
+ wc.wr_id = wqe->wr_id;
+ wc.status = IB_WC_LOC_PROT_ERR;
+ wc.opcode = IB_WC_RECV;
+ wc.qp = &qp->ibqp;
+ /* Signal solicited completion event. */
+ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc, 1);
+ ret = 0;
+bail:
+ return ret;
+}
+
+/**
+ * qib_get_rwqe - copy the next RWQE into the QP's RWQE
+ * @qp: the QP
+ * @wr_id_only: update qp->r_wr_id only, not qp->r_sge
+ *
+ * Return -1 if there is a local error, 0 if no RWQE is available,
+ * otherwise return 1.
+ *
+ * Can be called from interrupt level.
+ */
+int qib_get_rwqe(struct qib_qp *qp, int wr_id_only)
+{
+ unsigned long flags;
+ struct qib_rq *rq;
+ struct qib_rwq *wq;
+ struct qib_srq *srq;
+ struct qib_rwqe *wqe;
+ void (*handler)(struct ib_event *, void *);
+ u32 tail;
+ int ret;
+
+ if (qp->ibqp.srq) {
+ srq = to_isrq(qp->ibqp.srq);
+ handler = srq->ibsrq.event_handler;
+ rq = &srq->rq;
+ } else {
+ srq = NULL;
+ handler = NULL;
+ rq = &qp->r_rq;
+ }
+
+ spin_lock_irqsave(&rq->lock, flags);
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
+ ret = 0;
+ goto unlock;
+ }
+
+ wq = rq->wq;
+ tail = wq->tail;
+ /* Validate tail before using it since it is user writable. */
+ if (tail >= rq->size)
+ tail = 0;
+ if (unlikely(tail == wq->head)) {
+ ret = 0;
+ goto unlock;
+ }
+ /* Make sure entry is read after head index is read. */
+ smp_rmb();
+ wqe = get_rwqe_ptr(rq, tail);
+ /*
+ * Even though we update the tail index in memory, the verbs
+ * consumer is not supposed to post more entries until a
+ * completion is generated.
+ */
+ if (++tail >= rq->size)
+ tail = 0;
+ wq->tail = tail;
+ if (!wr_id_only && !qib_init_sge(qp, wqe)) {
+ ret = -1;
+ goto unlock;
+ }
+ qp->r_wr_id = wqe->wr_id;
+
+ ret = 1;
+ set_bit(QIB_R_WRID_VALID, &qp->r_aflags);
+ if (handler) {
+ u32 n;
+
+ /*
+ * Validate head pointer value and compute
+ * the number of remaining WQEs.
+ */
+ n = wq->head;
+ if (n >= rq->size)
+ n = 0;
+ if (n < tail)
+ n += rq->size - tail;
+ else
+ n -= tail;
+ if (n < srq->limit) {
+ struct ib_event ev;
+
+ srq->limit = 0;
+ spin_unlock_irqrestore(&rq->lock, flags);
+ ev.device = qp->ibqp.device;
+ ev.element.srq = qp->ibqp.srq;
+ ev.event = IB_EVENT_SRQ_LIMIT_REACHED;
+ handler(&ev, srq->ibsrq.srq_context);
+ goto bail;
+ }
+ }
+unlock:
+ spin_unlock_irqrestore(&rq->lock, flags);
+bail:
+ return ret;
+}
+
+/*
+ * Switch to alternate path.
+ * The QP s_lock should be held and interrupts disabled.
+ */
+void qib_migrate_qp(struct qib_qp *qp)
+{
+ struct ib_event ev;
+
+ qp->s_mig_state = IB_MIG_MIGRATED;
+ qp->remote_ah_attr = qp->alt_ah_attr;
+ qp->port_num = qp->alt_ah_attr.port_num;
+ qp->s_pkey_index = qp->s_alt_pkey_index;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_PATH_MIG;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+}
+
+static __be64 get_sguid(struct qib_ibport *ibp, unsigned index)
+{
+ if (!index) {
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+ return ppd->guid;
+ } else
+ return ibp->guids[index - 1];
+}
+
+static int gid_ok(union ib_gid *gid, __be64 gid_prefix, __be64 id)
+{
+ return (gid->global.interface_id == id &&
+ (gid->global.subnet_prefix == gid_prefix ||
+ gid->global.subnet_prefix == IB_DEFAULT_GID_PREFIX));
+}
+
+/*
+ *
+ * This should be called with the QP s_lock held.
+ */
+int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+ int has_grh, struct qib_qp *qp, u32 bth0)
+{
+ __be64 guid;
+
+ if (qp->s_mig_state == IB_MIG_ARMED && (bth0 & IB_BTH_MIG_REQ)) {
+ if (!has_grh) {
+ if (qp->alt_ah_attr.ah_flags & IB_AH_GRH)
+ goto err;
+ } else {
+ if (!(qp->alt_ah_attr.ah_flags & IB_AH_GRH))
+ goto err;
+ guid = get_sguid(ibp, qp->alt_ah_attr.grh.sgid_index);
+ if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+ goto err;
+ if (!gid_ok(&hdr->u.l.grh.sgid,
+ qp->alt_ah_attr.grh.dgid.global.subnet_prefix,
+ qp->alt_ah_attr.grh.dgid.global.interface_id))
+ goto err;
+ }
+ if (!qib_pkey_ok((u16)bth0,
+ qib_get_pkey(ibp, qp->s_alt_pkey_index))) {
+ qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
+ (u16)bth0,
+ (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ 0, qp->ibqp.qp_num,
+ hdr->lrh[3], hdr->lrh[1]);
+ goto err;
+ }
+ /* Validate the SLID. See Ch. 9.6.1.5 and 17.2.8 */
+ if (be16_to_cpu(hdr->lrh[3]) != qp->alt_ah_attr.dlid ||
+ ppd_from_ibp(ibp)->port != qp->alt_ah_attr.port_num)
+ goto err;
+ qib_migrate_qp(qp);
+ } else {
+ if (!has_grh) {
+ if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ goto err;
+ } else {
+ if (!(qp->remote_ah_attr.ah_flags & IB_AH_GRH))
+ goto err;
+ guid = get_sguid(ibp,
+ qp->remote_ah_attr.grh.sgid_index);
+ if (!gid_ok(&hdr->u.l.grh.dgid, ibp->gid_prefix, guid))
+ goto err;
+ if (!gid_ok(&hdr->u.l.grh.sgid,
+ qp->remote_ah_attr.grh.dgid.global.subnet_prefix,
+ qp->remote_ah_attr.grh.dgid.global.interface_id))
+ goto err;
+ }
+ if (!qib_pkey_ok((u16)bth0,
+ qib_get_pkey(ibp, qp->s_pkey_index))) {
+ qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
+ (u16)bth0,
+ (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ 0, qp->ibqp.qp_num,
+ hdr->lrh[3], hdr->lrh[1]);
+ goto err;
+ }
+ /* Validate the SLID. See Ch. 9.6.1.5 */
+ if (be16_to_cpu(hdr->lrh[3]) != qp->remote_ah_attr.dlid ||
+ ppd_from_ibp(ibp)->port != qp->port_num)
+ goto err;
+ if (qp->s_mig_state == IB_MIG_REARM &&
+ !(bth0 & IB_BTH_MIG_REQ))
+ qp->s_mig_state = IB_MIG_ARMED;
+ }
+
+ return 0;
+
+err:
+ return 1;
+}
+
+/**
+ * qib_ruc_loopback - handle UC and RC lookback requests
+ * @sqp: the sending QP
+ *
+ * This is called from qib_do_send() to
+ * forward a WQE addressed to the same HCA.
+ * Note that although we are single threaded due to the tasklet, we still
+ * have to protect against post_send(). We don't have to worry about
+ * receive interrupts since this is a connected protocol and all packets
+ * will pass through here.
+ */
+static void qib_ruc_loopback(struct qib_qp *sqp)
+{
+ struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
+ struct qib_qp *qp;
+ struct qib_swqe *wqe;
+ struct qib_sge *sge;
+ unsigned long flags;
+ struct ib_wc wc;
+ u64 sdata;
+ atomic64_t *maddr;
+ enum ib_wc_status send_status;
+ int release;
+ int ret;
+
+ /*
+ * Note that we check the responder QP state after
+ * checking the requester's state.
+ */
+ qp = qib_lookup_qpn(ibp, sqp->remote_qpn);
+
+ spin_lock_irqsave(&sqp->s_lock, flags);
+
+ /* Return if we are already busy processing a work request. */
+ if ((sqp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT)) ||
+ !(ib_qib_state_ops[sqp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+ goto unlock;
+
+ sqp->s_flags |= QIB_S_BUSY;
+
+again:
+ if (sqp->s_last == sqp->s_head)
+ goto clr_busy;
+ wqe = get_swqe_ptr(sqp, sqp->s_last);
+
+ /* Return if it is not OK to start a new work reqeust. */
+ if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_qib_state_ops[sqp->state] & QIB_FLUSH_SEND))
+ goto clr_busy;
+ /* We are in the error state, flush the work request. */
+ send_status = IB_WC_WR_FLUSH_ERR;
+ goto flush_send;
+ }
+
+ /*
+ * We can rely on the entry not changing without the s_lock
+ * being held until we update s_last.
+ * We increment s_cur to indicate s_last is in progress.
+ */
+ if (sqp->s_last == sqp->s_cur) {
+ if (++sqp->s_cur >= sqp->s_size)
+ sqp->s_cur = 0;
+ }
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+
+ if (!qp || !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) ||
+ qp->ibqp.qp_type != sqp->ibqp.qp_type) {
+ ibp->n_pkt_drops++;
+ /*
+ * For RC, the requester would timeout and retry so
+ * shortcut the timeouts and just signal too many retries.
+ */
+ if (sqp->ibqp.qp_type == IB_QPT_RC)
+ send_status = IB_WC_RETRY_EXC_ERR;
+ else
+ send_status = IB_WC_SUCCESS;
+ goto serr;
+ }
+
+ memset(&wc, 0, sizeof wc);
+ send_status = IB_WC_SUCCESS;
+
+ release = 1;
+ sqp->s_sge.sge = wqe->sg_list[0];
+ sqp->s_sge.sg_list = wqe->sg_list + 1;
+ sqp->s_sge.num_sge = wqe->wr.num_sge;
+ sqp->s_len = wqe->length;
+ switch (wqe->wr.opcode) {
+ case IB_WR_SEND_WITH_IMM:
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.ex.imm_data = wqe->wr.ex.imm_data;
+ /* FALLTHROUGH */
+ case IB_WR_SEND:
+ ret = qib_get_rwqe(qp, 0);
+ if (ret < 0)
+ goto op_err;
+ if (!ret)
+ goto rnr_nak;
+ break;
+
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto inv_err;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.ex.imm_data = wqe->wr.ex.imm_data;
+ ret = qib_get_rwqe(qp, 1);
+ if (ret < 0)
+ goto op_err;
+ if (!ret)
+ goto rnr_nak;
+ /* FALLTHROUGH */
+ case IB_WR_RDMA_WRITE:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_WRITE)))
+ goto inv_err;
+ if (wqe->length == 0)
+ break;
+ if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, wqe->length,
+ wqe->wr.wr.rdma.remote_addr,
+ wqe->wr.wr.rdma.rkey,
+ IB_ACCESS_REMOTE_WRITE)))
+ goto acc_err;
+ qp->r_sge.sg_list = NULL;
+ qp->r_sge.num_sge = 1;
+ qp->r_sge.total_len = wqe->length;
+ break;
+
+ case IB_WR_RDMA_READ:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_READ)))
+ goto inv_err;
+ if (unlikely(!qib_rkey_ok(qp, &sqp->s_sge.sge, wqe->length,
+ wqe->wr.wr.rdma.remote_addr,
+ wqe->wr.wr.rdma.rkey,
+ IB_ACCESS_REMOTE_READ)))
+ goto acc_err;
+ release = 0;
+ sqp->s_sge.sg_list = NULL;
+ sqp->s_sge.num_sge = 1;
+ qp->r_sge.sge = wqe->sg_list[0];
+ qp->r_sge.sg_list = wqe->sg_list + 1;
+ qp->r_sge.num_sge = wqe->wr.num_sge;
+ qp->r_sge.total_len = wqe->length;
+ break;
+
+ case IB_WR_ATOMIC_CMP_AND_SWP:
+ case IB_WR_ATOMIC_FETCH_AND_ADD:
+ if (unlikely(!(qp->qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+ goto inv_err;
+ if (unlikely(!qib_rkey_ok(qp, &qp->r_sge.sge, sizeof(u64),
+ wqe->wr.wr.atomic.remote_addr,
+ wqe->wr.wr.atomic.rkey,
+ IB_ACCESS_REMOTE_ATOMIC)))
+ goto acc_err;
+ /* Perform atomic OP and save result. */
+ maddr = (atomic64_t *) qp->r_sge.sge.vaddr;
+ sdata = wqe->wr.wr.atomic.compare_add;
+ *(u64 *) sqp->s_sge.sge.vaddr =
+ (wqe->wr.opcode == IB_WR_ATOMIC_FETCH_AND_ADD) ?
+ (u64) atomic64_add_return(sdata, maddr) - sdata :
+ (u64) cmpxchg((u64 *) qp->r_sge.sge.vaddr,
+ sdata, wqe->wr.wr.atomic.swap);
+ atomic_dec(&qp->r_sge.sge.mr->refcount);
+ qp->r_sge.num_sge = 0;
+ goto send_comp;
+
+ default:
+ send_status = IB_WC_LOC_QP_OP_ERR;
+ goto serr;
+ }
+
+ sge = &sqp->s_sge.sge;
+ while (sqp->s_len) {
+ u32 len = sqp->s_len;
+
+ if (len > sge->length)
+ len = sge->length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ qib_copy_sge(&qp->r_sge, sge->vaddr, len, release);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (!release)
+ atomic_dec(&sge->mr->refcount);
+ if (--sqp->s_sge.num_sge)
+ *sge = *sqp->s_sge.sg_list++;
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ sqp->s_len -= len;
+ }
+ if (release)
+ while (qp->r_sge.num_sge) {
+ atomic_dec(&qp->r_sge.sge.mr->refcount);
+ if (--qp->r_sge.num_sge)
+ qp->r_sge.sge = *qp->r_sge.sg_list++;
+ }
+
+ if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+ goto send_comp;
+
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE_WITH_IMM)
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ else
+ wc.opcode = IB_WC_RECV;
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.byte_len = wqe->length;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = qp->remote_qpn;
+ wc.slid = qp->remote_ah_attr.dlid;
+ wc.sl = qp->remote_ah_attr.sl;
+ wc.port_num = 1;
+ /* Signal completion event if the solicited bit is set. */
+ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ wqe->wr.send_flags & IB_SEND_SOLICITED);
+
+send_comp:
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ ibp->n_loop_pkts++;
+flush_send:
+ sqp->s_rnr_retry = sqp->s_rnr_retry_cnt;
+ qib_send_complete(sqp, wqe, send_status);
+ goto again;
+
+rnr_nak:
+ /* Handle RNR NAK */
+ if (qp->ibqp.qp_type == IB_QPT_UC)
+ goto send_comp;
+ ibp->n_rnr_naks++;
+ /*
+ * Note: we don't need the s_lock held since the BUSY flag
+ * makes this single threaded.
+ */
+ if (sqp->s_rnr_retry == 0) {
+ send_status = IB_WC_RNR_RETRY_EXC_ERR;
+ goto serr;
+ }
+ if (sqp->s_rnr_retry_cnt < 7)
+ sqp->s_rnr_retry--;
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ if (!(ib_qib_state_ops[sqp->state] & QIB_PROCESS_RECV_OK))
+ goto clr_busy;
+ sqp->s_flags |= QIB_S_WAIT_RNR;
+ sqp->s_timer.function = qib_rc_rnr_retry;
+ sqp->s_timer.expires = jiffies +
+ usecs_to_jiffies(ib_qib_rnr_table[qp->r_min_rnr_timer]);
+ add_timer(&sqp->s_timer);
+ goto clr_busy;
+
+op_err:
+ send_status = IB_WC_REM_OP_ERR;
+ wc.status = IB_WC_LOC_QP_OP_ERR;
+ goto err;
+
+inv_err:
+ send_status = IB_WC_REM_INV_REQ_ERR;
+ wc.status = IB_WC_LOC_QP_OP_ERR;
+ goto err;
+
+acc_err:
+ send_status = IB_WC_REM_ACCESS_ERR;
+ wc.status = IB_WC_LOC_PROT_ERR;
+err:
+ /* responder goes to error state */
+ qib_rc_error(qp, wc.status);
+
+serr:
+ spin_lock_irqsave(&sqp->s_lock, flags);
+ qib_send_complete(sqp, wqe, send_status);
+ if (sqp->ibqp.qp_type == IB_QPT_RC) {
+ int lastwqe = qib_error_qp(sqp, IB_WC_WR_FLUSH_ERR);
+
+ sqp->s_flags &= ~QIB_S_BUSY;
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+ if (lastwqe) {
+ struct ib_event ev;
+
+ ev.device = sqp->ibqp.device;
+ ev.element.qp = &sqp->ibqp;
+ ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
+ sqp->ibqp.event_handler(&ev, sqp->ibqp.qp_context);
+ }
+ goto done;
+ }
+clr_busy:
+ sqp->s_flags &= ~QIB_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&sqp->s_lock, flags);
+done:
+ if (qp && atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+}
+
+/**
+ * qib_make_grh - construct a GRH header
+ * @ibp: a pointer to the IB port
+ * @hdr: a pointer to the GRH header being constructed
+ * @grh: the global route address to send to
+ * @hwords: the number of 32 bit words of header being sent
+ * @nwords: the number of 32 bit words of data being sent
+ *
+ * Return the size of the header in 32 bit words.
+ */
+u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
+ struct ib_global_route *grh, u32 hwords, u32 nwords)
+{
+ hdr->version_tclass_flow =
+ cpu_to_be32((IB_GRH_VERSION << IB_GRH_VERSION_SHIFT) |
+ (grh->traffic_class << IB_GRH_TCLASS_SHIFT) |
+ (grh->flow_label << IB_GRH_FLOW_SHIFT));
+ hdr->paylen = cpu_to_be16((hwords - 2 + nwords + SIZE_OF_CRC) << 2);
+ /* next_hdr is defined by C8-7 in ch. 8.4.1 */
+ hdr->next_hdr = IB_GRH_NEXT_HDR;
+ hdr->hop_limit = grh->hop_limit;
+ /* The SGID is 32-bit aligned. */
+ hdr->sgid.global.subnet_prefix = ibp->gid_prefix;
+ hdr->sgid.global.interface_id = grh->sgid_index ?
+ ibp->guids[grh->sgid_index - 1] : ppd_from_ibp(ibp)->guid;
+ hdr->dgid = grh->dgid;
+
+ /* GRH header size in 32-bit words. */
+ return sizeof(struct ib_grh) / sizeof(u32);
+}
+
+void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
+ u32 bth0, u32 bth2)
+{
+ struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ u16 lrh0;
+ u32 nwords;
+ u32 extra_bytes;
+
+ /* Construct the header. */
+ extra_bytes = -qp->s_cur_size & 3;
+ nwords = (qp->s_cur_size + extra_bytes) >> 2;
+ lrh0 = QIB_LRH_BTH;
+ if (unlikely(qp->remote_ah_attr.ah_flags & IB_AH_GRH)) {
+ qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh,
+ &qp->remote_ah_attr.grh,
+ qp->s_hdrwords, nwords);
+ lrh0 = QIB_LRH_GRH;
+ }
+ lrh0 |= ibp->sl_to_vl[qp->remote_ah_attr.sl] << 12 |
+ qp->remote_ah_attr.sl << 4;
+ qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
+ qp->s_hdr.lrh[1] = cpu_to_be16(qp->remote_ah_attr.dlid);
+ qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+ qp->s_hdr.lrh[3] = cpu_to_be16(ppd_from_ibp(ibp)->lid |
+ qp->remote_ah_attr.src_path_bits);
+ bth0 |= qib_get_pkey(ibp, qp->s_pkey_index);
+ bth0 |= extra_bytes << 20;
+ if (qp->s_mig_state == IB_MIG_MIGRATED)
+ bth0 |= IB_BTH_MIG_REQ;
+ ohdr->bth[0] = cpu_to_be32(bth0);
+ ohdr->bth[1] = cpu_to_be32(qp->remote_qpn);
+ ohdr->bth[2] = cpu_to_be32(bth2);
+}
+
+/**
+ * qib_do_send - perform a send on a QP
+ * @work: contains a pointer to the QP
+ *
+ * Process entries in the send work queue until credit or queue is
+ * exhausted. Only allow one CPU to send a packet per QP (tasklet).
+ * Otherwise, two threads could send packets out of order.
+ */
+void qib_do_send(struct work_struct *work)
+{
+ struct qib_qp *qp = container_of(work, struct qib_qp, s_work);
+ struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ int (*make_req)(struct qib_qp *qp);
+ unsigned long flags;
+
+ if ((qp->ibqp.qp_type == IB_QPT_RC ||
+ qp->ibqp.qp_type == IB_QPT_UC) &&
+ (qp->remote_ah_attr.dlid & ~((1 << ppd->lmc) - 1)) == ppd->lid) {
+ qib_ruc_loopback(qp);
+ return;
+ }
+
+ if (qp->ibqp.qp_type == IB_QPT_RC)
+ make_req = qib_make_rc_req;
+ else if (qp->ibqp.qp_type == IB_QPT_UC)
+ make_req = qib_make_uc_req;
+ else
+ make_req = qib_make_ud_req;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Return if we are already busy processing a work request. */
+ if (!qib_send_ok(qp)) {
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return;
+ }
+
+ qp->s_flags |= QIB_S_BUSY;
+
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ do {
+ /* Check for a constructed packet to be sent. */
+ if (qp->s_hdrwords != 0) {
+ /*
+ * If the packet cannot be sent now, return and
+ * the send tasklet will be woken up later.
+ */
+ if (qib_verbs_send(qp, &qp->s_hdr, qp->s_hdrwords,
+ qp->s_cur_sge, qp->s_cur_size))
+ break;
+ /* Record that s_hdr is empty. */
+ qp->s_hdrwords = 0;
+ }
+ } while (make_req(qp));
+}
+
+/*
+ * This should be called with s_lock held.
+ */
+void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
+ enum ib_wc_status status)
+{
+ u32 old_last, last;
+ unsigned i;
+
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_OR_FLUSH_SEND))
+ return;
+
+ for (i = 0; i < wqe->wr.num_sge; i++) {
+ struct qib_sge *sge = &wqe->sg_list[i];
+
+ atomic_dec(&sge->mr->refcount);
+ }
+ if (qp->ibqp.qp_type == IB_QPT_UD ||
+ qp->ibqp.qp_type == IB_QPT_SMI ||
+ qp->ibqp.qp_type == IB_QPT_GSI)
+ atomic_dec(&to_iah(wqe->wr.wr.ud.ah)->refcount);
+
+ /* See ch. 11.2.4.1 and 10.7.3.1 */
+ if (!(qp->s_flags & QIB_S_SIGNAL_REQ_WR) ||
+ (wqe->wr.send_flags & IB_SEND_SIGNALED) ||
+ status != IB_WC_SUCCESS) {
+ struct ib_wc wc;
+
+ memset(&wc, 0, sizeof wc);
+ wc.wr_id = wqe->wr.wr_id;
+ wc.status = status;
+ wc.opcode = ib_qib_wc_opcode[wqe->wr.opcode];
+ wc.qp = &qp->ibqp;
+ if (status == IB_WC_SUCCESS)
+ wc.byte_len = wqe->length;
+ qib_cq_enter(to_icq(qp->ibqp.send_cq), &wc,
+ status != IB_WC_SUCCESS);
+ }
+
+ last = qp->s_last;
+ old_last = last;
+ if (++last >= qp->s_size)
+ last = 0;
+ qp->s_last = last;
+ if (qp->s_acked == old_last)
+ qp->s_acked = last;
+ if (qp->s_cur == old_last)
+ qp->s_cur = last;
+ if (qp->s_tail == old_last)
+ qp->s_tail = last;
+ if (qp->state == IB_QPS_SQD && last == qp->s_cur)
+ qp->s_draining = 0;
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 33/52] IB/qib: Add qib_sd7220.c
From: Ralph Campbell @ 2010-05-07 0:01 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_sd7220.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_sd7220.c | 1415 ++++++++++++++++++++++++++++++++
1 files changed, 1415 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_sd7220.c
diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c
new file mode 100644
index 0000000..17af3cd
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_sd7220.c
@@ -0,0 +1,1415 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/*
+ * This file contains all of the code that is specific to the SerDes
+ * on the QLogic_IB 7220 chip.
+ */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "qib.h"
+#include "qib_7220.h"
+
+/*
+ * Same as in qib_iba7220.c, but just the registers needed here.
+ * Could move whole set to qib_7220.h, but decided better to keep
+ * local.
+ */
+#define KREG_IDX(regname) (QIB_7220_##regname##_OFFS / sizeof(u64))
+#define kr_hwerrclear KREG_IDX(HwErrClear)
+#define kr_hwerrmask KREG_IDX(HwErrMask)
+#define kr_hwerrstatus KREG_IDX(HwErrStatus)
+#define kr_ibcstatus KREG_IDX(IBCStatus)
+#define kr_ibserdesctrl KREG_IDX(IBSerDesCtrl)
+#define kr_scratch KREG_IDX(Scratch)
+#define kr_xgxs_cfg KREG_IDX(XGXSCfg)
+/* these are used only here, not in qib_iba7220.c */
+#define kr_ibsd_epb_access_ctrl KREG_IDX(ibsd_epb_access_ctrl)
+#define kr_ibsd_epb_transaction_reg KREG_IDX(ibsd_epb_transaction_reg)
+#define kr_pciesd_epb_transaction_reg KREG_IDX(pciesd_epb_transaction_reg)
+#define kr_pciesd_epb_access_ctrl KREG_IDX(pciesd_epb_access_ctrl)
+#define kr_serdes_ddsrxeq0 KREG_IDX(SerDes_DDSRXEQ0)
+
+/*
+ * The IBSerDesMappTable is a memory that holds values to be stored in
+ * various SerDes registers by IBC.
+ */
+#define kr_serdes_maptable KREG_IDX(IBSerDesMappTable)
+
+/*
+ * Below used for sdnum parameter, selecting one of the two sections
+ * used for PCIe, or the single SerDes used for IB.
+ */
+#define PCIE_SERDES0 0
+#define PCIE_SERDES1 1
+
+/*
+ * The EPB requires addressing in a particular form. EPB_LOC() is intended
+ * to make #definitions a little more readable.
+ */
+#define EPB_ADDR_SHF 8
+#define EPB_LOC(chn, elt, reg) \
+ (((elt & 0xf) | ((chn & 7) << 4) | ((reg & 0x3f) << 9)) << \
+ EPB_ADDR_SHF)
+#define EPB_IB_QUAD0_CS_SHF (25)
+#define EPB_IB_QUAD0_CS (1U << EPB_IB_QUAD0_CS_SHF)
+#define EPB_IB_UC_CS_SHF (26)
+#define EPB_PCIE_UC_CS_SHF (27)
+#define EPB_GLOBAL_WR (1U << (EPB_ADDR_SHF + 8))
+
+/* Forward declarations. */
+static int qib_sd7220_reg_mod(struct qib_devdata *dd, int sdnum, u32 loc,
+ u32 data, u32 mask);
+static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
+ int mask);
+static int qib_sd_trimdone_poll(struct qib_devdata *dd);
+static void qib_sd_trimdone_monitor(struct qib_devdata *dd, const char *where);
+static int qib_sd_setvals(struct qib_devdata *dd);
+static int qib_sd_early(struct qib_devdata *dd);
+static int qib_sd_dactrim(struct qib_devdata *dd);
+static int qib_internal_presets(struct qib_devdata *dd);
+/* Tweak the register (CMUCTRL5) that contains the TRIMSELF controls */
+static int qib_sd_trimself(struct qib_devdata *dd, int val);
+static int epb_access(struct qib_devdata *dd, int sdnum, int claim);
+
+/*
+ * Below keeps track of whether the "once per power-on" initialization has
+ * been done, because uC code Version 1.32.17 or higher allows the uC to
+ * be reset at will, and Automatic Equalization may require it. So the
+ * state of the reset "pin", is no longer valid. Instead, we check for the
+ * actual uC code having been loaded.
+ */
+static int qib_ibsd_ucode_loaded(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+ if (!dd->cspec->serdes_first_init_done && (qib_sd7220_ib_vfy(dd) > 0))
+ dd->cspec->serdes_first_init_done = 1;
+ return dd->cspec->serdes_first_init_done;
+}
+
+/* repeat #define for local use. "Real" #define is in qib_iba7220.c */
+#define QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR 0x0000004000000000ULL
+#define IB_MPREG5 (EPB_LOC(6, 0, 0xE) | (1L << EPB_IB_UC_CS_SHF))
+#define IB_MPREG6 (EPB_LOC(6, 0, 0xF) | (1U << EPB_IB_UC_CS_SHF))
+#define UC_PAR_CLR_D 8
+#define UC_PAR_CLR_M 0xC
+#define IB_CTRL2(chn) (EPB_LOC(chn, 7, 3) | EPB_IB_QUAD0_CS)
+#define START_EQ1(chan) EPB_LOC(chan, 7, 0x27)
+
+void qib_sd7220_clr_ibpar(struct qib_devdata *dd)
+{
+ int ret;
+
+ /* clear, then re-enable parity errs */
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6,
+ UC_PAR_CLR_D, UC_PAR_CLR_M);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed clearing IBSerDes Parity err\n");
+ goto bail;
+ }
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0,
+ UC_PAR_CLR_M);
+
+ qib_read_kreg32(dd, kr_scratch);
+ udelay(4);
+ qib_write_kreg(dd, kr_hwerrclear,
+ QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR);
+ qib_read_kreg32(dd, kr_scratch);
+bail:
+ return;
+}
+
+/*
+ * After a reset or other unusual event, the epb interface may need
+ * to be re-synchronized, between the host and the uC.
+ * returns <0 for failure to resync within IBSD_RESYNC_TRIES (not expected)
+ */
+#define IBSD_RESYNC_TRIES 3
+#define IB_PGUDP(chn) (EPB_LOC((chn), 2, 1) | EPB_IB_QUAD0_CS)
+#define IB_CMUDONE(chn) (EPB_LOC((chn), 7, 0xF) | EPB_IB_QUAD0_CS)
+
+static int qib_resync_ibepb(struct qib_devdata *dd)
+{
+ int ret, pat, tries, chn;
+ u32 loc;
+
+ ret = -1;
+ chn = 0;
+ for (tries = 0; tries < (4 * IBSD_RESYNC_TRIES); ++tries) {
+ loc = IB_PGUDP(chn);
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed read in resync\n");
+ continue;
+ }
+ if (ret != 0xF0 && ret != 0x55 && tries == 0)
+ qib_dev_err(dd, "unexpected pattern in resync\n");
+ pat = ret ^ 0xA5; /* alternate F0 and 55 */
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, pat, 0xFF);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed write in resync\n");
+ continue;
+ }
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed re-read in resync\n");
+ continue;
+ }
+ if (ret != pat) {
+ qib_dev_err(dd, "Failed compare1 in resync\n");
+ continue;
+ }
+ loc = IB_CMUDONE(chn);
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed CMUDONE rd in resync\n");
+ continue;
+ }
+ if ((ret & 0x70) != ((chn << 4) | 0x40)) {
+ qib_dev_err(dd, "Bad CMUDONE value %02X, chn %d\n",
+ ret, chn);
+ continue;
+ }
+ if (++chn == 4)
+ break; /* Success */
+ }
+ return (ret > 0) ? 0 : ret;
+}
+
+/*
+ * Localize the stuff that should be done to change IB uC reset
+ * returns <0 for errors.
+ */
+static int qib_ibsd_reset(struct qib_devdata *dd, int assert_rst)
+{
+ u64 rst_val;
+ int ret = 0;
+ unsigned long flags;
+
+ rst_val = qib_read_kreg64(dd, kr_ibserdesctrl);
+ if (assert_rst) {
+ /*
+ * Vendor recommends "interrupting" uC before reset, to
+ * minimize possible glitches.
+ */
+ spin_lock_irqsave(&dd->cspec->sdepb_lock, flags);
+ epb_access(dd, IB_7220_SERDES, 1);
+ rst_val |= 1ULL;
+ /* Squelch possible parity error from _asserting_ reset */
+ qib_write_kreg(dd, kr_hwerrmask,
+ dd->cspec->hwerrmask &
+ ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR);
+ qib_write_kreg(dd, kr_ibserdesctrl, rst_val);
+ /* flush write, delay to ensure it took effect */
+ qib_read_kreg32(dd, kr_scratch);
+ udelay(2);
+ /* once it's reset, can remove interrupt */
+ epb_access(dd, IB_7220_SERDES, -1);
+ spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
+ } else {
+ /*
+ * Before we de-assert reset, we need to deal with
+ * possible glitch on the Parity-error line.
+ * Suppress it around the reset, both in chip-level
+ * hwerrmask and in IB uC control reg. uC will allow
+ * it again during startup.
+ */
+ u64 val;
+ rst_val &= ~(1ULL);
+ qib_write_kreg(dd, kr_hwerrmask,
+ dd->cspec->hwerrmask &
+ ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR);
+
+ ret = qib_resync_ibepb(dd);
+ if (ret < 0)
+ qib_dev_err(dd, "unable to re-sync IB EPB\n");
+
+ /* set uC control regs to suppress parity errs */
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG5, 1, 1);
+ if (ret < 0)
+ goto bail;
+ /* IB uC code past Version 1.32.17 allow suppression of wdog */
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80,
+ 0x80);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed to set WDOG disable\n");
+ goto bail;
+ }
+ qib_write_kreg(dd, kr_ibserdesctrl, rst_val);
+ /* flush write, delay for startup */
+ qib_read_kreg32(dd, kr_scratch);
+ udelay(1);
+ /* clear, then re-enable parity errs */
+ qib_sd7220_clr_ibpar(dd);
+ val = qib_read_kreg64(dd, kr_hwerrstatus);
+ if (val & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR) {
+ qib_dev_err(dd, "IBUC Parity still set after RST\n");
+ dd->cspec->hwerrmask &=
+ ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR;
+ }
+ qib_write_kreg(dd, kr_hwerrmask,
+ dd->cspec->hwerrmask);
+ }
+
+bail:
+ return ret;
+}
+
+static void qib_sd_trimdone_monitor(struct qib_devdata *dd,
+ const char *where)
+{
+ int ret, chn, baduns;
+ u64 val;
+
+ if (!where)
+ where = "?";
+
+ /* give time for reset to settle out in EPB */
+ udelay(2);
+
+ ret = qib_resync_ibepb(dd);
+ if (ret < 0)
+ qib_dev_err(dd, "not able to re-sync IB EPB (%s)\n", where);
+
+ /* Do "sacrificial read" to get EPB in sane state after reset */
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(0), 0, 0);
+ if (ret < 0)
+ qib_dev_err(dd, "Failed TRIMDONE 1st read, (%s)\n", where);
+
+ /* Check/show "summary" Trim-done bit in IBCStatus */
+ val = qib_read_kreg64(dd, kr_ibcstatus);
+ if (!(val & (1ULL << 11)))
+ qib_dev_err(dd, "IBCS TRIMDONE clear (%s)\n", where);
+ /*
+ * Do "dummy read/mod/wr" to get EPB in sane state after reset
+ * The default value for MPREG6 is 0.
+ */
+ udelay(2);
+
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80, 0x80);
+ if (ret < 0)
+ qib_dev_err(dd, "Failed Dummy RMW, (%s)\n", where);
+ udelay(10);
+
+ baduns = 0;
+
+ for (chn = 3; chn >= 0; --chn) {
+ /* Read CTRL reg for each channel to check TRIMDONE */
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+ IB_CTRL2(chn), 0, 0);
+ if (ret < 0)
+ qib_dev_err(dd, "Failed checking TRIMDONE, chn %d"
+ " (%s)\n", chn, where);
+
+ if (!(ret & 0x10)) {
+ int probe;
+
+ baduns |= (1 << chn);
+ qib_dev_err(dd, "TRIMDONE cleared on chn %d (%02X)."
+ " (%s)\n", chn, ret, where);
+ probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+ IB_PGUDP(0), 0, 0);
+ qib_dev_err(dd, "probe is %d (%02X)\n",
+ probe, probe);
+ probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+ IB_CTRL2(chn), 0, 0);
+ qib_dev_err(dd, "re-read: %d (%02X)\n",
+ probe, probe);
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+ IB_CTRL2(chn), 0x10, 0x10);
+ if (ret < 0)
+ qib_dev_err(dd,
+ "Err on TRIMDONE rewrite1\n");
+ }
+ }
+ for (chn = 3; chn >= 0; --chn) {
+ /* Read CTRL reg for each channel to check TRIMDONE */
+ if (baduns & (1 << chn)) {
+ qib_dev_err(dd,
+ "Reseting TRIMDONE on chn %d (%s)\n",
+ chn, where);
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+ IB_CTRL2(chn), 0x10, 0x10);
+ if (ret < 0)
+ qib_dev_err(dd, "Failed re-setting "
+ "TRIMDONE, chn %d (%s)\n",
+ chn, where);
+ }
+ }
+}
+
+/*
+ * Below is portion of IBA7220-specific bringup_serdes() that actually
+ * deals with registers and memory within the SerDes itself.
+ * Post IB uC code version 1.32.17, was_reset being 1 is not really
+ * informative, so we double-check.
+ */
+int qib_sd7220_init(struct qib_devdata *dd)
+{
+ int ret = 1; /* default to failure */
+ int first_reset, was_reset;
+
+ /* SERDES MPU reset recorded in D0 */
+ was_reset = (qib_read_kreg64(dd, kr_ibserdesctrl) & 1);
+ if (!was_reset) {
+ /* entered with reset not asserted, we need to do it */
+ qib_ibsd_reset(dd, 1);
+ qib_sd_trimdone_monitor(dd, "Driver-reload");
+ }
+ /* Substitute our deduced value for was_reset */
+ ret = qib_ibsd_ucode_loaded(dd->pport);
+ if (ret < 0)
+ goto bail;
+
+ first_reset = !ret; /* First reset if IBSD uCode not yet loaded */
+ /*
+ * Alter some regs per vendor latest doc, reset-defaults
+ * are not right for IB.
+ */
+ ret = qib_sd_early(dd);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed to set IB SERDES early defaults\n");
+ goto bail;
+ }
+ /*
+ * Set DAC manual trim IB.
+ * We only do this once after chip has been reset (usually
+ * same as once per system boot).
+ */
+ if (first_reset) {
+ ret = qib_sd_dactrim(dd);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed IB SERDES DAC trim\n");
+ goto bail;
+ }
+ }
+ /*
+ * Set various registers (DDS and RXEQ) that will be
+ * controlled by IBC (in 1.2 mode) to reasonable preset values
+ * Calling the "internal" version avoids the "check for needed"
+ * and "trimdone monitor" that might be counter-productive.
+ */
+ ret = qib_internal_presets(dd);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed to set IB SERDES presets\n");
+ goto bail;
+ }
+ ret = qib_sd_trimself(dd, 0x80);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed to set IB SERDES TRIMSELF\n");
+ goto bail;
+ }
+
+ /* Load image, then try to verify */
+ ret = 0; /* Assume success */
+ if (first_reset) {
+ int vfy;
+ int trim_done;
+
+ ret = qib_sd7220_ib_load(dd);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed to load IB SERDES image\n");
+ goto bail;
+ } else {
+ /* Loaded image, try to verify */
+ vfy = qib_sd7220_ib_vfy(dd);
+ if (vfy != ret) {
+ qib_dev_err(dd, "SERDES PRAM VFY failed\n");
+ goto bail;
+ } /* end if verified */
+ } /* end if loaded */
+
+ /*
+ * Loaded and verified. Almost good...
+ * hold "success" in ret
+ */
+ ret = 0;
+ /*
+ * Prev steps all worked, continue bringup
+ * De-assert RESET to uC, only in first reset, to allow
+ * trimming.
+ *
+ * Since our default setup sets START_EQ1 to
+ * PRESET, we need to clear that for this very first run.
+ */
+ ret = ibsd_mod_allchnls(dd, START_EQ1(0), 0, 0x38);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed clearing START_EQ1\n");
+ goto bail;
+ }
+
+ qib_ibsd_reset(dd, 0);
+ /*
+ * If this is not the first reset, trimdone should be set
+ * already. We may need to check about this.
+ */
+ trim_done = qib_sd_trimdone_poll(dd);
+ /*
+ * Whether or not trimdone succeeded, we need to put the
+ * uC back into reset to avoid a possible fight with the
+ * IBC state-machine.
+ */
+ qib_ibsd_reset(dd, 1);
+
+ if (!trim_done) {
+ qib_dev_err(dd, "No TRIMDONE seen\n");
+ goto bail;
+ }
+ /*
+ * DEBUG: check each time we reset if trimdone bits have
+ * gotten cleared, and re-set them.
+ */
+ qib_sd_trimdone_monitor(dd, "First-reset");
+ /* Remember so we do not re-do the load, dactrim, etc. */
+ dd->cspec->serdes_first_init_done = 1;
+ }
+ /*
+ * setup for channel training and load values for
+ * RxEq and DDS in tables used by IBC in IB1.2 mode
+ */
+ ret = 0;
+ if (qib_sd_setvals(dd) >= 0)
+ goto done;
+bail:
+ ret = 1;
+done:
+ /* start relock timer regardless, but start at 1 second */
+ set_7220_relock_poll(dd, -1);
+ return ret;
+}
+
+#define EPB_ACC_REQ 1
+#define EPB_ACC_GNT 0x100
+#define EPB_DATA_MASK 0xFF
+#define EPB_RD (1ULL << 24)
+#define EPB_TRANS_RDY (1ULL << 31)
+#define EPB_TRANS_ERR (1ULL << 30)
+#define EPB_TRANS_TRIES 5
+
+/*
+ * query, claim, release ownership of the EPB (External Parallel Bus)
+ * for a specified SERDES.
+ * the "claim" parameter is >0 to claim, <0 to release, 0 to query.
+ * Returns <0 for errors, >0 if we had ownership, else 0.
+ */
+static int epb_access(struct qib_devdata *dd, int sdnum, int claim)
+{
+ u16 acc;
+ u64 accval;
+ int owned = 0;
+ u64 oct_sel = 0;
+
+ switch (sdnum) {
+ case IB_7220_SERDES:
+ /*
+ * The IB SERDES "ownership" is fairly simple. A single each
+ * request/grant.
+ */
+ acc = kr_ibsd_epb_access_ctrl;
+ break;
+
+ case PCIE_SERDES0:
+ case PCIE_SERDES1:
+ /* PCIe SERDES has two "octants", need to select which */
+ acc = kr_pciesd_epb_access_ctrl;
+ oct_sel = (2 << (sdnum - PCIE_SERDES0));
+ break;
+
+ default:
+ return 0;
+ }
+
+ /* Make sure any outstanding transaction was seen */
+ qib_read_kreg32(dd, kr_scratch);
+ udelay(15);
+
+ accval = qib_read_kreg32(dd, acc);
+
+ owned = !!(accval & EPB_ACC_GNT);
+ if (claim < 0) {
+ /* Need to release */
+ u64 pollval;
+ /*
+ * The only writeable bits are the request and CS.
+ * Both should be clear
+ */
+ u64 newval = 0;
+ qib_write_kreg(dd, acc, newval);
+ /* First read after write is not trustworthy */
+ pollval = qib_read_kreg32(dd, acc);
+ udelay(5);
+ pollval = qib_read_kreg32(dd, acc);
+ if (pollval & EPB_ACC_GNT)
+ owned = -1;
+ } else if (claim > 0) {
+ /* Need to claim */
+ u64 pollval;
+ u64 newval = EPB_ACC_REQ | oct_sel;
+ qib_write_kreg(dd, acc, newval);
+ /* First read after write is not trustworthy */
+ pollval = qib_read_kreg32(dd, acc);
+ udelay(5);
+ pollval = qib_read_kreg32(dd, acc);
+ if (!(pollval & EPB_ACC_GNT))
+ owned = -1;
+ }
+ return owned;
+}
+
+/*
+ * Lemma to deal with race condition of write..read to epb regs
+ */
+static int epb_trans(struct qib_devdata *dd, u16 reg, u64 i_val, u64 *o_vp)
+{
+ int tries;
+ u64 transval;
+
+ qib_write_kreg(dd, reg, i_val);
+ /* Throw away first read, as RDY bit may be stale */
+ transval = qib_read_kreg64(dd, reg);
+
+ for (tries = EPB_TRANS_TRIES; tries; --tries) {
+ transval = qib_read_kreg32(dd, reg);
+ if (transval & EPB_TRANS_RDY)
+ break;
+ udelay(5);
+ }
+ if (transval & EPB_TRANS_ERR)
+ return -1;
+ if (tries > 0 && o_vp)
+ *o_vp = transval;
+ return tries;
+}
+
+/**
+ * qib_sd7220_reg_mod - modify SERDES register
+ * @dd: the qlogic_ib device
+ * @sdnum: which SERDES to access
+ * @loc: location - channel, element, register, as packed by EPB_LOC() macro.
+ * @wd: Write Data - value to set in register
+ * @mask: ones where data should be spliced into reg.
+ *
+ * Basic register read/modify/write, with un-needed acesses elided. That is,
+ * a mask of zero will prevent write, while a mask of 0xFF will prevent read.
+ * returns current (presumed, if a write was done) contents of selected
+ * register, or <0 if errors.
+ */
+static int qib_sd7220_reg_mod(struct qib_devdata *dd, int sdnum, u32 loc,
+ u32 wd, u32 mask)
+{
+ u16 trans;
+ u64 transval;
+ int owned;
+ int tries, ret;
+ unsigned long flags;
+
+ switch (sdnum) {
+ case IB_7220_SERDES:
+ trans = kr_ibsd_epb_transaction_reg;
+ break;
+
+ case PCIE_SERDES0:
+ case PCIE_SERDES1:
+ trans = kr_pciesd_epb_transaction_reg;
+ break;
+
+ default:
+ return -1;
+ }
+
+ /*
+ * All access is locked in software (vs other host threads) and
+ * hardware (vs uC access).
+ */
+ spin_lock_irqsave(&dd->cspec->sdepb_lock, flags);
+
+ owned = epb_access(dd, sdnum, 1);
+ if (owned < 0) {
+ spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
+ return -1;
+ }
+ ret = 0;
+ for (tries = EPB_TRANS_TRIES; tries; --tries) {
+ transval = qib_read_kreg32(dd, trans);
+ if (transval & EPB_TRANS_RDY)
+ break;
+ udelay(5);
+ }
+
+ if (tries > 0) {
+ tries = 1; /* to make read-skip work */
+ if (mask != 0xFF) {
+ /*
+ * Not a pure write, so need to read.
+ * loc encodes chip-select as well as address
+ */
+ transval = loc | EPB_RD;
+ tries = epb_trans(dd, trans, transval, &transval);
+ }
+ if (tries > 0 && mask != 0) {
+ /*
+ * Not a pure read, so need to write.
+ */
+ wd = (wd & mask) | (transval & ~mask);
+ transval = loc | (wd & EPB_DATA_MASK);
+ tries = epb_trans(dd, trans, transval, &transval);
+ }
+ }
+ /* else, failed to see ready, what error-handling? */
+
+ /*
+ * Release bus. Failure is an error.
+ */
+ if (epb_access(dd, sdnum, -1) < 0)
+ ret = -1;
+ else
+ ret = transval & EPB_DATA_MASK;
+
+ spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
+ if (tries <= 0)
+ ret = -1;
+ return ret;
+}
+
+#define EPB_ROM_R (2)
+#define EPB_ROM_W (1)
+/*
+ * Below, all uC-related, use appropriate UC_CS, depending
+ * on which SerDes is used.
+ */
+#define EPB_UC_CTL EPB_LOC(6, 0, 0)
+#define EPB_MADDRL EPB_LOC(6, 0, 2)
+#define EPB_MADDRH EPB_LOC(6, 0, 3)
+#define EPB_ROMDATA EPB_LOC(6, 0, 4)
+#define EPB_RAMDATA EPB_LOC(6, 0, 5)
+
+/* Transfer date to/from uC Program RAM of IB or PCIe SerDes */
+static int qib_sd7220_ram_xfer(struct qib_devdata *dd, int sdnum, u32 loc,
+ u8 *buf, int cnt, int rd_notwr)
+{
+ u16 trans;
+ u64 transval;
+ u64 csbit;
+ int owned;
+ int tries;
+ int sofar;
+ int addr;
+ int ret;
+ unsigned long flags;
+ const char *op;
+
+ /* Pick appropriate transaction reg and "Chip select" for this serdes */
+ switch (sdnum) {
+ case IB_7220_SERDES:
+ csbit = 1ULL << EPB_IB_UC_CS_SHF;
+ trans = kr_ibsd_epb_transaction_reg;
+ break;
+
+ case PCIE_SERDES0:
+ case PCIE_SERDES1:
+ /* PCIe SERDES has uC "chip select" in different bit, too */
+ csbit = 1ULL << EPB_PCIE_UC_CS_SHF;
+ trans = kr_pciesd_epb_transaction_reg;
+ break;
+
+ default:
+ return -1;
+ }
+
+ op = rd_notwr ? "Rd" : "Wr";
+ spin_lock_irqsave(&dd->cspec->sdepb_lock, flags);
+
+ owned = epb_access(dd, sdnum, 1);
+ if (owned < 0) {
+ spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
+ return -1;
+ }
+
+ /*
+ * In future code, we may need to distinguish several address ranges,
+ * and select various memories based on this. For now, just trim
+ * "loc" (location including address and memory select) to
+ * "addr" (address within memory). we will only support PRAM
+ * The memory is 8KB.
+ */
+ addr = loc & 0x1FFF;
+ for (tries = EPB_TRANS_TRIES; tries; --tries) {
+ transval = qib_read_kreg32(dd, trans);
+ if (transval & EPB_TRANS_RDY)
+ break;
+ udelay(5);
+ }
+
+ sofar = 0;
+ if (tries > 0) {
+ /*
+ * Every "memory" access is doubly-indirect.
+ * We set two bytes of address, then read/write
+ * one or mores bytes of data.
+ */
+
+ /* First, we set control to "Read" or "Write" */
+ transval = csbit | EPB_UC_CTL |
+ (rd_notwr ? EPB_ROM_R : EPB_ROM_W);
+ tries = epb_trans(dd, trans, transval, &transval);
+ while (tries > 0 && sofar < cnt) {
+ if (!sofar) {
+ /* Only set address at start of chunk */
+ int addrbyte = (addr + sofar) >> 8;
+ transval = csbit | EPB_MADDRH | addrbyte;
+ tries = epb_trans(dd, trans, transval,
+ &transval);
+ if (tries <= 0)
+ break;
+ addrbyte = (addr + sofar) & 0xFF;
+ transval = csbit | EPB_MADDRL | addrbyte;
+ tries = epb_trans(dd, trans, transval,
+ &transval);
+ if (tries <= 0)
+ break;
+ }
+
+ if (rd_notwr)
+ transval = csbit | EPB_ROMDATA | EPB_RD;
+ else
+ transval = csbit | EPB_ROMDATA | buf[sofar];
+ tries = epb_trans(dd, trans, transval, &transval);
+ if (tries <= 0)
+ break;
+ if (rd_notwr)
+ buf[sofar] = transval & EPB_DATA_MASK;
+ ++sofar;
+ }
+ /* Finally, clear control-bit for Read or Write */
+ transval = csbit | EPB_UC_CTL;
+ tries = epb_trans(dd, trans, transval, &transval);
+ }
+
+ ret = sofar;
+ /* Release bus. Failure is an error */
+ if (epb_access(dd, sdnum, -1) < 0)
+ ret = -1;
+
+ spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
+ if (tries <= 0)
+ ret = -1;
+ return ret;
+}
+
+#define PROG_CHUNK 64
+
+int qib_sd7220_prog_ld(struct qib_devdata *dd, int sdnum,
+ u8 *img, int len, int offset)
+{
+ int cnt, sofar, req;
+
+ sofar = 0;
+ while (sofar < len) {
+ req = len - sofar;
+ if (req > PROG_CHUNK)
+ req = PROG_CHUNK;
+ cnt = qib_sd7220_ram_xfer(dd, sdnum, offset + sofar,
+ img + sofar, req, 0);
+ if (cnt < req) {
+ sofar = -1;
+ break;
+ }
+ sofar += req;
+ }
+ return sofar;
+}
+
+#define VFY_CHUNK 64
+#define SD_PRAM_ERROR_LIMIT 42
+
+int qib_sd7220_prog_vfy(struct qib_devdata *dd, int sdnum,
+ const u8 *img, int len, int offset)
+{
+ int cnt, sofar, req, idx, errors;
+ unsigned char readback[VFY_CHUNK];
+
+ errors = 0;
+ sofar = 0;
+ while (sofar < len) {
+ req = len - sofar;
+ if (req > VFY_CHUNK)
+ req = VFY_CHUNK;
+ cnt = qib_sd7220_ram_xfer(dd, sdnum, sofar + offset,
+ readback, req, 1);
+ if (cnt < req) {
+ /* failed in read itself */
+ sofar = -1;
+ break;
+ }
+ for (idx = 0; idx < cnt; ++idx) {
+ if (readback[idx] != img[idx+sofar])
+ ++errors;
+ }
+ sofar += cnt;
+ }
+ return errors ? -errors : sofar;
+}
+
+/*
+ * IRQ not set up at this point in init, so we poll.
+ */
+#define IB_SERDES_TRIM_DONE (1ULL << 11)
+#define TRIM_TMO (30)
+
+static int qib_sd_trimdone_poll(struct qib_devdata *dd)
+{
+ int trim_tmo, ret;
+ uint64_t val;
+
+ /*
+ * Default to failure, so IBC will not start
+ * without IB_SERDES_TRIM_DONE.
+ */
+ ret = 0;
+ for (trim_tmo = 0; trim_tmo < TRIM_TMO; ++trim_tmo) {
+ val = qib_read_kreg64(dd, kr_ibcstatus);
+ if (val & IB_SERDES_TRIM_DONE) {
+ ret = 1;
+ break;
+ }
+ msleep(10);
+ }
+ if (trim_tmo >= TRIM_TMO) {
+ qib_dev_err(dd, "No TRIMDONE in %d tries\n", trim_tmo);
+ ret = 0;
+ }
+ return ret;
+}
+
+#define TX_FAST_ELT (9)
+
+/*
+ * Set the "negotiation" values for SERDES. These are used by the IB1.2
+ * link negotiation. Macros below are attempt to keep the values a
+ * little more human-editable.
+ * First, values related to Drive De-emphasis Settings.
+ */
+
+#define NUM_DDS_REGS 6
+#define DDS_REG_MAP 0x76A910 /* LSB-first list of regs (in elt 9) to mod */
+
+#define DDS_VAL(amp_d, main_d, ipst_d, ipre_d, amp_s, main_s, ipst_s, ipre_s) \
+ { { ((amp_d & 0x1F) << 1) | 1, ((amp_s & 0x1F) << 1) | 1, \
+ (main_d << 3) | 4 | (ipre_d >> 2), \
+ (main_s << 3) | 4 | (ipre_s >> 2), \
+ ((ipst_d & 0xF) << 1) | ((ipre_d & 3) << 6) | 0x21, \
+ ((ipst_s & 0xF) << 1) | ((ipre_s & 3) << 6) | 0x21 } }
+
+static struct dds_init {
+ uint8_t reg_vals[NUM_DDS_REGS];
+} dds_init_vals[] = {
+ /* DDR(FDR) SDR(HDR) */
+ /* Vendor recommends below for 3m cable */
+#define DDS_3M 0
+ DDS_VAL(31, 19, 12, 0, 29, 22, 9, 0),
+ DDS_VAL(31, 12, 15, 4, 31, 15, 15, 1),
+ DDS_VAL(31, 13, 15, 3, 31, 16, 15, 0),
+ DDS_VAL(31, 14, 15, 2, 31, 17, 14, 0),
+ DDS_VAL(31, 15, 15, 1, 31, 18, 13, 0),
+ DDS_VAL(31, 16, 15, 0, 31, 19, 12, 0),
+ DDS_VAL(31, 17, 14, 0, 31, 20, 11, 0),
+ DDS_VAL(31, 18, 13, 0, 30, 21, 10, 0),
+ DDS_VAL(31, 20, 11, 0, 28, 23, 8, 0),
+ DDS_VAL(31, 21, 10, 0, 27, 24, 7, 0),
+ DDS_VAL(31, 22, 9, 0, 26, 25, 6, 0),
+ DDS_VAL(30, 23, 8, 0, 25, 26, 5, 0),
+ DDS_VAL(29, 24, 7, 0, 23, 27, 4, 0),
+ /* Vendor recommends below for 1m cable */
+#define DDS_1M 13
+ DDS_VAL(28, 25, 6, 0, 21, 28, 3, 0),
+ DDS_VAL(27, 26, 5, 0, 19, 29, 2, 0),
+ DDS_VAL(25, 27, 4, 0, 17, 30, 1, 0)
+};
+
+/*
+ * Now the RXEQ section of the table.
+ */
+/* Hardware packs an element number and register address thus: */
+#define RXEQ_INIT_RDESC(elt, addr) (((elt) & 0xF) | ((addr) << 4))
+#define RXEQ_VAL(elt, adr, val0, val1, val2, val3) \
+ {RXEQ_INIT_RDESC((elt), (adr)), {(val0), (val1), (val2), (val3)} }
+
+#define RXEQ_VAL_ALL(elt, adr, val) \
+ {RXEQ_INIT_RDESC((elt), (adr)), {(val), (val), (val), (val)} }
+
+#define RXEQ_SDR_DFELTH 0
+#define RXEQ_SDR_TLTH 0
+#define RXEQ_SDR_G1CNT_Z1CNT 0x11
+#define RXEQ_SDR_ZCNT 23
+
+static struct rxeq_init {
+ u16 rdesc; /* in form used in SerDesDDSRXEQ */
+ u8 rdata[4];
+} rxeq_init_vals[] = {
+ /* Set Rcv Eq. to Preset node */
+ RXEQ_VAL_ALL(7, 0x27, 0x10),
+ /* Set DFELTHFDR/HDR thresholds */
+ RXEQ_VAL(7, 8, 0, 0, 0, 0), /* FDR, was 0, 1, 2, 3 */
+ RXEQ_VAL(7, 0x21, 0, 0, 0, 0), /* HDR */
+ /* Set TLTHFDR/HDR theshold */
+ RXEQ_VAL(7, 9, 2, 2, 2, 2), /* FDR, was 0, 2, 4, 6 */
+ RXEQ_VAL(7, 0x23, 2, 2, 2, 2), /* HDR, was 0, 1, 2, 3 */
+ /* Set Preamp setting 2 (ZFR/ZCNT) */
+ RXEQ_VAL(7, 0x1B, 12, 12, 12, 12), /* FDR, was 12, 16, 20, 24 */
+ RXEQ_VAL(7, 0x1C, 12, 12, 12, 12), /* HDR, was 12, 16, 20, 24 */
+ /* Set Preamp DC gain and Setting 1 (GFR/GHR) */
+ RXEQ_VAL(7, 0x1E, 16, 16, 16, 16), /* FDR, was 16, 17, 18, 20 */
+ RXEQ_VAL(7, 0x1F, 16, 16, 16, 16), /* HDR, was 16, 17, 18, 20 */
+ /* Toggle RELOCK (in VCDL_CTRL0) to lock to data */
+ RXEQ_VAL_ALL(6, 6, 0x20), /* Set D5 High */
+ RXEQ_VAL_ALL(6, 6, 0), /* Set D5 Low */
+};
+
+/* There are 17 values from vendor, but IBC only accesses the first 16 */
+#define DDS_ROWS (16)
+#define RXEQ_ROWS ARRAY_SIZE(rxeq_init_vals)
+
+static int qib_sd_setvals(struct qib_devdata *dd)
+{
+ int idx, midx;
+ int min_idx; /* Minimum index for this portion of table */
+ uint32_t dds_reg_map;
+ u64 __iomem *taddr, *iaddr;
+ uint64_t data;
+ uint64_t sdctl;
+
+ taddr = dd->kregbase + kr_serdes_maptable;
+ iaddr = dd->kregbase + kr_serdes_ddsrxeq0;
+
+ /*
+ * Init the DDS section of the table.
+ * Each "row" of the table provokes NUM_DDS_REG writes, to the
+ * registers indicated in DDS_REG_MAP.
+ */
+ sdctl = qib_read_kreg64(dd, kr_ibserdesctrl);
+ sdctl = (sdctl & ~(0x1f << 8)) | (NUM_DDS_REGS << 8);
+ sdctl = (sdctl & ~(0x1f << 13)) | (RXEQ_ROWS << 13);
+ qib_write_kreg(dd, kr_ibserdesctrl, sdctl);
+
+ /*
+ * Iterate down table within loop for each register to store.
+ */
+ dds_reg_map = DDS_REG_MAP;
+ for (idx = 0; idx < NUM_DDS_REGS; ++idx) {
+ data = ((dds_reg_map & 0xF) << 4) | TX_FAST_ELT;
+ writeq(data, iaddr + idx);
+ mmiowb();
+ qib_read_kreg32(dd, kr_scratch);
+ dds_reg_map >>= 4;
+ for (midx = 0; midx < DDS_ROWS; ++midx) {
+ u64 __iomem *daddr = taddr + ((midx << 4) + idx);
+ data = dds_init_vals[midx].reg_vals[idx];
+ writeq(data, daddr);
+ mmiowb();
+ qib_read_kreg32(dd, kr_scratch);
+ } /* End inner for (vals for this reg, each row) */
+ } /* end outer for (regs to be stored) */
+
+ /*
+ * Init the RXEQ section of the table.
+ * This runs in a different order, as the pattern of
+ * register references is more complex, but there are only
+ * four "data" values per register.
+ */
+ min_idx = idx; /* RXEQ indices pick up where DDS left off */
+ taddr += 0x100; /* RXEQ data is in second half of table */
+ /* Iterate through RXEQ register addresses */
+ for (idx = 0; idx < RXEQ_ROWS; ++idx) {
+ int didx; /* "destination" */
+ int vidx;
+
+ /* didx is offset by min_idx to address RXEQ range of regs */
+ didx = idx + min_idx;
+ /* Store the next RXEQ register address */
+ writeq(rxeq_init_vals[idx].rdesc, iaddr + didx);
+ mmiowb();
+ qib_read_kreg32(dd, kr_scratch);
+ /* Iterate through RXEQ values */
+ for (vidx = 0; vidx < 4; vidx++) {
+ data = rxeq_init_vals[idx].rdata[vidx];
+ writeq(data, taddr + (vidx << 6) + idx);
+ mmiowb();
+ qib_read_kreg32(dd, kr_scratch);
+ }
+ } /* end outer for (Reg-writes for RXEQ) */
+ return 0;
+}
+
+#define CMUCTRL5 EPB_LOC(7, 0, 0x15)
+#define RXHSCTRL0(chan) EPB_LOC(chan, 6, 0)
+#define VCDL_DAC2(chan) EPB_LOC(chan, 6, 5)
+#define VCDL_CTRL0(chan) EPB_LOC(chan, 6, 6)
+#define VCDL_CTRL2(chan) EPB_LOC(chan, 6, 8)
+#define START_EQ2(chan) EPB_LOC(chan, 7, 0x28)
+
+/*
+ * Repeat a "store" across all channels of the IB SerDes.
+ * Although nominally it inherits the "read value" of the last
+ * channel it modified, the only really useful return is <0 for
+ * failure, >= 0 for success. The parameter 'loc' is assumed to
+ * be the location in some channel of the register to be modified
+ * The caller can specify use of the "gang write" option of EPB,
+ * in which case we use the specified channel data for any fields
+ * not explicitely written.
+ */
+static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
+ int mask)
+{
+ int ret = -1;
+ int chnl;
+
+ if (loc & EPB_GLOBAL_WR) {
+ /*
+ * Our caller has assured us that we can set all four
+ * channels at once. Trust that. If mask is not 0xFF,
+ * we will read the _specified_ channel for our starting
+ * value.
+ */
+ loc |= (1U << EPB_IB_QUAD0_CS_SHF);
+ chnl = (loc >> (4 + EPB_ADDR_SHF)) & 7;
+ if (mask != 0xFF) {
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+ loc & ~EPB_GLOBAL_WR, 0, 0);
+ if (ret < 0) {
+ int sloc = loc >> EPB_ADDR_SHF;
+
+ qib_dev_err(dd, "pre-read failed: elt %d,"
+ " addr 0x%X, chnl %d\n",
+ (sloc & 0xF),
+ (sloc >> 9) & 0x3f, chnl);
+ return ret;
+ }
+ val = (ret & ~mask) | (val & mask);
+ }
+ loc &= ~(7 << (4+EPB_ADDR_SHF));
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, 0xFF);
+ if (ret < 0) {
+ int sloc = loc >> EPB_ADDR_SHF;
+
+ qib_dev_err(dd, "Global WR failed: elt %d,"
+ " addr 0x%X, val %02X\n",
+ (sloc & 0xF), (sloc >> 9) & 0x3f, val);
+ }
+ return ret;
+ }
+ /* Clear "channel" and set CS so we can simply iterate */
+ loc &= ~(7 << (4+EPB_ADDR_SHF));
+ loc |= (1U << EPB_IB_QUAD0_CS_SHF);
+ for (chnl = 0; chnl < 4; ++chnl) {
+ int cloc = loc | (chnl << (4+EPB_ADDR_SHF));
+
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, cloc, val, mask);
+ if (ret < 0) {
+ int sloc = loc >> EPB_ADDR_SHF;
+
+ qib_dev_err(dd, "Write failed: elt %d,"
+ " addr 0x%X, chnl %d, val 0x%02X,"
+ " mask 0x%02X\n",
+ (sloc & 0xF), (sloc >> 9) & 0x3f, chnl,
+ val & 0xFF, mask & 0xFF);
+ break;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Set the Tx values normally modified by IBC in IB1.2 mode to default
+ * values, as gotten from first row of init table.
+ */
+static int set_dds_vals(struct qib_devdata *dd, struct dds_init *ddi)
+{
+ int ret;
+ int idx, reg, data;
+ uint32_t regmap;
+
+ regmap = DDS_REG_MAP;
+ for (idx = 0; idx < NUM_DDS_REGS; ++idx) {
+ reg = (regmap & 0xF);
+ regmap >>= 4;
+ data = ddi->reg_vals[idx];
+ /* Vendor says RMW not needed for these regs, use 0xFF mask */
+ ret = ibsd_mod_allchnls(dd, EPB_LOC(0, 9, reg), data, 0xFF);
+ if (ret < 0)
+ break;
+ }
+ return ret;
+}
+
+/*
+ * Set the Rx values normally modified by IBC in IB1.2 mode to default
+ * values, as gotten from selected column of init table.
+ */
+static int set_rxeq_vals(struct qib_devdata *dd, int vsel)
+{
+ int ret;
+ int ridx;
+ int cnt = ARRAY_SIZE(rxeq_init_vals);
+
+ for (ridx = 0; ridx < cnt; ++ridx) {
+ int elt, reg, val, loc;
+
+ elt = rxeq_init_vals[ridx].rdesc & 0xF;
+ reg = rxeq_init_vals[ridx].rdesc >> 4;
+ loc = EPB_LOC(0, elt, reg);
+ val = rxeq_init_vals[ridx].rdata[vsel];
+ /* mask of 0xFF, because hardware does full-byte store. */
+ ret = ibsd_mod_allchnls(dd, loc, val, 0xFF);
+ if (ret < 0)
+ break;
+ }
+ return ret;
+}
+
+/*
+ * Set the default values (row 0) for DDR Driver Demphasis.
+ * we do this initially and whenever we turn off IB-1.2
+ *
+ * The "default" values for Rx equalization are also stored to
+ * SerDes registers. Formerly (and still default), we used set 2.
+ * For experimenting with cables and link-partners, we allow changing
+ * that via a module parameter.
+ */
+static unsigned qib_rxeq_set = 2;
+module_param_named(rxeq_default_set, qib_rxeq_set, uint,
+ S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(rxeq_default_set,
+ "Which set [0..3] of Rx Equalization values is default");
+
+static int qib_internal_presets(struct qib_devdata *dd)
+{
+ int ret = 0;
+
+ ret = set_dds_vals(dd, dds_init_vals + DDS_3M);
+
+ if (ret < 0)
+ qib_dev_err(dd, "Failed to set default DDS values\n");
+ ret = set_rxeq_vals(dd, qib_rxeq_set & 3);
+ if (ret < 0)
+ qib_dev_err(dd, "Failed to set default RXEQ values\n");
+ return ret;
+}
+
+int qib_sd7220_presets(struct qib_devdata *dd)
+{
+ int ret = 0;
+
+ if (!dd->cspec->presets_needed)
+ return ret;
+ dd->cspec->presets_needed = 0;
+ /* Assert uC reset, so we don't clash with it. */
+ qib_ibsd_reset(dd, 1);
+ udelay(2);
+ qib_sd_trimdone_monitor(dd, "link-down");
+
+ ret = qib_internal_presets(dd);
+ return ret;
+}
+
+static int qib_sd_trimself(struct qib_devdata *dd, int val)
+{
+ int loc = CMUCTRL5 | (1U << EPB_IB_QUAD0_CS_SHF);
+
+ return qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, 0xFF);
+}
+
+static int qib_sd_early(struct qib_devdata *dd)
+{
+ int ret;
+
+ ret = ibsd_mod_allchnls(dd, RXHSCTRL0(0) | EPB_GLOBAL_WR, 0xD4, 0xFF);
+ if (ret < 0)
+ goto bail;
+ ret = ibsd_mod_allchnls(dd, START_EQ1(0) | EPB_GLOBAL_WR, 0x10, 0xFF);
+ if (ret < 0)
+ goto bail;
+ ret = ibsd_mod_allchnls(dd, START_EQ2(0) | EPB_GLOBAL_WR, 0x30, 0xFF);
+bail:
+ return ret;
+}
+
+#define BACTRL(chnl) EPB_LOC(chnl, 6, 0x0E)
+#define LDOUTCTRL1(chnl) EPB_LOC(chnl, 7, 6)
+#define RXHSSTATUS(chnl) EPB_LOC(chnl, 6, 0xF)
+
+static int qib_sd_dactrim(struct qib_devdata *dd)
+{
+ int ret;
+
+ ret = ibsd_mod_allchnls(dd, VCDL_DAC2(0) | EPB_GLOBAL_WR, 0x2D, 0xFF);
+ if (ret < 0)
+ goto bail;
+
+ /* more fine-tuning of what will be default */
+ ret = ibsd_mod_allchnls(dd, VCDL_CTRL2(0), 3, 0xF);
+ if (ret < 0)
+ goto bail;
+
+ ret = ibsd_mod_allchnls(dd, BACTRL(0) | EPB_GLOBAL_WR, 0x40, 0xFF);
+ if (ret < 0)
+ goto bail;
+
+ ret = ibsd_mod_allchnls(dd, LDOUTCTRL1(0) | EPB_GLOBAL_WR, 0x04, 0xFF);
+ if (ret < 0)
+ goto bail;
+
+ ret = ibsd_mod_allchnls(dd, RXHSSTATUS(0) | EPB_GLOBAL_WR, 0x04, 0xFF);
+ if (ret < 0)
+ goto bail;
+
+ /*
+ * Delay for max possible number of steps, with slop.
+ * Each step is about 4usec.
+ */
+ udelay(415);
+
+ ret = ibsd_mod_allchnls(dd, LDOUTCTRL1(0) | EPB_GLOBAL_WR, 0x00, 0xFF);
+
+bail:
+ return ret;
+}
+
+#define RELOCK_FIRST_MS 3
+#define RXLSPPM(chan) EPB_LOC(chan, 0, 2)
+void toggle_7220_rclkrls(struct qib_devdata *dd)
+{
+ int loc = RXLSPPM(0) | EPB_GLOBAL_WR;
+ int ret;
+
+ ret = ibsd_mod_allchnls(dd, loc, 0, 0x80);
+ if (ret < 0)
+ qib_dev_err(dd, "RCLKRLS failed to clear D7\n");
+ else {
+ udelay(1);
+ ibsd_mod_allchnls(dd, loc, 0x80, 0x80);
+ }
+ /* And again for good measure */
+ udelay(1);
+ ret = ibsd_mod_allchnls(dd, loc, 0, 0x80);
+ if (ret < 0)
+ qib_dev_err(dd, "RCLKRLS failed to clear D7\n");
+ else {
+ udelay(1);
+ ibsd_mod_allchnls(dd, loc, 0x80, 0x80);
+ }
+ /* Now reset xgxs and IBC to complete the recovery */
+ dd->f_xgxs_reset(dd->pport);
+}
+
+/*
+ * Shut down the timer that polls for relock occasions, if needed
+ * this is "hooked" from qib_7220_quiet_serdes(), which is called
+ * just before qib_shutdown_device() in qib_driver.c shuts down all
+ * the other timers
+ */
+void shutdown_7220_relock_poll(struct qib_devdata *dd)
+{
+ struct qib_relock *irp = &dd->cspec->relock_st;
+
+ if (atomic_read(&irp->relock_timer_active)) {
+ del_timer_sync(&irp->relock_timer);
+ atomic_set(&irp->relock_timer_active, 0);
+ }
+}
+
+static unsigned qib_relock_by_timer = 1;
+module_param_named(relock_by_timer, qib_relock_by_timer, uint,
+ S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(relock_by_timer, "Allow relock attempt if link not up");
+
+static void qib_run_relock(unsigned long opaque)
+{
+ struct qib_devdata *dd = (struct qib_devdata *)opaque;
+ struct qib_pportdata *ppd = dd->pport;
+ struct qib_relock *irp = &dd->cspec->relock_st;
+ int timeoff;
+
+ /*
+ * Check link-training state for "stuck" state, when down.
+ * if found, try relock and schedule another try at
+ * exponentially growing delay, maxed at one second.
+ * if not stuck, our work is done.
+ */
+ if ((dd->flags & QIB_INITTED) && !(ppd->lflags &
+ (QIBL_IB_AUTONEG_INPROG | QIBL_LINKINIT | QIBL_LINKARMED |
+ QIBL_LINKACTIVE))) {
+ if (qib_relock_by_timer) {
+ if (!(ppd->lflags & QIBL_IB_LINK_DISABLED))
+ toggle_7220_rclkrls(dd);
+ }
+ /* re-set timer for next check */
+ timeoff = irp->relock_interval << 1;
+ if (timeoff > HZ)
+ timeoff = HZ;
+ irp->relock_interval = timeoff;
+ } else
+ timeoff = HZ;
+ mod_timer(&irp->relock_timer, jiffies + timeoff);
+}
+
+void set_7220_relock_poll(struct qib_devdata *dd, int ibup)
+{
+ struct qib_relock *irp = &dd->cspec->relock_st;
+
+ if (ibup > 0) {
+ /* We are now up, relax timer to 1 second interval */
+ if (atomic_read(&irp->relock_timer_active))
+ mod_timer(&irp->relock_timer, jiffies + HZ);
+ } else {
+ /* Transition to down, (re-)set timer to short interval. */
+ int timeout;
+
+ timeout = (HZ * ((ibup == -1) ? 1000 : RELOCK_FIRST_MS)) / 1000;
+ if (timeout == 0)
+ timeout = 1;
+ /* If timer has not yet been started, do so. */
+ if (atomic_inc_return(&irp->relock_timer_active) == 1) {
+ init_timer(&irp->relock_timer);
+ irp->relock_timer.function = qib_run_relock;
+ irp->relock_timer.data = (unsigned long) dd;
+ irp->relock_interval = timeout;
+ irp->relock_timer.expires = jiffies + timeout;
+ add_timer(&irp->relock_timer);
+ } else {
+ irp->relock_interval = timeout;
+ mod_timer(&irp->relock_timer, jiffies + timeout);
+ atomic_dec(&irp->relock_timer_active);
+ }
+ }
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 34/52] IB/qib: Add qib_sd7220_img.c
From: Ralph Campbell @ 2010-05-07 0:01 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_sd7220_img.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_sd7220_img.c | 1081 ++++++++++++++++++++++++++++
1 files changed, 1081 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_sd7220_img.c
diff --git a/drivers/infiniband/hw/qib/qib_sd7220_img.c b/drivers/infiniband/hw/qib/qib_sd7220_img.c
new file mode 100644
index 0000000..a1118fb
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_sd7220_img.c
@@ -0,0 +1,1081 @@
+/*
+ * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file contains the memory image from the vendor, to be copied into
+ * the IB SERDES of the IBA7220 during initialization.
+ * The file also includes the two functions which use this image.
+ */
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "qib.h"
+#include "qib_7220.h"
+
+static unsigned char qib_sd7220_ib_img[] = {
+/*0000*/0x02, 0x0A, 0x29, 0x02, 0x0A, 0x87, 0xE5, 0xE6,
+ 0x30, 0xE6, 0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F,
+/*0010*/0x00, 0xE5, 0xE2, 0x30, 0xE4, 0x04, 0x7E, 0x01,
+ 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x5F, 0x60, 0x08,
+/*0020*/0x53, 0xF9, 0xF7, 0xE4, 0xF5, 0xFE, 0x80, 0x08,
+ 0x7F, 0x0A, 0x12, 0x17, 0x31, 0x12, 0x0E, 0xA2,
+/*0030*/0x75, 0xFC, 0x08, 0xE4, 0xF5, 0xFD, 0xE5, 0xE7,
+ 0x20, 0xE7, 0x03, 0x43, 0xF9, 0x08, 0x22, 0x00,
+/*0040*/0x01, 0x20, 0x11, 0x00, 0x04, 0x20, 0x00, 0x75,
+ 0x51, 0x01, 0xE4, 0xF5, 0x52, 0xF5, 0x53, 0xF5,
+/*0050*/0x52, 0xF5, 0x7E, 0x7F, 0x04, 0x02, 0x04, 0x38,
+ 0xC2, 0x36, 0x05, 0x52, 0xE5, 0x52, 0xD3, 0x94,
+/*0060*/0x0C, 0x40, 0x05, 0x75, 0x52, 0x01, 0xD2, 0x36,
+ 0x90, 0x07, 0x0C, 0x74, 0x07, 0xF0, 0xA3, 0x74,
+/*0070*/0xFF, 0xF0, 0xE4, 0xF5, 0x0C, 0xA3, 0xF0, 0x90,
+ 0x07, 0x14, 0xF0, 0xA3, 0xF0, 0x75, 0x0B, 0x20,
+/*0080*/0xF5, 0x09, 0xE4, 0xF5, 0x08, 0xE5, 0x08, 0xD3,
+ 0x94, 0x30, 0x40, 0x03, 0x02, 0x04, 0x04, 0x12,
+/*0090*/0x00, 0x06, 0x15, 0x0B, 0xE5, 0x08, 0x70, 0x04,
+ 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x09,
+/*00A0*/0x70, 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00,
+ 0xEE, 0x5F, 0x60, 0x05, 0x12, 0x18, 0x71, 0xD2,
+/*00B0*/0x35, 0x53, 0xE1, 0xF7, 0xE5, 0x08, 0x45, 0x09,
+ 0xFF, 0xE5, 0x0B, 0x25, 0xE0, 0x25, 0xE0, 0x24,
+/*00C0*/0x83, 0xF5, 0x82, 0xE4, 0x34, 0x07, 0xF5, 0x83,
+ 0xEF, 0xF0, 0x85, 0xE2, 0x20, 0xE5, 0x52, 0xD3,
+/*00D0*/0x94, 0x01, 0x40, 0x0D, 0x12, 0x19, 0xF3, 0xE0,
+ 0x54, 0xA0, 0x64, 0x40, 0x70, 0x03, 0x02, 0x03,
+/*00E0*/0xFB, 0x53, 0xF9, 0xF8, 0x90, 0x94, 0x70, 0xE4,
+ 0xF0, 0xE0, 0xF5, 0x10, 0xAF, 0x09, 0x12, 0x1E,
+/*00F0*/0xB3, 0xAF, 0x08, 0xEF, 0x44, 0x08, 0xF5, 0x82,
+ 0x75, 0x83, 0x80, 0xE0, 0xF5, 0x29, 0xEF, 0x44,
+/*0100*/0x07, 0x12, 0x1A, 0x3C, 0xF5, 0x22, 0x54, 0x40,
+ 0xD3, 0x94, 0x00, 0x40, 0x1E, 0xE5, 0x29, 0x54,
+/*0110*/0xF0, 0x70, 0x21, 0x12, 0x19, 0xF3, 0xE0, 0x44,
+ 0x80, 0xF0, 0xE5, 0x22, 0x54, 0x30, 0x65, 0x08,
+/*0120*/0x70, 0x09, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xBF,
+ 0xF0, 0x80, 0x09, 0x12, 0x19, 0xF3, 0x74, 0x40,
+/*0130*/0xF0, 0x02, 0x03, 0xFB, 0x12, 0x1A, 0x12, 0x75,
+ 0x83, 0xAE, 0x74, 0xFF, 0xF0, 0xAF, 0x08, 0x7E,
+/*0140*/0x00, 0xEF, 0x44, 0x07, 0xF5, 0x82, 0xE0, 0xFD,
+ 0xE5, 0x0B, 0x25, 0xE0, 0x25, 0xE0, 0x24, 0x81,
+/*0150*/0xF5, 0x82, 0xE4, 0x34, 0x07, 0xF5, 0x83, 0xED,
+ 0xF0, 0x90, 0x07, 0x0E, 0xE0, 0x04, 0xF0, 0xEF,
+/*0160*/0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0x98, 0xE0,
+ 0xF5, 0x28, 0x12, 0x1A, 0x23, 0x40, 0x0C, 0x12,
+/*0170*/0x19, 0xF3, 0xE0, 0x44, 0x01, 0x12, 0x1A, 0x32,
+ 0x02, 0x03, 0xF6, 0xAF, 0x08, 0x7E, 0x00, 0x74,
+/*0180*/0x80, 0xCD, 0xEF, 0xCD, 0x8D, 0x82, 0xF5, 0x83,
+ 0xE0, 0x30, 0xE0, 0x0A, 0x12, 0x19, 0xF3, 0xE0,
+/*0190*/0x44, 0x20, 0xF0, 0x02, 0x03, 0xFB, 0x12, 0x19,
+ 0xF3, 0xE0, 0x54, 0xDF, 0xF0, 0xEE, 0x44, 0xAE,
+/*01A0*/0x12, 0x1A, 0x43, 0x30, 0xE4, 0x03, 0x02, 0x03,
+ 0xFB, 0x74, 0x9E, 0x12, 0x1A, 0x05, 0x20, 0xE0,
+/*01B0*/0x03, 0x02, 0x03, 0xFB, 0x8F, 0x82, 0x8E, 0x83,
+ 0xE0, 0x20, 0xE0, 0x03, 0x02, 0x03, 0xFB, 0x12,
+/*01C0*/0x19, 0xF3, 0xE0, 0x44, 0x10, 0xF0, 0xE5, 0xE3,
+ 0x20, 0xE7, 0x08, 0xE5, 0x08, 0x12, 0x1A, 0x3A,
+/*01D0*/0x44, 0x04, 0xF0, 0xAF, 0x08, 0x7E, 0x00, 0xEF,
+ 0x12, 0x1A, 0x3A, 0x20, 0xE2, 0x34, 0x12, 0x19,
+/*01E0*/0xF3, 0xE0, 0x44, 0x08, 0xF0, 0xE5, 0xE4, 0x30,
+ 0xE6, 0x04, 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00,
+/*01F0*/0xE5, 0x7E, 0xC3, 0x94, 0x04, 0x50, 0x04, 0x7C,
+ 0x01, 0x80, 0x02, 0x7C, 0x00, 0xEC, 0x4D, 0x60,
+/*0200*/0x05, 0xC2, 0x35, 0x02, 0x03, 0xFB, 0xEE, 0x44,
+ 0xD2, 0x12, 0x1A, 0x43, 0x44, 0x40, 0xF0, 0x02,
+/*0210*/0x03, 0xFB, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xF7,
+ 0xF0, 0x12, 0x1A, 0x12, 0x75, 0x83, 0xD2, 0xE0,
+/*0220*/0x54, 0xBF, 0xF0, 0x90, 0x07, 0x14, 0xE0, 0x04,
+ 0xF0, 0xE5, 0x7E, 0x70, 0x03, 0x75, 0x7E, 0x01,
+/*0230*/0xAF, 0x08, 0x7E, 0x00, 0x12, 0x1A, 0x23, 0x40,
+ 0x12, 0x12, 0x19, 0xF3, 0xE0, 0x44, 0x01, 0x12,
+/*0240*/0x19, 0xF2, 0xE0, 0x54, 0x02, 0x12, 0x1A, 0x32,
+ 0x02, 0x03, 0xFB, 0x12, 0x19, 0xF3, 0xE0, 0x44,
+/*0250*/0x02, 0x12, 0x19, 0xF2, 0xE0, 0x54, 0xFE, 0xF0,
+ 0xC2, 0x35, 0xEE, 0x44, 0x8A, 0x8F, 0x82, 0xF5,
+/*0260*/0x83, 0xE0, 0xF5, 0x17, 0x54, 0x8F, 0x44, 0x40,
+ 0xF0, 0x74, 0x90, 0xFC, 0xE5, 0x08, 0x44, 0x07,
+/*0270*/0xFD, 0xF5, 0x82, 0x8C, 0x83, 0xE0, 0x54, 0x3F,
+ 0x90, 0x07, 0x02, 0xF0, 0xE0, 0x54, 0xC0, 0x8D,
+/*0280*/0x82, 0x8C, 0x83, 0xF0, 0x74, 0x92, 0x12, 0x1A,
+ 0x05, 0x90, 0x07, 0x03, 0x12, 0x1A, 0x19, 0x74,
+/*0290*/0x82, 0x12, 0x1A, 0x05, 0x90, 0x07, 0x04, 0x12,
+ 0x1A, 0x19, 0x74, 0xB4, 0x12, 0x1A, 0x05, 0x90,
+/*02A0*/0x07, 0x05, 0x12, 0x1A, 0x19, 0x74, 0x94, 0xFE,
+ 0xE5, 0x08, 0x44, 0x06, 0x12, 0x1A, 0x0A, 0xF5,
+/*02B0*/0x10, 0x30, 0xE0, 0x04, 0xD2, 0x37, 0x80, 0x02,
+ 0xC2, 0x37, 0xE5, 0x10, 0x54, 0x7F, 0x8F, 0x82,
+/*02C0*/0x8E, 0x83, 0xF0, 0x30, 0x44, 0x30, 0x12, 0x1A,
+ 0x03, 0x54, 0x80, 0xD3, 0x94, 0x00, 0x40, 0x04,
+/*02D0*/0xD2, 0x39, 0x80, 0x02, 0xC2, 0x39, 0x8F, 0x82,
+ 0x8E, 0x83, 0xE0, 0x44, 0x80, 0xF0, 0x12, 0x1A,
+/*02E0*/0x03, 0x54, 0x40, 0xD3, 0x94, 0x00, 0x40, 0x04,
+ 0xD2, 0x3A, 0x80, 0x02, 0xC2, 0x3A, 0x8F, 0x82,
+/*02F0*/0x8E, 0x83, 0xE0, 0x44, 0x40, 0xF0, 0x74, 0x92,
+ 0xFE, 0xE5, 0x08, 0x44, 0x06, 0x12, 0x1A, 0x0A,
+/*0300*/0x30, 0xE7, 0x04, 0xD2, 0x38, 0x80, 0x02, 0xC2,
+ 0x38, 0x8F, 0x82, 0x8E, 0x83, 0xE0, 0x54, 0x7F,
+/*0310*/0xF0, 0x12, 0x1E, 0x46, 0xE4, 0xF5, 0x0A, 0x20,
+ 0x03, 0x02, 0x80, 0x03, 0x30, 0x43, 0x03, 0x12,
+/*0320*/0x19, 0x95, 0x20, 0x02, 0x02, 0x80, 0x03, 0x30,
+ 0x42, 0x03, 0x12, 0x0C, 0x8F, 0x30, 0x30, 0x06,
+/*0330*/0x12, 0x19, 0x95, 0x12, 0x0C, 0x8F, 0x12, 0x0D,
+ 0x47, 0x12, 0x19, 0xF3, 0xE0, 0x54, 0xFB, 0xF0,
+/*0340*/0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40, 0x46, 0x43,
+ 0xE1, 0x08, 0x12, 0x19, 0xF3, 0xE0, 0x44, 0x04,
+/*0350*/0xF0, 0xE5, 0xE4, 0x20, 0xE7, 0x2A, 0x12, 0x1A,
+ 0x12, 0x75, 0x83, 0xD2, 0xE0, 0x54, 0x08, 0xD3,
+/*0360*/0x94, 0x00, 0x40, 0x04, 0x7F, 0x01, 0x80, 0x02,
+ 0x7F, 0x00, 0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40,
+/*0370*/0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF,
+ 0x5E, 0x60, 0x05, 0x12, 0x1D, 0xD7, 0x80, 0x17,
+/*0380*/0x12, 0x1A, 0x12, 0x75, 0x83, 0xD2, 0xE0, 0x44,
+ 0x08, 0xF0, 0x02, 0x03, 0xFB, 0x12, 0x1A, 0x12,
+/*0390*/0x75, 0x83, 0xD2, 0xE0, 0x54, 0xF7, 0xF0, 0x12,
+ 0x1E, 0x46, 0x7F, 0x08, 0x12, 0x17, 0x31, 0x74,
+/*03A0*/0x8E, 0xFE, 0x12, 0x1A, 0x12, 0x8E, 0x83, 0xE0,
+ 0xF5, 0x10, 0x54, 0xFE, 0xF0, 0xE5, 0x10, 0x44,
+/*03B0*/0x01, 0xFF, 0xE5, 0x08, 0xFD, 0xED, 0x44, 0x07,
+ 0xF5, 0x82, 0xEF, 0xF0, 0xE5, 0x10, 0x54, 0xFE,
+/*03C0*/0xFF, 0xED, 0x44, 0x07, 0xF5, 0x82, 0xEF, 0x12,
+ 0x1A, 0x11, 0x75, 0x83, 0x86, 0xE0, 0x44, 0x10,
+/*03D0*/0x12, 0x1A, 0x11, 0xE0, 0x44, 0x10, 0xF0, 0x12,
+ 0x19, 0xF3, 0xE0, 0x54, 0xFD, 0x44, 0x01, 0xFF,
+/*03E0*/0x12, 0x19, 0xF3, 0xEF, 0x12, 0x1A, 0x32, 0x30,
+ 0x32, 0x0C, 0xE5, 0x08, 0x44, 0x08, 0xF5, 0x82,
+/*03F0*/0x75, 0x83, 0x82, 0x74, 0x05, 0xF0, 0xAF, 0x0B,
+ 0x12, 0x18, 0xD7, 0x74, 0x10, 0x25, 0x08, 0xF5,
+/*0400*/0x08, 0x02, 0x00, 0x85, 0x05, 0x09, 0xE5, 0x09,
+ 0xD3, 0x94, 0x07, 0x50, 0x03, 0x02, 0x00, 0x82,
+/*0410*/0xE5, 0x7E, 0xD3, 0x94, 0x00, 0x40, 0x04, 0x7F,
+ 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x7E, 0xC3,
+/*0420*/0x94, 0xFA, 0x50, 0x04, 0x7E, 0x01, 0x80, 0x02,
+ 0x7E, 0x00, 0xEE, 0x5F, 0x60, 0x02, 0x05, 0x7E,
+/*0430*/0x30, 0x35, 0x0B, 0x43, 0xE1, 0x01, 0x7F, 0x09,
+ 0x12, 0x17, 0x31, 0x02, 0x00, 0x58, 0x53, 0xE1,
+/*0440*/0xFE, 0x02, 0x00, 0x58, 0x8E, 0x6A, 0x8F, 0x6B,
+ 0x8C, 0x6C, 0x8D, 0x6D, 0x75, 0x6E, 0x01, 0x75,
+/*0450*/0x6F, 0x01, 0x75, 0x70, 0x01, 0xE4, 0xF5, 0x73,
+ 0xF5, 0x74, 0xF5, 0x75, 0x90, 0x07, 0x2F, 0xF0,
+/*0460*/0xF5, 0x3C, 0xF5, 0x3E, 0xF5, 0x46, 0xF5, 0x47,
+ 0xF5, 0x3D, 0xF5, 0x3F, 0xF5, 0x6F, 0xE5, 0x6F,
+/*0470*/0x70, 0x0F, 0xE5, 0x6B, 0x45, 0x6A, 0x12, 0x07,
+ 0x2A, 0x75, 0x83, 0x80, 0x74, 0x3A, 0xF0, 0x80,
+/*0480*/0x09, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x80, 0x74,
+ 0x1A, 0xF0, 0xE4, 0xF5, 0x6E, 0xC3, 0x74, 0x3F,
+/*0490*/0x95, 0x6E, 0xFF, 0x12, 0x08, 0x65, 0x75, 0x83,
+ 0x82, 0xEF, 0xF0, 0x12, 0x1A, 0x4D, 0x12, 0x08,
+/*04A0*/0xC6, 0xE5, 0x33, 0xF0, 0x12, 0x08, 0xFA, 0x12,
+ 0x08, 0xB1, 0x40, 0xE1, 0xE5, 0x6F, 0x70, 0x0B,
+/*04B0*/0x12, 0x07, 0x2A, 0x75, 0x83, 0x80, 0x74, 0x36,
+ 0xF0, 0x80, 0x09, 0x12, 0x07, 0x2A, 0x75, 0x83,
+/*04C0*/0x80, 0x74, 0x16, 0xF0, 0x75, 0x6E, 0x01, 0x12,
+ 0x07, 0x2A, 0x75, 0x83, 0xB4, 0xE5, 0x6E, 0xF0,
+/*04D0*/0x12, 0x1A, 0x4D, 0x74, 0x3F, 0x25, 0x6E, 0xF5,
+ 0x82, 0xE4, 0x34, 0x00, 0xF5, 0x83, 0xE5, 0x33,
+/*04E0*/0xF0, 0x74, 0xBF, 0x25, 0x6E, 0xF5, 0x82, 0xE4,
+ 0x34, 0x00, 0x12, 0x08, 0xB1, 0x40, 0xD8, 0xE4,
+/*04F0*/0xF5, 0x70, 0xF5, 0x46, 0xF5, 0x47, 0xF5, 0x6E,
+ 0x12, 0x08, 0xFA, 0xF5, 0x83, 0xE0, 0xFE, 0x12,
+/*0500*/0x08, 0xC6, 0xE0, 0x7C, 0x00, 0x24, 0x00, 0xFF,
+ 0xEC, 0x3E, 0xFE, 0xAD, 0x3B, 0xD3, 0xEF, 0x9D,
+/*0510*/0xEE, 0x9C, 0x50, 0x04, 0x7B, 0x01, 0x80, 0x02,
+ 0x7B, 0x00, 0xE5, 0x70, 0x70, 0x04, 0x7A, 0x01,
+/*0520*/0x80, 0x02, 0x7A, 0x00, 0xEB, 0x5A, 0x60, 0x06,
+ 0x85, 0x6E, 0x46, 0x75, 0x70, 0x01, 0xD3, 0xEF,
+/*0530*/0x9D, 0xEE, 0x9C, 0x50, 0x04, 0x7F, 0x01, 0x80,
+ 0x02, 0x7F, 0x00, 0xE5, 0x70, 0xB4, 0x01, 0x04,
+/*0540*/0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF, 0x5E,
+ 0x60, 0x03, 0x85, 0x6E, 0x47, 0x05, 0x6E, 0xE5,
+/*0550*/0x6E, 0x64, 0x7F, 0x70, 0xA3, 0xE5, 0x46, 0x60,
+ 0x05, 0xE5, 0x47, 0xB4, 0x7E, 0x03, 0x85, 0x46,
+/*0560*/0x47, 0xE5, 0x6F, 0x70, 0x08, 0x85, 0x46, 0x76,
+ 0x85, 0x47, 0x77, 0x80, 0x0E, 0xC3, 0x74, 0x7F,
+/*0570*/0x95, 0x46, 0xF5, 0x78, 0xC3, 0x74, 0x7F, 0x95,
+ 0x47, 0xF5, 0x79, 0xE5, 0x6F, 0x70, 0x37, 0xE5,
+/*0580*/0x46, 0x65, 0x47, 0x70, 0x0C, 0x75, 0x73, 0x01,
+ 0x75, 0x74, 0x01, 0xF5, 0x3C, 0xF5, 0x3D, 0x80,
+/*0590*/0x35, 0xE4, 0xF5, 0x4E, 0xC3, 0xE5, 0x47, 0x95,
+ 0x46, 0xF5, 0x3C, 0xC3, 0x13, 0xF5, 0x71, 0x25,
+/*05A0*/0x46, 0xF5, 0x72, 0xC3, 0x94, 0x3F, 0x40, 0x05,
+ 0xE4, 0xF5, 0x3D, 0x80, 0x40, 0xC3, 0x74, 0x3F,
+/*05B0*/0x95, 0x72, 0xF5, 0x3D, 0x80, 0x37, 0xE5, 0x46,
+ 0x65, 0x47, 0x70, 0x0F, 0x75, 0x73, 0x01, 0x75,
+/*05C0*/0x75, 0x01, 0xF5, 0x3E, 0xF5, 0x3F, 0x75, 0x4E,
+ 0x01, 0x80, 0x22, 0xE4, 0xF5, 0x4E, 0xC3, 0xE5,
+/*05D0*/0x47, 0x95, 0x46, 0xF5, 0x3E, 0xC3, 0x13, 0xF5,
+ 0x71, 0x25, 0x46, 0xF5, 0x72, 0xD3, 0x94, 0x3F,
+/*05E0*/0x50, 0x05, 0xE4, 0xF5, 0x3F, 0x80, 0x06, 0xE5,
+ 0x72, 0x24, 0xC1, 0xF5, 0x3F, 0x05, 0x6F, 0xE5,
+/*05F0*/0x6F, 0xC3, 0x94, 0x02, 0x50, 0x03, 0x02, 0x04,
+ 0x6E, 0xE5, 0x6D, 0x45, 0x6C, 0x70, 0x02, 0x80,
+/*0600*/0x04, 0xE5, 0x74, 0x45, 0x75, 0x90, 0x07, 0x2F,
+ 0xF0, 0x7F, 0x01, 0xE5, 0x3E, 0x60, 0x04, 0xE5,
+/*0610*/0x3C, 0x70, 0x14, 0xE4, 0xF5, 0x3C, 0xF5, 0x3D,
+ 0xF5, 0x3E, 0xF5, 0x3F, 0x12, 0x08, 0xD2, 0x70,
+/*0620*/0x04, 0xF0, 0x02, 0x06, 0xA4, 0x80, 0x7A, 0xE5,
+ 0x3C, 0xC3, 0x95, 0x3E, 0x40, 0x07, 0xE5, 0x3C,
+/*0630*/0x95, 0x3E, 0xFF, 0x80, 0x06, 0xC3, 0xE5, 0x3E,
+ 0x95, 0x3C, 0xFF, 0xE5, 0x76, 0xD3, 0x95, 0x79,
+/*0640*/0x40, 0x05, 0x85, 0x76, 0x7A, 0x80, 0x03, 0x85,
+ 0x79, 0x7A, 0xE5, 0x77, 0xC3, 0x95, 0x78, 0x50,
+/*0650*/0x05, 0x85, 0x77, 0x7B, 0x80, 0x03, 0x85, 0x78,
+ 0x7B, 0xE5, 0x7B, 0xD3, 0x95, 0x7A, 0x40, 0x30,
+/*0660*/0xE5, 0x7B, 0x95, 0x7A, 0xF5, 0x3C, 0xF5, 0x3E,
+ 0xC3, 0xE5, 0x7B, 0x95, 0x7A, 0x90, 0x07, 0x19,
+/*0670*/0xF0, 0xE5, 0x3C, 0xC3, 0x13, 0xF5, 0x71, 0x25,
+ 0x7A, 0xF5, 0x72, 0xC3, 0x94, 0x3F, 0x40, 0x05,
+/*0680*/0xE4, 0xF5, 0x3D, 0x80, 0x1F, 0xC3, 0x74, 0x3F,
+ 0x95, 0x72, 0xF5, 0x3D, 0xF5, 0x3F, 0x80, 0x14,
+/*0690*/0xE4, 0xF5, 0x3C, 0xF5, 0x3E, 0x90, 0x07, 0x19,
+ 0xF0, 0x12, 0x08, 0xD2, 0x70, 0x03, 0xF0, 0x80,
+/*06A0*/0x03, 0x74, 0x01, 0xF0, 0x12, 0x08, 0x65, 0x75,
+ 0x83, 0xD0, 0xE0, 0x54, 0x0F, 0xFE, 0xAD, 0x3C,
+/*06B0*/0x70, 0x02, 0x7E, 0x07, 0xBE, 0x0F, 0x02, 0x7E,
+ 0x80, 0xEE, 0xFB, 0xEF, 0xD3, 0x9B, 0x74, 0x80,
+/*06C0*/0xF8, 0x98, 0x40, 0x1F, 0xE4, 0xF5, 0x3C, 0xF5,
+ 0x3E, 0x12, 0x08, 0xD2, 0x70, 0x03, 0xF0, 0x80,
+/*06D0*/0x12, 0x74, 0x01, 0xF0, 0xE5, 0x08, 0xFB, 0xEB,
+ 0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0xD2, 0xE0,
+/*06E0*/0x44, 0x10, 0xF0, 0xE5, 0x08, 0xFB, 0xEB, 0x44,
+ 0x09, 0xF5, 0x82, 0x75, 0x83, 0x9E, 0xED, 0xF0,
+/*06F0*/0xEB, 0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0xCA,
+ 0xED, 0xF0, 0x12, 0x08, 0x65, 0x75, 0x83, 0xCC,
+/*0700*/0xEF, 0xF0, 0x22, 0xE5, 0x08, 0x44, 0x07, 0xF5,
+ 0x82, 0x75, 0x83, 0xBC, 0xE0, 0x54, 0xF0, 0xF0,
+/*0710*/0xE5, 0x08, 0x44, 0x07, 0xF5, 0x82, 0x75, 0x83,
+ 0xBE, 0xE0, 0x54, 0xF0, 0xF0, 0xE5, 0x08, 0x44,
+/*0720*/0x07, 0xF5, 0x82, 0x75, 0x83, 0xC0, 0xE0, 0x54,
+ 0xF0, 0xF0, 0xE5, 0x08, 0x44, 0x07, 0xF5, 0x82,
+/*0730*/0x22, 0xF0, 0x90, 0x07, 0x28, 0xE0, 0xFE, 0xA3,
+ 0xE0, 0xF5, 0x82, 0x8E, 0x83, 0x22, 0x85, 0x42,
+/*0740*/0x42, 0x85, 0x41, 0x41, 0x85, 0x40, 0x40, 0x74,
+ 0xC0, 0x2F, 0xF5, 0x82, 0x74, 0x02, 0x3E, 0xF5,
+/*0750*/0x83, 0xE5, 0x42, 0xF0, 0x74, 0xE0, 0x2F, 0xF5,
+ 0x82, 0x74, 0x02, 0x3E, 0xF5, 0x83, 0x22, 0xE5,
+/*0760*/0x42, 0x29, 0xFD, 0xE4, 0x33, 0xFC, 0xE5, 0x3C,
+ 0xC3, 0x9D, 0xEC, 0x64, 0x80, 0xF8, 0x74, 0x80,
+/*0770*/0x98, 0x22, 0xF5, 0x83, 0xE0, 0x90, 0x07, 0x22,
+ 0x54, 0x1F, 0xFD, 0xE0, 0xFA, 0xA3, 0xE0, 0xF5,
+/*0780*/0x82, 0x8A, 0x83, 0xED, 0xF0, 0x22, 0x90, 0x07,
+ 0x22, 0xE0, 0xFC, 0xA3, 0xE0, 0xF5, 0x82, 0x8C,
+/*0790*/0x83, 0x22, 0x90, 0x07, 0x24, 0xFF, 0xED, 0x44,
+ 0x07, 0xCF, 0xF0, 0xA3, 0xEF, 0xF0, 0x22, 0x85,
+/*07A0*/0x38, 0x38, 0x85, 0x39, 0x39, 0x85, 0x3A, 0x3A,
+ 0x74, 0xC0, 0x2F, 0xF5, 0x82, 0x74, 0x02, 0x3E,
+/*07B0*/0xF5, 0x83, 0x22, 0x90, 0x07, 0x26, 0xFF, 0xED,
+ 0x44, 0x07, 0xCF, 0xF0, 0xA3, 0xEF, 0xF0, 0x22,
+/*07C0*/0xF0, 0x74, 0xA0, 0x2F, 0xF5, 0x82, 0x74, 0x02,
+ 0x3E, 0xF5, 0x83, 0x22, 0x74, 0xC0, 0x25, 0x11,
+/*07D0*/0xF5, 0x82, 0xE4, 0x34, 0x01, 0xF5, 0x83, 0x22,
+ 0x74, 0x00, 0x25, 0x11, 0xF5, 0x82, 0xE4, 0x34,
+/*07E0*/0x02, 0xF5, 0x83, 0x22, 0x74, 0x60, 0x25, 0x11,
+ 0xF5, 0x82, 0xE4, 0x34, 0x03, 0xF5, 0x83, 0x22,
+/*07F0*/0x74, 0x80, 0x25, 0x11, 0xF5, 0x82, 0xE4, 0x34,
+ 0x03, 0xF5, 0x83, 0x22, 0x74, 0xE0, 0x25, 0x11,
+/*0800*/0xF5, 0x82, 0xE4, 0x34, 0x03, 0xF5, 0x83, 0x22,
+ 0x74, 0x40, 0x25, 0x11, 0xF5, 0x82, 0xE4, 0x34,
+/*0810*/0x06, 0xF5, 0x83, 0x22, 0x74, 0x80, 0x2F, 0xF5,
+ 0x82, 0x74, 0x02, 0x3E, 0xF5, 0x83, 0x22, 0xAF,
+/*0820*/0x08, 0x7E, 0x00, 0xEF, 0x44, 0x07, 0xF5, 0x82,
+ 0x22, 0xF5, 0x83, 0xE5, 0x82, 0x44, 0x07, 0xF5,
+/*0830*/0x82, 0xE5, 0x40, 0xF0, 0x22, 0x74, 0x40, 0x25,
+ 0x11, 0xF5, 0x82, 0xE4, 0x34, 0x02, 0xF5, 0x83,
+/*0840*/0x22, 0x74, 0xC0, 0x25, 0x11, 0xF5, 0x82, 0xE4,
+ 0x34, 0x03, 0xF5, 0x83, 0x22, 0x74, 0x00, 0x25,
+/*0850*/0x11, 0xF5, 0x82, 0xE4, 0x34, 0x06, 0xF5, 0x83,
+ 0x22, 0x74, 0x20, 0x25, 0x11, 0xF5, 0x82, 0xE4,
+/*0860*/0x34, 0x06, 0xF5, 0x83, 0x22, 0xE5, 0x08, 0xFD,
+ 0xED, 0x44, 0x07, 0xF5, 0x82, 0x22, 0xE5, 0x41,
+/*0870*/0xF0, 0xE5, 0x65, 0x64, 0x01, 0x45, 0x64, 0x22,
+ 0x7E, 0x00, 0xFB, 0x7A, 0x00, 0xFD, 0x7C, 0x00,
+/*0880*/0x22, 0x74, 0x20, 0x25, 0x11, 0xF5, 0x82, 0xE4,
+ 0x34, 0x02, 0x22, 0x74, 0xA0, 0x25, 0x11, 0xF5,
+/*0890*/0x82, 0xE4, 0x34, 0x03, 0x22, 0x85, 0x3E, 0x42,
+ 0x85, 0x3F, 0x41, 0x8F, 0x40, 0x22, 0x85, 0x3C,
+/*08A0*/0x42, 0x85, 0x3D, 0x41, 0x8F, 0x40, 0x22, 0x75,
+ 0x45, 0x3F, 0x90, 0x07, 0x20, 0xE4, 0xF0, 0xA3,
+/*08B0*/0x22, 0xF5, 0x83, 0xE5, 0x32, 0xF0, 0x05, 0x6E,
+ 0xE5, 0x6E, 0xC3, 0x94, 0x40, 0x22, 0xF0, 0xE5,
+/*08C0*/0x08, 0x44, 0x06, 0xF5, 0x82, 0x22, 0x74, 0x00,
+ 0x25, 0x6E, 0xF5, 0x82, 0xE4, 0x34, 0x00, 0xF5,
+/*08D0*/0x83, 0x22, 0xE5, 0x6D, 0x45, 0x6C, 0x90, 0x07,
+ 0x2F, 0x22, 0xE4, 0xF9, 0xE5, 0x3C, 0xD3, 0x95,
+/*08E0*/0x3E, 0x22, 0x74, 0x80, 0x2E, 0xF5, 0x82, 0xE4,
+ 0x34, 0x02, 0xF5, 0x83, 0xE0, 0x22, 0x74, 0xA0,
+/*08F0*/0x2E, 0xF5, 0x82, 0xE4, 0x34, 0x02, 0xF5, 0x83,
+ 0xE0, 0x22, 0x74, 0x80, 0x25, 0x6E, 0xF5, 0x82,
+/*0900*/0xE4, 0x34, 0x00, 0x22, 0x25, 0x42, 0xFD, 0xE4,
+ 0x33, 0xFC, 0x22, 0x85, 0x42, 0x42, 0x85, 0x41,
+/*0910*/0x41, 0x85, 0x40, 0x40, 0x22, 0xED, 0x4C, 0x60,
+ 0x03, 0x02, 0x09, 0xE5, 0xEF, 0x4E, 0x70, 0x37,
+/*0920*/0x90, 0x07, 0x26, 0x12, 0x07, 0x89, 0xE0, 0xFD,
+ 0x12, 0x07, 0xCC, 0xED, 0xF0, 0x90, 0x07, 0x28,
+/*0930*/0x12, 0x07, 0x89, 0xE0, 0xFD, 0x12, 0x07, 0xD8,
+ 0xED, 0xF0, 0x12, 0x07, 0x86, 0xE0, 0x54, 0x1F,
+/*0940*/0xFD, 0x12, 0x08, 0x81, 0xF5, 0x83, 0xED, 0xF0,
+ 0x90, 0x07, 0x24, 0x12, 0x07, 0x89, 0xE0, 0x54,
+/*0950*/0x1F, 0xFD, 0x12, 0x08, 0x35, 0xED, 0xF0, 0xEF,
+ 0x64, 0x04, 0x4E, 0x70, 0x37, 0x90, 0x07, 0x26,
+/*0960*/0x12, 0x07, 0x89, 0xE0, 0xFD, 0x12, 0x07, 0xE4,
+ 0xED, 0xF0, 0x90, 0x07, 0x28, 0x12, 0x07, 0x89,
+/*0970*/0xE0, 0xFD, 0x12, 0x07, 0xF0, 0xED, 0xF0, 0x12,
+ 0x07, 0x86, 0xE0, 0x54, 0x1F, 0xFD, 0x12, 0x08,
+/*0980*/0x8B, 0xF5, 0x83, 0xED, 0xF0, 0x90, 0x07, 0x24,
+ 0x12, 0x07, 0x89, 0xE0, 0x54, 0x1F, 0xFD, 0x12,
+/*0990*/0x08, 0x41, 0xED, 0xF0, 0xEF, 0x64, 0x01, 0x4E,
+ 0x70, 0x04, 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00,
+/*09A0*/0xEF, 0x64, 0x02, 0x4E, 0x70, 0x04, 0x7F, 0x01,
+ 0x80, 0x02, 0x7F, 0x00, 0xEF, 0x4D, 0x60, 0x78,
+/*09B0*/0x90, 0x07, 0x26, 0x12, 0x07, 0x35, 0xE0, 0xFF,
+ 0x12, 0x07, 0xFC, 0xEF, 0x12, 0x07, 0x31, 0xE0,
+/*09C0*/0xFF, 0x12, 0x08, 0x08, 0xEF, 0xF0, 0x90, 0x07,
+ 0x22, 0x12, 0x07, 0x35, 0xE0, 0x54, 0x1F, 0xFF,
+/*09D0*/0x12, 0x08, 0x4D, 0xEF, 0xF0, 0x90, 0x07, 0x24,
+ 0x12, 0x07, 0x35, 0xE0, 0x54, 0x1F, 0xFF, 0x12,
+/*09E0*/0x08, 0x59, 0xEF, 0xF0, 0x22, 0x12, 0x07, 0xCC,
+ 0xE4, 0xF0, 0x12, 0x07, 0xD8, 0xE4, 0xF0, 0x12,
+/*09F0*/0x08, 0x81, 0xF5, 0x83, 0xE4, 0xF0, 0x12, 0x08,
+ 0x35, 0x74, 0x14, 0xF0, 0x12, 0x07, 0xE4, 0xE4,
+/*0A00*/0xF0, 0x12, 0x07, 0xF0, 0xE4, 0xF0, 0x12, 0x08,
+ 0x8B, 0xF5, 0x83, 0xE4, 0xF0, 0x12, 0x08, 0x41,
+/*0A10*/0x74, 0x14, 0xF0, 0x12, 0x07, 0xFC, 0xE4, 0xF0,
+ 0x12, 0x08, 0x08, 0xE4, 0xF0, 0x12, 0x08, 0x4D,
+/*0A20*/0xE4, 0xF0, 0x12, 0x08, 0x59, 0x74, 0x14, 0xF0,
+ 0x22, 0x53, 0xF9, 0xF7, 0x75, 0xFC, 0x10, 0xE4,
+/*0A30*/0xF5, 0xFD, 0x75, 0xFE, 0x30, 0xF5, 0xFF, 0xE5,
+ 0xE7, 0x20, 0xE7, 0x03, 0x43, 0xF9, 0x08, 0xE5,
+/*0A40*/0xE6, 0x20, 0xE7, 0x0B, 0x78, 0xFF, 0xE4, 0xF6,
+ 0xD8, 0xFD, 0x53, 0xE6, 0xFE, 0x80, 0x09, 0x78,
+/*0A50*/0x08, 0xE4, 0xF6, 0xD8, 0xFD, 0x53, 0xE6, 0xFE,
+ 0x75, 0x81, 0x80, 0xE4, 0xF5, 0xA8, 0xD2, 0xA8,
+/*0A60*/0xC2, 0xA9, 0xD2, 0xAF, 0xE5, 0xE2, 0x20, 0xE5,
+ 0x05, 0x20, 0xE6, 0x02, 0x80, 0x03, 0x43, 0xE1,
+/*0A70*/0x02, 0xE5, 0xE2, 0x20, 0xE0, 0x0E, 0x90, 0x00,
+ 0x00, 0x7F, 0x00, 0x7E, 0x08, 0xE4, 0xF0, 0xA3,
+/*0A80*/0xDF, 0xFC, 0xDE, 0xFA, 0x02, 0x0A, 0xDB, 0x43,
+ 0xFA, 0x01, 0xC0, 0xE0, 0xC0, 0xF0, 0xC0, 0x83,
+/*0A90*/0xC0, 0x82, 0xC0, 0xD0, 0x12, 0x1C, 0xE7, 0xD0,
+ 0xD0, 0xD0, 0x82, 0xD0, 0x83, 0xD0, 0xF0, 0xD0,
+/*0AA0*/0xE0, 0x53, 0xFA, 0xFE, 0x32, 0x02, 0x1B, 0x55,
+ 0xE4, 0x93, 0xA3, 0xF8, 0xE4, 0x93, 0xA3, 0xF6,
+/*0AB0*/0x08, 0xDF, 0xF9, 0x80, 0x29, 0xE4, 0x93, 0xA3,
+ 0xF8, 0x54, 0x07, 0x24, 0x0C, 0xC8, 0xC3, 0x33,
+/*0AC0*/0xC4, 0x54, 0x0F, 0x44, 0x20, 0xC8, 0x83, 0x40,
+ 0x04, 0xF4, 0x56, 0x80, 0x01, 0x46, 0xF6, 0xDF,
+/*0AD0*/0xE4, 0x80, 0x0B, 0x01, 0x02, 0x04, 0x08, 0x10,
+ 0x20, 0x40, 0x80, 0x90, 0x00, 0x3F, 0xE4, 0x7E,
+/*0AE0*/0x01, 0x93, 0x60, 0xC1, 0xA3, 0xFF, 0x54, 0x3F,
+ 0x30, 0xE5, 0x09, 0x54, 0x1F, 0xFE, 0xE4, 0x93,
+/*0AF0*/0xA3, 0x60, 0x01, 0x0E, 0xCF, 0x54, 0xC0, 0x25,
+ 0xE0, 0x60, 0xAD, 0x40, 0xB8, 0x80, 0xFE, 0x8C,
+/*0B00*/0x64, 0x8D, 0x65, 0x8A, 0x66, 0x8B, 0x67, 0xE4,
+ 0xF5, 0x69, 0xEF, 0x4E, 0x70, 0x03, 0x02, 0x1D,
+/*0B10*/0x55, 0xE4, 0xF5, 0x68, 0xE5, 0x67, 0x45, 0x66,
+ 0x70, 0x32, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x90,
+/*0B20*/0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2, 0xE4,
+ 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0xE4, 0x12,
+/*0B30*/0x08, 0x70, 0x70, 0x29, 0x12, 0x07, 0x2A, 0x75,
+ 0x83, 0x92, 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83,
+/*0B40*/0xC6, 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC8,
+ 0xE4, 0xF0, 0x80, 0x11, 0x90, 0x07, 0x26, 0x12,
+/*0B50*/0x07, 0x35, 0xE4, 0x12, 0x08, 0x70, 0x70, 0x05,
+ 0x12, 0x07, 0x32, 0xE4, 0xF0, 0x12, 0x1D, 0x55,
+/*0B60*/0x12, 0x1E, 0xBF, 0xE5, 0x67, 0x45, 0x66, 0x70,
+ 0x33, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x90, 0xE5,
+/*0B70*/0x41, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2, 0xE5,
+ 0x41, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0x12,
+/*0B80*/0x08, 0x6E, 0x70, 0x29, 0x12, 0x07, 0x2A, 0x75,
+ 0x83, 0x92, 0xE5, 0x40, 0x12, 0x07, 0x29, 0x75,
+/*0B90*/0x83, 0xC6, 0xE5, 0x40, 0x12, 0x07, 0x29, 0x75,
+ 0x83, 0xC8, 0x80, 0x0E, 0x90, 0x07, 0x26, 0x12,
+/*0BA0*/0x07, 0x35, 0x12, 0x08, 0x6E, 0x70, 0x06, 0x12,
+ 0x07, 0x32, 0xE5, 0x40, 0xF0, 0xAF, 0x69, 0x7E,
+/*0BB0*/0x00, 0xAD, 0x67, 0xAC, 0x66, 0x12, 0x04, 0x44,
+ 0x12, 0x07, 0x2A, 0x75, 0x83, 0xCA, 0xE0, 0xD3,
+/*0BC0*/0x94, 0x00, 0x50, 0x0C, 0x05, 0x68, 0xE5, 0x68,
+ 0xC3, 0x94, 0x05, 0x50, 0x03, 0x02, 0x0B, 0x14,
+/*0BD0*/0x22, 0x8C, 0x60, 0x8D, 0x61, 0x12, 0x08, 0xDA,
+ 0x74, 0x20, 0x40, 0x0D, 0x2F, 0xF5, 0x82, 0x74,
+/*0BE0*/0x03, 0x3E, 0xF5, 0x83, 0xE5, 0x3E, 0xF0, 0x80,
+ 0x0B, 0x2F, 0xF5, 0x82, 0x74, 0x03, 0x3E, 0xF5,
+/*0BF0*/0x83, 0xE5, 0x3C, 0xF0, 0xE5, 0x3C, 0xD3, 0x95,
+ 0x3E, 0x40, 0x3C, 0xE5, 0x61, 0x45, 0x60, 0x70,
+/*0C00*/0x10, 0xE9, 0x12, 0x09, 0x04, 0xE5, 0x3E, 0x12,
+ 0x07, 0x68, 0x40, 0x3B, 0x12, 0x08, 0x95, 0x80,
+/*0C10*/0x18, 0xE5, 0x3E, 0xC3, 0x95, 0x38, 0x40, 0x1D,
+ 0x85, 0x3E, 0x38, 0xE5, 0x3E, 0x60, 0x05, 0x85,
+/*0C20*/0x3F, 0x39, 0x80, 0x03, 0x85, 0x39, 0x39, 0x8F,
+ 0x3A, 0x12, 0x08, 0x14, 0xE5, 0x3E, 0x12, 0x07,
+/*0C30*/0xC0, 0xE5, 0x3F, 0xF0, 0x22, 0x80, 0x43, 0xE5,
+ 0x61, 0x45, 0x60, 0x70, 0x19, 0x12, 0x07, 0x5F,
+/*0C40*/0x40, 0x05, 0x12, 0x08, 0x9E, 0x80, 0x27, 0x12,
+ 0x09, 0x0B, 0x12, 0x08, 0x14, 0xE5, 0x42, 0x12,
+/*0C50*/0x07, 0xC0, 0xE5, 0x41, 0xF0, 0x22, 0xE5, 0x3C,
+ 0xC3, 0x95, 0x38, 0x40, 0x1D, 0x85, 0x3C, 0x38,
+/*0C60*/0xE5, 0x3C, 0x60, 0x05, 0x85, 0x3D, 0x39, 0x80,
+ 0x03, 0x85, 0x39, 0x39, 0x8F, 0x3A, 0x12, 0x08,
+/*0C70*/0x14, 0xE5, 0x3C, 0x12, 0x07, 0xC0, 0xE5, 0x3D,
+ 0xF0, 0x22, 0x85, 0x38, 0x38, 0x85, 0x39, 0x39,
+/*0C80*/0x85, 0x3A, 0x3A, 0x12, 0x08, 0x14, 0xE5, 0x38,
+ 0x12, 0x07, 0xC0, 0xE5, 0x39, 0xF0, 0x22, 0x7F,
+/*0C90*/0x06, 0x12, 0x17, 0x31, 0x12, 0x1D, 0x23, 0x12,
+ 0x0E, 0x04, 0x12, 0x0E, 0x33, 0xE0, 0x44, 0x0A,
+/*0CA0*/0xF0, 0x74, 0x8E, 0xFE, 0x12, 0x0E, 0x04, 0x12,
+ 0x0E, 0x0B, 0xEF, 0xF0, 0xE5, 0x28, 0x30, 0xE5,
+/*0CB0*/0x03, 0xD3, 0x80, 0x01, 0xC3, 0x40, 0x05, 0x75,
+ 0x14, 0x20, 0x80, 0x03, 0x75, 0x14, 0x08, 0x12,
+/*0CC0*/0x0E, 0x04, 0x75, 0x83, 0x8A, 0xE5, 0x14, 0xF0,
+ 0xB4, 0xFF, 0x05, 0x75, 0x12, 0x80, 0x80, 0x06,
+/*0CD0*/0xE5, 0x14, 0xC3, 0x13, 0xF5, 0x12, 0xE4, 0xF5,
+ 0x16, 0xF5, 0x7F, 0x12, 0x19, 0x36, 0x12, 0x13,
+/*0CE0*/0xA3, 0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x50, 0x09,
+ 0x05, 0x16, 0xE5, 0x16, 0xC3, 0x94, 0x14, 0x40,
+/*0CF0*/0xEA, 0xE5, 0xE4, 0x20, 0xE7, 0x28, 0x12, 0x0E,
+ 0x04, 0x75, 0x83, 0xD2, 0xE0, 0x54, 0x08, 0xD3,
+/*0D00*/0x94, 0x00, 0x40, 0x04, 0x7F, 0x01, 0x80, 0x02,
+ 0x7F, 0x00, 0xE5, 0x0A, 0xC3, 0x94, 0x01, 0x40,
+/*0D10*/0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEF,
+ 0x5E, 0x60, 0x03, 0x12, 0x1D, 0xD7, 0xE5, 0x7F,
+/*0D20*/0xC3, 0x94, 0x11, 0x40, 0x14, 0x12, 0x0E, 0x04,
+ 0x75, 0x83, 0xD2, 0xE0, 0x44, 0x80, 0xF0, 0xE5,
+/*0D30*/0xE4, 0x20, 0xE7, 0x0F, 0x12, 0x1D, 0xD7, 0x80,
+ 0x0A, 0x12, 0x0E, 0x04, 0x75, 0x83, 0xD2, 0xE0,
+/*0D40*/0x54, 0x7F, 0xF0, 0x12, 0x1D, 0x23, 0x22, 0x74,
+ 0x8A, 0x85, 0x08, 0x82, 0xF5, 0x83, 0xE5, 0x17,
+/*0D50*/0xF0, 0x12, 0x0E, 0x3A, 0xE4, 0xF0, 0x90, 0x07,
+ 0x02, 0xE0, 0x12, 0x0E, 0x17, 0x75, 0x83, 0x90,
+/*0D60*/0xEF, 0xF0, 0x74, 0x92, 0xFE, 0xE5, 0x08, 0x44,
+ 0x07, 0xFF, 0xF5, 0x82, 0x8E, 0x83, 0xE0, 0x54,
+/*0D70*/0xC0, 0xFD, 0x90, 0x07, 0x03, 0xE0, 0x54, 0x3F,
+ 0x4D, 0x8F, 0x82, 0x8E, 0x83, 0xF0, 0x90, 0x07,
+/*0D80*/0x04, 0xE0, 0x12, 0x0E, 0x17, 0x75, 0x83, 0x82,
+ 0xEF, 0xF0, 0x90, 0x07, 0x05, 0xE0, 0xFF, 0xED,
+/*0D90*/0x44, 0x07, 0xF5, 0x82, 0x75, 0x83, 0xB4, 0xEF,
+ 0x12, 0x0E, 0x03, 0x75, 0x83, 0x80, 0xE0, 0x54,
+/*0DA0*/0xBF, 0xF0, 0x30, 0x37, 0x0A, 0x12, 0x0E, 0x91,
+ 0x75, 0x83, 0x94, 0xE0, 0x44, 0x80, 0xF0, 0x30,
+/*0DB0*/0x38, 0x0A, 0x12, 0x0E, 0x91, 0x75, 0x83, 0x92,
+ 0xE0, 0x44, 0x80, 0xF0, 0xE5, 0x28, 0x30, 0xE4,
+/*0DC0*/0x1A, 0x20, 0x39, 0x0A, 0x12, 0x0E, 0x04, 0x75,
+ 0x83, 0x88, 0xE0, 0x54, 0x7F, 0xF0, 0x20, 0x3A,
+/*0DD0*/0x0A, 0x12, 0x0E, 0x04, 0x75, 0x83, 0x88, 0xE0,
+ 0x54, 0xBF, 0xF0, 0x74, 0x8C, 0xFE, 0x12, 0x0E,
+/*0DE0*/0x04, 0x8E, 0x83, 0xE0, 0x54, 0x0F, 0x12, 0x0E,
+ 0x03, 0x75, 0x83, 0x86, 0xE0, 0x54, 0xBF, 0xF0,
+/*0DF0*/0xE5, 0x08, 0x44, 0x06, 0x12, 0x0D, 0xFD, 0x75,
+ 0x83, 0x8A, 0xE4, 0xF0, 0x22, 0xF5, 0x82, 0x75,
+/*0E00*/0x83, 0x82, 0xE4, 0xF0, 0xE5, 0x08, 0x44, 0x07,
+ 0xF5, 0x82, 0x22, 0x8E, 0x83, 0xE0, 0xF5, 0x10,
+/*0E10*/0x54, 0xFE, 0xF0, 0xE5, 0x10, 0x44, 0x01, 0xFF,
+ 0xE5, 0x08, 0xFD, 0xED, 0x44, 0x07, 0xF5, 0x82,
+/*0E20*/0x22, 0xE5, 0x15, 0xC4, 0x54, 0x07, 0xFF, 0xE5,
+ 0x08, 0xFD, 0xED, 0x44, 0x08, 0xF5, 0x82, 0x75,
+/*0E30*/0x83, 0x82, 0x22, 0x75, 0x83, 0x80, 0xE0, 0x44,
+ 0x40, 0xF0, 0xE5, 0x08, 0x44, 0x08, 0xF5, 0x82,
+/*0E40*/0x75, 0x83, 0x8A, 0x22, 0xE5, 0x16, 0x25, 0xE0,
+ 0x25, 0xE0, 0x24, 0xAF, 0xF5, 0x82, 0xE4, 0x34,
+/*0E50*/0x1A, 0xF5, 0x83, 0xE4, 0x93, 0xF5, 0x0D, 0x22,
+ 0x43, 0xE1, 0x10, 0x43, 0xE1, 0x80, 0x53, 0xE1,
+/*0E60*/0xFD, 0x85, 0xE1, 0x10, 0x22, 0xE5, 0x16, 0x25,
+ 0xE0, 0x25, 0xE0, 0x24, 0xB2, 0xF5, 0x82, 0xE4,
+/*0E70*/0x34, 0x1A, 0xF5, 0x83, 0xE4, 0x93, 0x22, 0x85,
+ 0x55, 0x82, 0x85, 0x54, 0x83, 0xE5, 0x15, 0xF0,
+/*0E80*/0x22, 0xE5, 0xE2, 0x54, 0x20, 0xD3, 0x94, 0x00,
+ 0x22, 0xE5, 0xE2, 0x54, 0x40, 0xD3, 0x94, 0x00,
+/*0E90*/0x22, 0xE5, 0x08, 0x44, 0x06, 0xF5, 0x82, 0x22,
+ 0xFD, 0xE5, 0x08, 0xFB, 0xEB, 0x44, 0x07, 0xF5,
+/*0EA0*/0x82, 0x22, 0x53, 0xF9, 0xF7, 0x75, 0xFE, 0x30,
+ 0x22, 0xEF, 0x4E, 0x70, 0x26, 0x12, 0x07, 0xCC,
+/*0EB0*/0xE0, 0xFD, 0x90, 0x07, 0x26, 0x12, 0x07, 0x7B,
+ 0x12, 0x07, 0xD8, 0xE0, 0xFD, 0x90, 0x07, 0x28,
+/*0EC0*/0x12, 0x07, 0x7B, 0x12, 0x08, 0x81, 0x12, 0x07,
+ 0x72, 0x12, 0x08, 0x35, 0xE0, 0x90, 0x07, 0x24,
+/*0ED0*/0x12, 0x07, 0x78, 0xEF, 0x64, 0x04, 0x4E, 0x70,
+ 0x29, 0x12, 0x07, 0xE4, 0xE0, 0xFD, 0x90, 0x07,
+/*0EE0*/0x26, 0x12, 0x07, 0x7B, 0x12, 0x07, 0xF0, 0xE0,
+ 0xFD, 0x90, 0x07, 0x28, 0x12, 0x07, 0x7B, 0x12,
+/*0EF0*/0x08, 0x8B, 0x12, 0x07, 0x72, 0x12, 0x08, 0x41,
+ 0xE0, 0x54, 0x1F, 0xFD, 0x90, 0x07, 0x24, 0x12,
+/*0F00*/0x07, 0x7B, 0xEF, 0x64, 0x01, 0x4E, 0x70, 0x04,
+ 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00, 0xEF, 0x64,
+/*0F10*/0x02, 0x4E, 0x70, 0x04, 0x7F, 0x01, 0x80, 0x02,
+ 0x7F, 0x00, 0xEF, 0x4D, 0x60, 0x35, 0x12, 0x07,
+/*0F20*/0xFC, 0xE0, 0xFF, 0x90, 0x07, 0x26, 0x12, 0x07,
+ 0x89, 0xEF, 0xF0, 0x12, 0x08, 0x08, 0xE0, 0xFF,
+/*0F30*/0x90, 0x07, 0x28, 0x12, 0x07, 0x89, 0xEF, 0xF0,
+ 0x12, 0x08, 0x4D, 0xE0, 0x54, 0x1F, 0xFF, 0x12,
+/*0F40*/0x07, 0x86, 0xEF, 0xF0, 0x12, 0x08, 0x59, 0xE0,
+ 0x54, 0x1F, 0xFF, 0x90, 0x07, 0x24, 0x12, 0x07,
+/*0F50*/0x89, 0xEF, 0xF0, 0x22, 0xE4, 0xF5, 0x53, 0x12,
+ 0x0E, 0x81, 0x40, 0x04, 0x7F, 0x01, 0x80, 0x02,
+/*0F60*/0x7F, 0x00, 0x12, 0x0E, 0x89, 0x40, 0x04, 0x7E,
+ 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F, 0x70,
+/*0F70*/0x03, 0x02, 0x0F, 0xF6, 0x85, 0xE1, 0x10, 0x43,
+ 0xE1, 0x02, 0x53, 0xE1, 0x0F, 0x85, 0xE1, 0x10,
+/*0F80*/0xE4, 0xF5, 0x51, 0xE5, 0xE3, 0x54, 0x3F, 0xF5,
+ 0x52, 0x12, 0x0E, 0x89, 0x40, 0x1D, 0xAD, 0x52,
+/*0F90*/0xAF, 0x51, 0x12, 0x11, 0x18, 0xEF, 0x60, 0x08,
+ 0x85, 0xE1, 0x10, 0x43, 0xE1, 0x40, 0x80, 0x0B,
+/*0FA0*/0x53, 0xE1, 0xBF, 0x12, 0x0E, 0x58, 0x12, 0x00,
+ 0x06, 0x80, 0xFB, 0xE5, 0xE3, 0x54, 0x3F, 0xF5,
+/*0FB0*/0x51, 0xE5, 0xE4, 0x54, 0x3F, 0xF5, 0x52, 0x12,
+ 0x0E, 0x81, 0x40, 0x1D, 0xAD, 0x52, 0xAF, 0x51,
+/*0FC0*/0x12, 0x11, 0x18, 0xEF, 0x60, 0x08, 0x85, 0xE1,
+ 0x10, 0x43, 0xE1, 0x20, 0x80, 0x0B, 0x53, 0xE1,
+/*0FD0*/0xDF, 0x12, 0x0E, 0x58, 0x12, 0x00, 0x06, 0x80,
+ 0xFB, 0x12, 0x0E, 0x81, 0x40, 0x04, 0x7F, 0x01,
+/*0FE0*/0x80, 0x02, 0x7F, 0x00, 0x12, 0x0E, 0x89, 0x40,
+ 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE,
+/*0FF0*/0x4F, 0x60, 0x03, 0x12, 0x0E, 0x5B, 0x22, 0x12,
+ 0x0E, 0x21, 0xEF, 0xF0, 0x12, 0x10, 0x91, 0x22,
+/*1000*/0x02, 0x11, 0x00, 0x02, 0x10, 0x40, 0x02, 0x10,
+ 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1010*/0x01, 0x20, 0x01, 0x20, 0xE4, 0xF5, 0x57, 0x12,
+ 0x16, 0xBD, 0x12, 0x16, 0x44, 0xE4, 0x12, 0x10,
+/*1020*/0x56, 0x12, 0x14, 0xB7, 0x90, 0x07, 0x26, 0x12,
+ 0x07, 0x35, 0xE4, 0x12, 0x07, 0x31, 0xE4, 0xF0,
+/*1030*/0x12, 0x10, 0x56, 0x12, 0x14, 0xB7, 0x90, 0x07,
+ 0x26, 0x12, 0x07, 0x35, 0xE5, 0x41, 0x12, 0x07,
+/*1040*/0x31, 0xE5, 0x40, 0xF0, 0xAF, 0x57, 0x7E, 0x00,
+ 0xAD, 0x56, 0x7C, 0x00, 0x12, 0x04, 0x44, 0xAF,
+/*1050*/0x56, 0x7E, 0x00, 0x02, 0x11, 0xEE, 0xFF, 0x90,
+ 0x07, 0x20, 0xA3, 0xE0, 0xFD, 0xE4, 0xF5, 0x56,
+/*1060*/0xF5, 0x40, 0xFE, 0xFC, 0xAB, 0x56, 0xFA, 0x12,
+ 0x11, 0x51, 0x7F, 0x0F, 0x7D, 0x18, 0xE4, 0xF5,
+/*1070*/0x56, 0xF5, 0x40, 0xFE, 0xFC, 0xAB, 0x56, 0xFA,
+ 0x12, 0x15, 0x41, 0xAF, 0x56, 0x7E, 0x00, 0x12,
+/*1080*/0x1A, 0xFF, 0xE4, 0xFF, 0xF5, 0x56, 0x7D, 0x1F,
+ 0xF5, 0x40, 0xFE, 0xFC, 0xAB, 0x56, 0xFA, 0x22,
+/*1090*/0x22, 0xE4, 0xF5, 0x55, 0xE5, 0x08, 0xFD, 0x74,
+ 0xA0, 0xF5, 0x56, 0xED, 0x44, 0x07, 0xF5, 0x57,
+/*10A0*/0xE5, 0x28, 0x30, 0xE5, 0x03, 0xD3, 0x80, 0x01,
+ 0xC3, 0x40, 0x05, 0x7F, 0x28, 0xEF, 0x80, 0x04,
+/*10B0*/0x7F, 0x14, 0xEF, 0xC3, 0x13, 0xF5, 0x54, 0xE4,
+ 0xF9, 0x12, 0x0E, 0x18, 0x75, 0x83, 0x8E, 0xE0,
+/*10C0*/0xF5, 0x10, 0xCE, 0xEF, 0xCE, 0xEE, 0xD3, 0x94,
+ 0x00, 0x40, 0x26, 0xE5, 0x10, 0x54, 0xFE, 0x12,
+/*10D0*/0x0E, 0x98, 0x75, 0x83, 0x8E, 0xED, 0xF0, 0xE5,
+ 0x10, 0x44, 0x01, 0xFD, 0xEB, 0x44, 0x07, 0xF5,
+/*10E0*/0x82, 0xED, 0xF0, 0x85, 0x57, 0x82, 0x85, 0x56,
+ 0x83, 0xE0, 0x30, 0xE3, 0x01, 0x09, 0x1E, 0x80,
+/*10F0*/0xD4, 0xC2, 0x34, 0xE9, 0xC3, 0x95, 0x54, 0x40,
+ 0x02, 0xD2, 0x34, 0x22, 0x02, 0x00, 0x06, 0x22,
+/*1100*/0x30, 0x30, 0x11, 0x90, 0x10, 0x00, 0xE4, 0x93,
+ 0xF5, 0x10, 0x90, 0x10, 0x10, 0xE4, 0x93, 0xF5,
+/*1110*/0x10, 0x12, 0x10, 0x90, 0x12, 0x11, 0x50, 0x22,
+ 0xE4, 0xFC, 0xC3, 0xED, 0x9F, 0xFA, 0xEF, 0xF5,
+/*1120*/0x83, 0x75, 0x82, 0x00, 0x79, 0xFF, 0xE4, 0x93,
+ 0xCC, 0x6C, 0xCC, 0xA3, 0xD9, 0xF8, 0xDA, 0xF6,
+/*1130*/0xE5, 0xE2, 0x30, 0xE4, 0x02, 0x8C, 0xE5, 0xED,
+ 0x24, 0xFF, 0xFF, 0xEF, 0x75, 0x82, 0xFF, 0xF5,
+/*1140*/0x83, 0xE4, 0x93, 0x6C, 0x70, 0x03, 0x7F, 0x01,
+ 0x22, 0x7F, 0x00, 0x22, 0x22, 0x11, 0x00, 0x00,
+/*1150*/0x22, 0x8E, 0x58, 0x8F, 0x59, 0x8C, 0x5A, 0x8D,
+ 0x5B, 0x8A, 0x5C, 0x8B, 0x5D, 0x75, 0x5E, 0x01,
+/*1160*/0xE4, 0xF5, 0x5F, 0xF5, 0x60, 0xF5, 0x62, 0x12,
+ 0x07, 0x2A, 0x75, 0x83, 0xD0, 0xE0, 0xFF, 0xC4,
+/*1170*/0x54, 0x0F, 0xF5, 0x61, 0x12, 0x1E, 0xA5, 0x85,
+ 0x59, 0x5E, 0xD3, 0xE5, 0x5E, 0x95, 0x5B, 0xE5,
+/*1180*/0x5A, 0x12, 0x07, 0x6B, 0x50, 0x4B, 0x12, 0x07,
+ 0x03, 0x75, 0x83, 0xBC, 0xE0, 0x45, 0x5E, 0x12,
+/*1190*/0x07, 0x29, 0x75, 0x83, 0xBE, 0xE0, 0x45, 0x5E,
+ 0x12, 0x07, 0x29, 0x75, 0x83, 0xC0, 0xE0, 0x45,
+/*11A0*/0x5E, 0xF0, 0xAF, 0x5F, 0xE5, 0x60, 0x12, 0x08,
+ 0x78, 0x12, 0x0A, 0xFF, 0xAF, 0x62, 0x7E, 0x00,
+/*11B0*/0xAD, 0x5D, 0xAC, 0x5C, 0x12, 0x04, 0x44, 0xE5,
+ 0x61, 0xAF, 0x5E, 0x7E, 0x00, 0xB4, 0x03, 0x05,
+/*11C0*/0x12, 0x1E, 0x21, 0x80, 0x07, 0xAD, 0x5D, 0xAC,
+ 0x5C, 0x12, 0x13, 0x17, 0x05, 0x5E, 0x02, 0x11,
+/*11D0*/0x7A, 0x12, 0x07, 0x03, 0x75, 0x83, 0xBC, 0xE0,
+ 0x45, 0x40, 0x12, 0x07, 0x29, 0x75, 0x83, 0xBE,
+/*11E0*/0xE0, 0x45, 0x40, 0x12, 0x07, 0x29, 0x75, 0x83,
+ 0xC0, 0xE0, 0x45, 0x40, 0xF0, 0x22, 0x8E, 0x58,
+/*11F0*/0x8F, 0x59, 0x75, 0x5A, 0x01, 0x79, 0x01, 0x75,
+ 0x5B, 0x01, 0xE4, 0xFB, 0x12, 0x07, 0x2A, 0x75,
+/*1200*/0x83, 0xAE, 0xE0, 0x54, 0x1A, 0xFF, 0x12, 0x08,
+ 0x65, 0xE0, 0xC4, 0x13, 0x54, 0x07, 0xFE, 0xEF,
+/*1210*/0x70, 0x0C, 0xEE, 0x65, 0x35, 0x70, 0x07, 0x90,
+ 0x07, 0x2F, 0xE0, 0xB4, 0x01, 0x0D, 0xAF, 0x35,
+/*1220*/0x7E, 0x00, 0x12, 0x0E, 0xA9, 0xCF, 0xEB, 0xCF,
+ 0x02, 0x1E, 0x60, 0xE5, 0x59, 0x64, 0x02, 0x45,
+/*1230*/0x58, 0x70, 0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F,
+ 0x00, 0xE5, 0x59, 0x45, 0x58, 0x70, 0x04, 0x7E,
+/*1240*/0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F, 0x60,
+ 0x23, 0x85, 0x41, 0x49, 0x85, 0x40, 0x4B, 0xE5,
+/*1250*/0x59, 0x45, 0x58, 0x70, 0x2C, 0xAF, 0x5A, 0xFE,
+ 0xCD, 0xE9, 0xCD, 0xFC, 0xAB, 0x59, 0xAA, 0x58,
+/*1260*/0x12, 0x0A, 0xFF, 0xAF, 0x5B, 0x7E, 0x00, 0x12,
+ 0x1E, 0x60, 0x80, 0x15, 0xAF, 0x5B, 0x7E, 0x00,
+/*1270*/0x12, 0x1E, 0x60, 0x90, 0x07, 0x26, 0x12, 0x07,
+ 0x35, 0xE5, 0x49, 0x12, 0x07, 0x31, 0xE5, 0x4B,
+/*1280*/0xF0, 0xE4, 0xFD, 0xAF, 0x35, 0xFE, 0xFC, 0x12,
+ 0x09, 0x15, 0x22, 0x8C, 0x64, 0x8D, 0x65, 0x12,
+/*1290*/0x08, 0xDA, 0x40, 0x3C, 0xE5, 0x65, 0x45, 0x64,
+ 0x70, 0x10, 0x12, 0x09, 0x04, 0xC3, 0xE5, 0x3E,
+/*12A0*/0x12, 0x07, 0x69, 0x40, 0x3B, 0x12, 0x08, 0x95,
+ 0x80, 0x18, 0xE5, 0x3E, 0xC3, 0x95, 0x38, 0x40,
+/*12B0*/0x1D, 0x85, 0x3E, 0x38, 0xE5, 0x3E, 0x60, 0x05,
+ 0x85, 0x3F, 0x39, 0x80, 0x03, 0x85, 0x39, 0x39,
+/*12C0*/0x8F, 0x3A, 0x12, 0x07, 0xA8, 0xE5, 0x3E, 0x12,
+ 0x07, 0x53, 0xE5, 0x3F, 0xF0, 0x22, 0x80, 0x3B,
+/*12D0*/0xE5, 0x65, 0x45, 0x64, 0x70, 0x11, 0x12, 0x07,
+ 0x5F, 0x40, 0x05, 0x12, 0x08, 0x9E, 0x80, 0x1F,
+/*12E0*/0x12, 0x07, 0x3E, 0xE5, 0x41, 0xF0, 0x22, 0xE5,
+ 0x3C, 0xC3, 0x95, 0x38, 0x40, 0x1D, 0x85, 0x3C,
+/*12F0*/0x38, 0xE5, 0x3C, 0x60, 0x05, 0x85, 0x3D, 0x39,
+ 0x80, 0x03, 0x85, 0x39, 0x39, 0x8F, 0x3A, 0x12,
+/*1300*/0x07, 0xA8, 0xE5, 0x3C, 0x12, 0x07, 0x53, 0xE5,
+ 0x3D, 0xF0, 0x22, 0x12, 0x07, 0x9F, 0xE5, 0x38,
+/*1310*/0x12, 0x07, 0x53, 0xE5, 0x39, 0xF0, 0x22, 0x8C,
+ 0x63, 0x8D, 0x64, 0x12, 0x08, 0xDA, 0x40, 0x3C,
+/*1320*/0xE5, 0x64, 0x45, 0x63, 0x70, 0x10, 0x12, 0x09,
+ 0x04, 0xC3, 0xE5, 0x3E, 0x12, 0x07, 0x69, 0x40,
+/*1330*/0x3B, 0x12, 0x08, 0x95, 0x80, 0x18, 0xE5, 0x3E,
+ 0xC3, 0x95, 0x38, 0x40, 0x1D, 0x85, 0x3E, 0x38,
+/*1340*/0xE5, 0x3E, 0x60, 0x05, 0x85, 0x3F, 0x39, 0x80,
+ 0x03, 0x85, 0x39, 0x39, 0x8F, 0x3A, 0x12, 0x07,
+/*1350*/0xA8, 0xE5, 0x3E, 0x12, 0x07, 0x53, 0xE5, 0x3F,
+ 0xF0, 0x22, 0x80, 0x3B, 0xE5, 0x64, 0x45, 0x63,
+/*1360*/0x70, 0x11, 0x12, 0x07, 0x5F, 0x40, 0x05, 0x12,
+ 0x08, 0x9E, 0x80, 0x1F, 0x12, 0x07, 0x3E, 0xE5,
+/*1370*/0x41, 0xF0, 0x22, 0xE5, 0x3C, 0xC3, 0x95, 0x38,
+ 0x40, 0x1D, 0x85, 0x3C, 0x38, 0xE5, 0x3C, 0x60,
+/*1380*/0x05, 0x85, 0x3D, 0x39, 0x80, 0x03, 0x85, 0x39,
+ 0x39, 0x8F, 0x3A, 0x12, 0x07, 0xA8, 0xE5, 0x3C,
+/*1390*/0x12, 0x07, 0x53, 0xE5, 0x3D, 0xF0, 0x22, 0x12,
+ 0x07, 0x9F, 0xE5, 0x38, 0x12, 0x07, 0x53, 0xE5,
+/*13A0*/0x39, 0xF0, 0x22, 0xE5, 0x0D, 0xFE, 0xE5, 0x08,
+ 0x8E, 0x54, 0x44, 0x05, 0xF5, 0x55, 0x75, 0x15,
+/*13B0*/0x0F, 0xF5, 0x82, 0x12, 0x0E, 0x7A, 0x12, 0x17,
+ 0xA3, 0x20, 0x31, 0x05, 0x75, 0x15, 0x03, 0x80,
+/*13C0*/0x03, 0x75, 0x15, 0x0B, 0xE5, 0x0A, 0xC3, 0x94,
+ 0x01, 0x50, 0x38, 0x12, 0x14, 0x20, 0x20, 0x31,
+/*13D0*/0x06, 0x05, 0x15, 0x05, 0x15, 0x80, 0x04, 0x15,
+ 0x15, 0x15, 0x15, 0xE5, 0x0A, 0xC3, 0x94, 0x01,
+/*13E0*/0x50, 0x21, 0x12, 0x14, 0x20, 0x20, 0x31, 0x04,
+ 0x05, 0x15, 0x80, 0x02, 0x15, 0x15, 0xE5, 0x0A,
+/*13F0*/0xC3, 0x94, 0x01, 0x50, 0x0E, 0x12, 0x0E, 0x77,
+ 0x12, 0x17, 0xA3, 0x20, 0x31, 0x05, 0x05, 0x15,
+/*1400*/0x12, 0x0E, 0x77, 0xE5, 0x15, 0xB4, 0x08, 0x04,
+ 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x15,
+/*1410*/0xB4, 0x07, 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E,
+ 0x00, 0xEE, 0x4F, 0x60, 0x02, 0x05, 0x7F, 0x22,
+/*1420*/0x85, 0x55, 0x82, 0x85, 0x54, 0x83, 0xE5, 0x15,
+ 0xF0, 0x12, 0x17, 0xA3, 0x22, 0x12, 0x07, 0x2A,
+/*1430*/0x75, 0x83, 0xAE, 0x74, 0xFF, 0x12, 0x07, 0x29,
+ 0xE0, 0x54, 0x1A, 0xF5, 0x34, 0xE0, 0xC4, 0x13,
+/*1440*/0x54, 0x07, 0xF5, 0x35, 0x24, 0xFE, 0x60, 0x24,
+ 0x24, 0xFE, 0x60, 0x3C, 0x24, 0x04, 0x70, 0x63,
+/*1450*/0x75, 0x31, 0x2D, 0xE5, 0x08, 0xFD, 0x74, 0xB6,
+ 0x12, 0x07, 0x92, 0x74, 0xBC, 0x90, 0x07, 0x22,
+/*1460*/0x12, 0x07, 0x95, 0x74, 0x90, 0x12, 0x07, 0xB3,
+ 0x74, 0x92, 0x80, 0x3C, 0x75, 0x31, 0x3A, 0xE5,
+/*1470*/0x08, 0xFD, 0x74, 0xBA, 0x12, 0x07, 0x92, 0x74,
+ 0xC0, 0x90, 0x07, 0x22, 0x12, 0x07, 0xB6, 0x74,
+/*1480*/0xC4, 0x12, 0x07, 0xB3, 0x74, 0xC8, 0x80, 0x20,
+ 0x75, 0x31, 0x35, 0xE5, 0x08, 0xFD, 0x74, 0xB8,
+/*1490*/0x12, 0x07, 0x92, 0x74, 0xBE, 0xFF, 0xED, 0x44,
+ 0x07, 0x90, 0x07, 0x22, 0xCF, 0xF0, 0xA3, 0xEF,
+/*14A0*/0xF0, 0x74, 0xC2, 0x12, 0x07, 0xB3, 0x74, 0xC6,
+ 0xFF, 0xED, 0x44, 0x07, 0xA3, 0xCF, 0xF0, 0xA3,
+/*14B0*/0xEF, 0xF0, 0x22, 0x75, 0x34, 0x01, 0x22, 0x8E,
+ 0x58, 0x8F, 0x59, 0x8C, 0x5A, 0x8D, 0x5B, 0x8A,
+/*14C0*/0x5C, 0x8B, 0x5D, 0x75, 0x5E, 0x01, 0xE4, 0xF5,
+ 0x5F, 0x12, 0x1E, 0xA5, 0x85, 0x59, 0x5E, 0xD3,
+/*14D0*/0xE5, 0x5E, 0x95, 0x5B, 0xE5, 0x5A, 0x12, 0x07,
+ 0x6B, 0x50, 0x57, 0xE5, 0x5D, 0x45, 0x5C, 0x70,
+/*14E0*/0x30, 0x12, 0x07, 0x2A, 0x75, 0x83, 0x92, 0xE5,
+ 0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC6, 0xE5,
+/*14F0*/0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC8, 0xE5,
+ 0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0x90, 0xE5,
+/*1500*/0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2, 0xE5,
+ 0x5E, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0x80,
+/*1510*/0x03, 0x12, 0x07, 0x32, 0xE5, 0x5E, 0xF0, 0xAF,
+ 0x5F, 0x7E, 0x00, 0xAD, 0x5D, 0xAC, 0x5C, 0x12,
+/*1520*/0x04, 0x44, 0xAF, 0x5E, 0x7E, 0x00, 0xAD, 0x5D,
+ 0xAC, 0x5C, 0x12, 0x0B, 0xD1, 0x05, 0x5E, 0x02,
+/*1530*/0x14, 0xCF, 0xAB, 0x5D, 0xAA, 0x5C, 0xAD, 0x5B,
+ 0xAC, 0x5A, 0xAF, 0x59, 0xAE, 0x58, 0x02, 0x1B,
+/*1540*/0xFB, 0x8C, 0x5C, 0x8D, 0x5D, 0x8A, 0x5E, 0x8B,
+ 0x5F, 0x75, 0x60, 0x01, 0xE4, 0xF5, 0x61, 0xF5,
+/*1550*/0x62, 0xF5, 0x63, 0x12, 0x1E, 0xA5, 0x8F, 0x60,
+ 0xD3, 0xE5, 0x60, 0x95, 0x5D, 0xE5, 0x5C, 0x12,
+/*1560*/0x07, 0x6B, 0x50, 0x61, 0xE5, 0x5F, 0x45, 0x5E,
+ 0x70, 0x27, 0x12, 0x07, 0x2A, 0x75, 0x83, 0xB6,
+/*1570*/0xE5, 0x60, 0x12, 0x07, 0x29, 0x75, 0x83, 0xB8,
+ 0xE5, 0x60, 0x12, 0x07, 0x29, 0x75, 0x83, 0xBA,
+/*1580*/0xE5, 0x60, 0xF0, 0xAF, 0x61, 0x7E, 0x00, 0xE5,
+ 0x62, 0x12, 0x08, 0x7A, 0x12, 0x0A, 0xFF, 0x80,
+/*1590*/0x19, 0x90, 0x07, 0x24, 0x12, 0x07, 0x35, 0xE5,
+ 0x60, 0x12, 0x07, 0x29, 0x75, 0x83, 0x8E, 0xE4,
+/*15A0*/0x12, 0x07, 0x29, 0x74, 0x01, 0x12, 0x07, 0x29,
+ 0xE4, 0xF0, 0xAF, 0x63, 0x7E, 0x00, 0xAD, 0x5F,
+/*15B0*/0xAC, 0x5E, 0x12, 0x04, 0x44, 0xAF, 0x60, 0x7E,
+ 0x00, 0xAD, 0x5F, 0xAC, 0x5E, 0x12, 0x12, 0x8B,
+/*15C0*/0x05, 0x60, 0x02, 0x15, 0x58, 0x22, 0x90, 0x11,
+ 0x4D, 0xE4, 0x93, 0x90, 0x07, 0x2E, 0xF0, 0x12,
+/*15D0*/0x08, 0x1F, 0x75, 0x83, 0xAE, 0xE0, 0x54, 0x1A,
+ 0xF5, 0x34, 0x70, 0x67, 0xEF, 0x44, 0x07, 0xF5,
+/*15E0*/0x82, 0x75, 0x83, 0xCE, 0xE0, 0xFF, 0x13, 0x13,
+ 0x13, 0x54, 0x07, 0xF5, 0x36, 0x54, 0x0F, 0xD3,
+/*15F0*/0x94, 0x00, 0x40, 0x06, 0x12, 0x14, 0x2D, 0x12,
+ 0x1B, 0xA9, 0xE5, 0x36, 0x54, 0x0F, 0x24, 0xFE,
+/*1600*/0x60, 0x0C, 0x14, 0x60, 0x0C, 0x14, 0x60, 0x19,
+ 0x24, 0x03, 0x70, 0x37, 0x80, 0x10, 0x02, 0x1E,
+/*1610*/0x91, 0x12, 0x1E, 0x91, 0x12, 0x07, 0x2A, 0x75,
+ 0x83, 0xCE, 0xE0, 0x54, 0xEF, 0xF0, 0x02, 0x1D,
+/*1620*/0xAE, 0x12, 0x10, 0x14, 0xE4, 0xF5, 0x55, 0x12,
+ 0x1D, 0x85, 0x05, 0x55, 0xE5, 0x55, 0xC3, 0x94,
+/*1630*/0x05, 0x40, 0xF4, 0x12, 0x07, 0x2A, 0x75, 0x83,
+ 0xCE, 0xE0, 0x54, 0xC7, 0x12, 0x07, 0x29, 0xE0,
+/*1640*/0x44, 0x08, 0xF0, 0x22, 0xE4, 0xF5, 0x58, 0xF5,
+ 0x59, 0xAF, 0x08, 0xEF, 0x44, 0x07, 0xF5, 0x82,
+/*1650*/0x75, 0x83, 0xD0, 0xE0, 0xFD, 0xC4, 0x54, 0x0F,
+ 0xF5, 0x5A, 0xEF, 0x44, 0x07, 0xF5, 0x82, 0x75,
+/*1660*/0x83, 0x80, 0x74, 0x01, 0xF0, 0x12, 0x08, 0x21,
+ 0x75, 0x83, 0x82, 0xE5, 0x45, 0xF0, 0xEF, 0x44,
+/*1670*/0x07, 0xF5, 0x82, 0x75, 0x83, 0x8A, 0x74, 0xFF,
+ 0xF0, 0x12, 0x1A, 0x4D, 0x12, 0x07, 0x2A, 0x75,
+/*1680*/0x83, 0xBC, 0xE0, 0x54, 0xEF, 0x12, 0x07, 0x29,
+ 0x75, 0x83, 0xBE, 0xE0, 0x54, 0xEF, 0x12, 0x07,
+/*1690*/0x29, 0x75, 0x83, 0xC0, 0xE0, 0x54, 0xEF, 0x12,
+ 0x07, 0x29, 0x75, 0x83, 0xBC, 0xE0, 0x44, 0x10,
+/*16A0*/0x12, 0x07, 0x29, 0x75, 0x83, 0xBE, 0xE0, 0x44,
+ 0x10, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC0, 0xE0,
+/*16B0*/0x44, 0x10, 0xF0, 0xAF, 0x58, 0xE5, 0x59, 0x12,
+ 0x08, 0x78, 0x02, 0x0A, 0xFF, 0xE4, 0xF5, 0x58,
+/*16C0*/0x7D, 0x01, 0xF5, 0x59, 0xAF, 0x35, 0xFE, 0xFC,
+ 0x12, 0x09, 0x15, 0x12, 0x07, 0x2A, 0x75, 0x83,
+/*16D0*/0xB6, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+ 0xB8, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+/*16E0*/0xBA, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+ 0xBC, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+/*16F0*/0xBE, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+ 0xC0, 0x74, 0x10, 0x12, 0x07, 0x29, 0x75, 0x83,
+/*1700*/0x90, 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC2,
+ 0xE4, 0x12, 0x07, 0x29, 0x75, 0x83, 0xC4, 0xE4,
+/*1710*/0x12, 0x07, 0x29, 0x75, 0x83, 0x92, 0xE4, 0x12,
+ 0x07, 0x29, 0x75, 0x83, 0xC6, 0xE4, 0x12, 0x07,
+/*1720*/0x29, 0x75, 0x83, 0xC8, 0xE4, 0xF0, 0xAF, 0x58,
+ 0xFE, 0xE5, 0x59, 0x12, 0x08, 0x7A, 0x02, 0x0A,
+/*1730*/0xFF, 0xE5, 0xE2, 0x30, 0xE4, 0x6C, 0xE5, 0xE7,
+ 0x54, 0xC0, 0x64, 0x40, 0x70, 0x64, 0xE5, 0x09,
+/*1740*/0xC4, 0x54, 0x30, 0xFE, 0xE5, 0x08, 0x25, 0xE0,
+ 0x25, 0xE0, 0x54, 0xC0, 0x4E, 0xFE, 0xEF, 0x54,
+/*1750*/0x3F, 0x4E, 0xFD, 0xE5, 0x2B, 0xAE, 0x2A, 0x78,
+ 0x02, 0xC3, 0x33, 0xCE, 0x33, 0xCE, 0xD8, 0xF9,
+/*1760*/0xF5, 0x82, 0x8E, 0x83, 0xED, 0xF0, 0xE5, 0x2B,
+ 0xAE, 0x2A, 0x78, 0x02, 0xC3, 0x33, 0xCE, 0x33,
+/*1770*/0xCE, 0xD8, 0xF9, 0xFF, 0xF5, 0x82, 0x8E, 0x83,
+ 0xA3, 0xE5, 0xFE, 0xF0, 0x8F, 0x82, 0x8E, 0x83,
+/*1780*/0xA3, 0xA3, 0xE5, 0xFD, 0xF0, 0x8F, 0x82, 0x8E,
+ 0x83, 0xA3, 0xA3, 0xA3, 0xE5, 0xFC, 0xF0, 0xC3,
+/*1790*/0xE5, 0x2B, 0x94, 0xFA, 0xE5, 0x2A, 0x94, 0x00,
+ 0x50, 0x08, 0x05, 0x2B, 0xE5, 0x2B, 0x70, 0x02,
+/*17A0*/0x05, 0x2A, 0x22, 0xE4, 0xFF, 0xE4, 0xF5, 0x58,
+ 0xF5, 0x56, 0xF5, 0x57, 0x74, 0x82, 0xFC, 0x12,
+/*17B0*/0x0E, 0x04, 0x8C, 0x83, 0xE0, 0xF5, 0x10, 0x54,
+ 0x7F, 0xF0, 0xE5, 0x10, 0x44, 0x80, 0x12, 0x0E,
+/*17C0*/0x98, 0xED, 0xF0, 0x7E, 0x0A, 0x12, 0x0E, 0x04,
+ 0x75, 0x83, 0xA0, 0xE0, 0x20, 0xE0, 0x26, 0xDE,
+/*17D0*/0xF4, 0x05, 0x57, 0xE5, 0x57, 0x70, 0x02, 0x05,
+ 0x56, 0xE5, 0x14, 0x24, 0x01, 0xFD, 0xE4, 0x33,
+/*17E0*/0xFC, 0xD3, 0xE5, 0x57, 0x9D, 0xE5, 0x56, 0x9C,
+ 0x40, 0xD9, 0xE5, 0x0A, 0x94, 0x20, 0x50, 0x02,
+/*17F0*/0x05, 0x0A, 0x43, 0xE1, 0x08, 0xC2, 0x31, 0x12,
+ 0x0E, 0x04, 0x75, 0x83, 0xA6, 0xE0, 0x55, 0x12,
+/*1800*/0x65, 0x12, 0x70, 0x03, 0xD2, 0x31, 0x22, 0xC2,
+ 0x31, 0x22, 0x90, 0x07, 0x26, 0xE0, 0xFA, 0xA3,
+/*1810*/0xE0, 0xF5, 0x82, 0x8A, 0x83, 0xE0, 0xF5, 0x41,
+ 0xE5, 0x39, 0xC3, 0x95, 0x41, 0x40, 0x26, 0xE5,
+/*1820*/0x39, 0x95, 0x41, 0xC3, 0x9F, 0xEE, 0x12, 0x07,
+ 0x6B, 0x40, 0x04, 0x7C, 0x01, 0x80, 0x02, 0x7C,
+/*1830*/0x00, 0xE5, 0x41, 0x64, 0x3F, 0x60, 0x04, 0x7B,
+ 0x01, 0x80, 0x02, 0x7B, 0x00, 0xEC, 0x5B, 0x60,
+/*1840*/0x29, 0x05, 0x41, 0x80, 0x28, 0xC3, 0xE5, 0x41,
+ 0x95, 0x39, 0xC3, 0x9F, 0xEE, 0x12, 0x07, 0x6B,
+/*1850*/0x40, 0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00,
+ 0xE5, 0x41, 0x60, 0x04, 0x7E, 0x01, 0x80, 0x02,
+/*1860*/0x7E, 0x00, 0xEF, 0x5E, 0x60, 0x04, 0x15, 0x41,
+ 0x80, 0x03, 0x85, 0x39, 0x41, 0x85, 0x3A, 0x40,
+/*1870*/0x22, 0xE5, 0xE2, 0x30, 0xE4, 0x60, 0xE5, 0xE1,
+ 0x30, 0xE2, 0x5B, 0xE5, 0x09, 0x70, 0x04, 0x7F,
+/*1880*/0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5, 0x08, 0x70,
+ 0x04, 0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE,
+/*1890*/0x5F, 0x60, 0x43, 0x53, 0xF9, 0xF8, 0xE5, 0xE2,
+ 0x30, 0xE4, 0x3B, 0xE5, 0xE1, 0x30, 0xE2, 0x2E,
+/*18A0*/0x43, 0xFA, 0x02, 0x53, 0xFA, 0xFB, 0xE4, 0xF5,
+ 0x10, 0x90, 0x94, 0x70, 0xE5, 0x10, 0xF0, 0xE5,
+/*18B0*/0xE1, 0x30, 0xE2, 0xE7, 0x90, 0x94, 0x70, 0xE0,
+ 0x65, 0x10, 0x60, 0x03, 0x43, 0xFA, 0x04, 0x05,
+/*18C0*/0x10, 0x90, 0x94, 0x70, 0xE5, 0x10, 0xF0, 0x70,
+ 0xE6, 0x12, 0x00, 0x06, 0x80, 0xE1, 0x53, 0xFA,
+/*18D0*/0xFD, 0x53, 0xFA, 0xFB, 0x80, 0xC0, 0x22, 0x8F,
+ 0x54, 0x12, 0x00, 0x06, 0xE5, 0xE1, 0x30, 0xE0,
+/*18E0*/0x04, 0x7F, 0x01, 0x80, 0x02, 0x7F, 0x00, 0xE5,
+ 0x7E, 0xD3, 0x94, 0x05, 0x40, 0x04, 0x7E, 0x01,
+/*18F0*/0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F, 0x60, 0x3D,
+ 0x85, 0x54, 0x11, 0xE5, 0xE2, 0x20, 0xE1, 0x32,
+/*1900*/0x74, 0xCE, 0x12, 0x1A, 0x05, 0x30, 0xE7, 0x04,
+ 0x7D, 0x01, 0x80, 0x02, 0x7D, 0x00, 0x8F, 0x82,
+/*1910*/0x8E, 0x83, 0xE0, 0x30, 0xE6, 0x04, 0x7F, 0x01,
+ 0x80, 0x02, 0x7F, 0x00, 0xEF, 0x5D, 0x70, 0x15,
+/*1920*/0x12, 0x15, 0xC6, 0x74, 0xCE, 0x12, 0x1A, 0x05,
+ 0x30, 0xE6, 0x07, 0xE0, 0x44, 0x80, 0xF0, 0x43,
+/*1930*/0xF9, 0x80, 0x12, 0x18, 0x71, 0x22, 0x12, 0x0E,
+ 0x44, 0xE5, 0x16, 0x25, 0xE0, 0x25, 0xE0, 0x24,
+/*1940*/0xB0, 0xF5, 0x82, 0xE4, 0x34, 0x1A, 0xF5, 0x83,
+ 0xE4, 0x93, 0xF5, 0x0F, 0xE5, 0x16, 0x25, 0xE0,
+/*1950*/0x25, 0xE0, 0x24, 0xB1, 0xF5, 0x82, 0xE4, 0x34,
+ 0x1A, 0xF5, 0x83, 0xE4, 0x93, 0xF5, 0x0E, 0x12,
+/*1960*/0x0E, 0x65, 0xF5, 0x10, 0xE5, 0x0F, 0x54, 0xF0,
+ 0x12, 0x0E, 0x17, 0x75, 0x83, 0x8C, 0xEF, 0xF0,
+/*1970*/0xE5, 0x0F, 0x30, 0xE0, 0x0C, 0x12, 0x0E, 0x04,
+ 0x75, 0x83, 0x86, 0xE0, 0x44, 0x40, 0xF0, 0x80,
+/*1980*/0x0A, 0x12, 0x0E, 0x04, 0x75, 0x83, 0x86, 0xE0,
+ 0x54, 0xBF, 0xF0, 0x12, 0x0E, 0x91, 0x75, 0x83,
+/*1990*/0x82, 0xE5, 0x0E, 0xF0, 0x22, 0x7F, 0x05, 0x12,
+ 0x17, 0x31, 0x12, 0x0E, 0x04, 0x12, 0x0E, 0x33,
+/*19A0*/0x74, 0x02, 0xF0, 0x74, 0x8E, 0xFE, 0x12, 0x0E,
+ 0x04, 0x12, 0x0E, 0x0B, 0xEF, 0xF0, 0x75, 0x15,
+/*19B0*/0x70, 0x12, 0x0F, 0xF7, 0x20, 0x34, 0x05, 0x75,
+ 0x15, 0x10, 0x80, 0x03, 0x75, 0x15, 0x50, 0x12,
+/*19C0*/0x0F, 0xF7, 0x20, 0x34, 0x04, 0x74, 0x10, 0x80,
+ 0x02, 0x74, 0xF0, 0x25, 0x15, 0xF5, 0x15, 0x12,
+/*19D0*/0x0E, 0x21, 0xEF, 0xF0, 0x12, 0x10, 0x91, 0x20,
+ 0x34, 0x17, 0xE5, 0x15, 0x64, 0x30, 0x60, 0x0C,
+/*19E0*/0x74, 0x10, 0x25, 0x15, 0xF5, 0x15, 0xB4, 0x80,
+ 0x03, 0xE4, 0xF5, 0x15, 0x12, 0x0E, 0x21, 0xEF,
+/*19F0*/0xF0, 0x22, 0xF0, 0xE5, 0x0B, 0x25, 0xE0, 0x25,
+ 0xE0, 0x24, 0x82, 0xF5, 0x82, 0xE4, 0x34, 0x07,
+/*1A00*/0xF5, 0x83, 0x22, 0x74, 0x88, 0xFE, 0xE5, 0x08,
+ 0x44, 0x07, 0xFF, 0xF5, 0x82, 0x8E, 0x83, 0xE0,
+/*1A10*/0x22, 0xF0, 0xE5, 0x08, 0x44, 0x07, 0xF5, 0x82,
+ 0x22, 0xF0, 0xE0, 0x54, 0xC0, 0x8F, 0x82, 0x8E,
+/*1A20*/0x83, 0xF0, 0x22, 0xEF, 0x44, 0x07, 0xF5, 0x82,
+ 0x75, 0x83, 0x86, 0xE0, 0x54, 0x10, 0xD3, 0x94,
+/*1A30*/0x00, 0x22, 0xF0, 0x90, 0x07, 0x15, 0xE0, 0x04,
+ 0xF0, 0x22, 0x44, 0x06, 0xF5, 0x82, 0x75, 0x83,
+/*1A40*/0x9E, 0xE0, 0x22, 0xFE, 0xEF, 0x44, 0x07, 0xF5,
+ 0x82, 0x8E, 0x83, 0xE0, 0x22, 0xE4, 0x90, 0x07,
+/*1A50*/0x2A, 0xF0, 0xA3, 0xF0, 0x12, 0x07, 0x2A, 0x75,
+ 0x83, 0x82, 0xE0, 0x54, 0x7F, 0x12, 0x07, 0x29,
+/*1A60*/0xE0, 0x44, 0x80, 0xF0, 0x12, 0x10, 0xFC, 0x12,
+ 0x08, 0x1F, 0x75, 0x83, 0xA0, 0xE0, 0x20, 0xE0,
+/*1A70*/0x1A, 0x90, 0x07, 0x2B, 0xE0, 0x04, 0xF0, 0x70,
+ 0x06, 0x90, 0x07, 0x2A, 0xE0, 0x04, 0xF0, 0x90,
+/*1A80*/0x07, 0x2A, 0xE0, 0xB4, 0x10, 0xE1, 0xA3, 0xE0,
+ 0xB4, 0x00, 0xDC, 0xEE, 0x44, 0xA6, 0xFC, 0xEF,
+/*1A90*/0x44, 0x07, 0xF5, 0x82, 0x8C, 0x83, 0xE0, 0xF5,
+ 0x32, 0xEE, 0x44, 0xA8, 0xFE, 0xEF, 0x44, 0x07,
+/*1AA0*/0xF5, 0x82, 0x8E, 0x83, 0xE0, 0xF5, 0x33, 0x22,
+ 0x01, 0x20, 0x11, 0x00, 0x04, 0x20, 0x00, 0x90,
+/*1AB0*/0x00, 0x20, 0x0F, 0x92, 0x00, 0x21, 0x0F, 0x94,
+ 0x00, 0x22, 0x0F, 0x96, 0x00, 0x23, 0x0F, 0x98,
+/*1AC0*/0x00, 0x24, 0x0F, 0x9A, 0x00, 0x25, 0x0F, 0x9C,
+ 0x00, 0x26, 0x0F, 0x9E, 0x00, 0x27, 0x0F, 0xA0,
+/*1AD0*/0x01, 0x20, 0x01, 0xA2, 0x01, 0x21, 0x01, 0xA4,
+ 0x01, 0x22, 0x01, 0xA6, 0x01, 0x23, 0x01, 0xA8,
+/*1AE0*/0x01, 0x24, 0x01, 0xAA, 0x01, 0x25, 0x01, 0xAC,
+ 0x01, 0x26, 0x01, 0xAE, 0x01, 0x27, 0x01, 0xB0,
+/*1AF0*/0x01, 0x28, 0x01, 0xB4, 0x00, 0x28, 0x0F, 0xB6,
+ 0x40, 0x28, 0x0F, 0xB8, 0x61, 0x28, 0x01, 0xCB,
+/*1B00*/0xEF, 0xCB, 0xCA, 0xEE, 0xCA, 0x7F, 0x01, 0xE4,
+ 0xFD, 0xEB, 0x4A, 0x70, 0x24, 0xE5, 0x08, 0xF5,
+/*1B10*/0x82, 0x74, 0xB6, 0x12, 0x08, 0x29, 0xE5, 0x08,
+ 0xF5, 0x82, 0x74, 0xB8, 0x12, 0x08, 0x29, 0xE5,
+/*1B20*/0x08, 0xF5, 0x82, 0x74, 0xBA, 0x12, 0x08, 0x29,
+ 0x7E, 0x00, 0x7C, 0x00, 0x12, 0x0A, 0xFF, 0x80,
+/*1B30*/0x12, 0x90, 0x07, 0x26, 0x12, 0x07, 0x35, 0xE5,
+ 0x41, 0xF0, 0x90, 0x07, 0x24, 0x12, 0x07, 0x35,
+/*1B40*/0xE5, 0x40, 0xF0, 0x12, 0x07, 0x2A, 0x75, 0x83,
+ 0x8E, 0xE4, 0x12, 0x07, 0x29, 0x74, 0x01, 0x12,
+/*1B50*/0x07, 0x29, 0xE4, 0xF0, 0x22, 0xE4, 0xF5, 0x26,
+ 0xF5, 0x27, 0x53, 0xE1, 0xFE, 0xF5, 0x2A, 0x75,
+/*1B60*/0x2B, 0x01, 0xF5, 0x08, 0x7F, 0x01, 0x12, 0x17,
+ 0x31, 0x30, 0x30, 0x1C, 0x90, 0x1A, 0xA9, 0xE4,
+/*1B70*/0x93, 0xF5, 0x10, 0x90, 0x1F, 0xF9, 0xE4, 0x93,
+ 0xF5, 0x10, 0x90, 0x00, 0x41, 0xE4, 0x93, 0xF5,
+/*1B80*/0x10, 0x90, 0x1E, 0xCA, 0xE4, 0x93, 0xF5, 0x10,
+ 0x7F, 0x02, 0x12, 0x17, 0x31, 0x12, 0x0F, 0x54,
+/*1B90*/0x7F, 0x03, 0x12, 0x17, 0x31, 0x12, 0x00, 0x06,
+ 0xE5, 0xE2, 0x30, 0xE7, 0x09, 0x12, 0x10, 0x00,
+/*1BA0*/0x30, 0x30, 0x03, 0x12, 0x11, 0x00, 0x02, 0x00,
+ 0x47, 0x12, 0x08, 0x1F, 0x75, 0x83, 0xD0, 0xE0,
+/*1BB0*/0xC4, 0x54, 0x0F, 0xFD, 0x75, 0x43, 0x01, 0x75,
+ 0x44, 0xFF, 0x12, 0x08, 0xAA, 0x74, 0x04, 0xF0,
+/*1BC0*/0x75, 0x3B, 0x01, 0xED, 0x14, 0x60, 0x0C, 0x14,
+ 0x60, 0x0B, 0x14, 0x60, 0x0F, 0x24, 0x03, 0x70,
+/*1BD0*/0x0B, 0x80, 0x09, 0x80, 0x00, 0x12, 0x08, 0xA7,
+ 0x04, 0xF0, 0x80, 0x06, 0x12, 0x08, 0xA7, 0x74,
+/*1BE0*/0x04, 0xF0, 0xEE, 0x44, 0x82, 0xFE, 0xEF, 0x44,
+ 0x07, 0xF5, 0x82, 0x8E, 0x83, 0xE5, 0x45, 0x12,
+/*1BF0*/0x08, 0xBE, 0x75, 0x83, 0x82, 0xE5, 0x31, 0xF0,
+ 0x02, 0x11, 0x4C, 0x8E, 0x60, 0x8F, 0x61, 0x12,
+/*1C00*/0x1E, 0xA5, 0xE4, 0xFF, 0xCE, 0xED, 0xCE, 0xEE,
+ 0xD3, 0x95, 0x61, 0xE5, 0x60, 0x12, 0x07, 0x6B,
+/*1C10*/0x40, 0x39, 0x74, 0x20, 0x2E, 0xF5, 0x82, 0xE4,
+ 0x34, 0x03, 0xF5, 0x83, 0xE0, 0x70, 0x03, 0xFF,
+/*1C20*/0x80, 0x26, 0x12, 0x08, 0xE2, 0xFD, 0xC3, 0x9F,
+ 0x40, 0x1E, 0xCF, 0xED, 0xCF, 0xEB, 0x4A, 0x70,
+/*1C30*/0x0B, 0x8D, 0x42, 0x12, 0x08, 0xEE, 0xF5, 0x41,
+ 0x8E, 0x40, 0x80, 0x0C, 0x12, 0x08, 0xE2, 0xF5,
+/*1C40*/0x38, 0x12, 0x08, 0xEE, 0xF5, 0x39, 0x8E, 0x3A,
+ 0x1E, 0x80, 0xBC, 0x22, 0x75, 0x58, 0x01, 0xE5,
+/*1C50*/0x35, 0x70, 0x0C, 0x12, 0x07, 0xCC, 0xE0, 0xF5,
+ 0x4A, 0x12, 0x07, 0xD8, 0xE0, 0xF5, 0x4C, 0xE5,
+/*1C60*/0x35, 0xB4, 0x04, 0x0C, 0x12, 0x07, 0xE4, 0xE0,
+ 0xF5, 0x4A, 0x12, 0x07, 0xF0, 0xE0, 0xF5, 0x4C,
+/*1C70*/0xE5, 0x35, 0xB4, 0x01, 0x04, 0x7F, 0x01, 0x80,
+ 0x02, 0x7F, 0x00, 0xE5, 0x35, 0xB4, 0x02, 0x04,
+/*1C80*/0x7E, 0x01, 0x80, 0x02, 0x7E, 0x00, 0xEE, 0x4F,
+ 0x60, 0x0C, 0x12, 0x07, 0xFC, 0xE0, 0xF5, 0x4A,
+/*1C90*/0x12, 0x08, 0x08, 0xE0, 0xF5, 0x4C, 0x85, 0x41,
+ 0x49, 0x85, 0x40, 0x4B, 0x22, 0x75, 0x5B, 0x01,
+/*1CA0*/0x90, 0x07, 0x24, 0x12, 0x07, 0x35, 0xE0, 0x54,
+ 0x1F, 0xFF, 0xD3, 0x94, 0x02, 0x50, 0x04, 0x8F,
+/*1CB0*/0x58, 0x80, 0x05, 0xEF, 0x24, 0xFE, 0xF5, 0x58,
+ 0xEF, 0xC3, 0x94, 0x18, 0x40, 0x05, 0x75, 0x59,
+/*1CC0*/0x18, 0x80, 0x04, 0xEF, 0x04, 0xF5, 0x59, 0x85,
+ 0x43, 0x5A, 0xAF, 0x58, 0x7E, 0x00, 0xAD, 0x59,
+/*1CD0*/0x7C, 0x00, 0xAB, 0x5B, 0x7A, 0x00, 0x12, 0x15,
+ 0x41, 0xAF, 0x5A, 0x7E, 0x00, 0x12, 0x18, 0x0A,
+/*1CE0*/0xAF, 0x5B, 0x7E, 0x00, 0x02, 0x1A, 0xFF, 0xE5,
+ 0xE2, 0x30, 0xE7, 0x0E, 0x12, 0x10, 0x03, 0xC2,
+/*1CF0*/0x30, 0x30, 0x30, 0x03, 0x12, 0x10, 0xFF, 0x20,
+ 0x33, 0x28, 0xE5, 0xE7, 0x30, 0xE7, 0x05, 0x12,
+/*1D00*/0x0E, 0xA2, 0x80, 0x0D, 0xE5, 0xFE, 0xC3, 0x94,
+ 0x20, 0x50, 0x06, 0x12, 0x0E, 0xA2, 0x43, 0xF9,
+/*1D10*/0x08, 0xE5, 0xF2, 0x30, 0xE7, 0x03, 0x53, 0xF9,
+ 0x7F, 0xE5, 0xF1, 0x54, 0x70, 0xD3, 0x94, 0x00,
+/*1D20*/0x50, 0xD8, 0x22, 0x12, 0x0E, 0x04, 0x75, 0x83,
+ 0x80, 0xE4, 0xF0, 0xE5, 0x08, 0x44, 0x07, 0x12,
+/*1D30*/0x0D, 0xFD, 0x75, 0x83, 0x84, 0x12, 0x0E, 0x02,
+ 0x75, 0x83, 0x86, 0x12, 0x0E, 0x02, 0x75, 0x83,
+/*1D40*/0x8C, 0xE0, 0x54, 0xF3, 0x12, 0x0E, 0x03, 0x75,
+ 0x83, 0x8E, 0x12, 0x0E, 0x02, 0x75, 0x83, 0x94,
+/*1D50*/0xE0, 0x54, 0xFB, 0xF0, 0x22, 0x12, 0x07, 0x2A,
+ 0x75, 0x83, 0x8E, 0xE4, 0x12, 0x07, 0x29, 0x74,
+/*1D60*/0x01, 0x12, 0x07, 0x29, 0xE4, 0x12, 0x08, 0xBE,
+ 0x75, 0x83, 0x8C, 0xE0, 0x44, 0x20, 0x12, 0x08,
+/*1D70*/0xBE, 0xE0, 0x54, 0xDF, 0xF0, 0x74, 0x84, 0x85,
+ 0x08, 0x82, 0xF5, 0x83, 0xE0, 0x54, 0x7F, 0xF0,
+/*1D80*/0xE0, 0x44, 0x80, 0xF0, 0x22, 0x75, 0x56, 0x01,
+ 0xE4, 0xFD, 0xF5, 0x57, 0xAF, 0x35, 0xFE, 0xFC,
+/*1D90*/0x12, 0x09, 0x15, 0x12, 0x1C, 0x9D, 0x12, 0x1E,
+ 0x7A, 0x12, 0x1C, 0x4C, 0xAF, 0x57, 0x7E, 0x00,
+/*1DA0*/0xAD, 0x56, 0x7C, 0x00, 0x12, 0x04, 0x44, 0xAF,
+ 0x56, 0x7E, 0x00, 0x02, 0x11, 0xEE, 0x75, 0x56,
+/*1DB0*/0x01, 0xE4, 0xFD, 0xF5, 0x57, 0xAF, 0x35, 0xFE,
+ 0xFC, 0x12, 0x09, 0x15, 0x12, 0x1C, 0x9D, 0x12,
+/*1DC0*/0x1E, 0x7A, 0x12, 0x1C, 0x4C, 0xAF, 0x57, 0x7E,
+ 0x00, 0xAD, 0x56, 0x7C, 0x00, 0x12, 0x04, 0x44,
+/*1DD0*/0xAF, 0x56, 0x7E, 0x00, 0x02, 0x11, 0xEE, 0xE4,
+ 0xF5, 0x16, 0x12, 0x0E, 0x44, 0xFE, 0xE5, 0x08,
+/*1DE0*/0x44, 0x05, 0xFF, 0x12, 0x0E, 0x65, 0x8F, 0x82,
+ 0x8E, 0x83, 0xF0, 0x05, 0x16, 0xE5, 0x16, 0xC3,
+/*1DF0*/0x94, 0x14, 0x40, 0xE6, 0xE5, 0x08, 0x12, 0x0E,
+ 0x2B, 0xE4, 0xF0, 0x22, 0xE4, 0xF5, 0x58, 0xF5,
+/*1E00*/0x59, 0xF5, 0x5A, 0xFF, 0xFE, 0xAD, 0x58, 0xFC,
+ 0x12, 0x09, 0x15, 0x7F, 0x04, 0x7E, 0x00, 0xAD,
+/*1E10*/0x58, 0x7C, 0x00, 0x12, 0x09, 0x15, 0x7F, 0x02,
+ 0x7E, 0x00, 0xAD, 0x58, 0x7C, 0x00, 0x02, 0x09,
+/*1E20*/0x15, 0xE5, 0x3C, 0x25, 0x3E, 0xFC, 0xE5, 0x42,
+ 0x24, 0x00, 0xFB, 0xE4, 0x33, 0xFA, 0xEC, 0xC3,
+/*1E30*/0x9B, 0xEA, 0x12, 0x07, 0x6B, 0x40, 0x0B, 0x8C,
+ 0x42, 0xE5, 0x3D, 0x25, 0x3F, 0xF5, 0x41, 0x8F,
+/*1E40*/0x40, 0x22, 0x12, 0x09, 0x0B, 0x22, 0x74, 0x84,
+ 0xF5, 0x18, 0x85, 0x08, 0x19, 0x85, 0x19, 0x82,
+/*1E50*/0x85, 0x18, 0x83, 0xE0, 0x54, 0x7F, 0xF0, 0xE0,
+ 0x44, 0x80, 0xF0, 0xE0, 0x44, 0x80, 0xF0, 0x22,
+/*1E60*/0xEF, 0x4E, 0x70, 0x0B, 0x12, 0x07, 0x2A, 0x75,
+ 0x83, 0xD2, 0xE0, 0x54, 0xDF, 0xF0, 0x22, 0x12,
+/*1E70*/0x07, 0x2A, 0x75, 0x83, 0xD2, 0xE0, 0x44, 0x20,
+ 0xF0, 0x22, 0x75, 0x58, 0x01, 0x90, 0x07, 0x26,
+/*1E80*/0x12, 0x07, 0x35, 0xE0, 0x54, 0x3F, 0xF5, 0x41,
+ 0x12, 0x07, 0x32, 0xE0, 0x54, 0x3F, 0xF5, 0x40,
+/*1E90*/0x22, 0x75, 0x56, 0x02, 0xE4, 0xF5, 0x57, 0x12,
+ 0x1D, 0xFC, 0xAF, 0x57, 0x7E, 0x00, 0xAD, 0x56,
+/*1EA0*/0x7C, 0x00, 0x02, 0x04, 0x44, 0xE4, 0xF5, 0x42,
+ 0xF5, 0x41, 0xF5, 0x40, 0xF5, 0x38, 0xF5, 0x39,
+/*1EB0*/0xF5, 0x3A, 0x22, 0xEF, 0x54, 0x07, 0xFF, 0xE5,
+ 0xF9, 0x54, 0xF8, 0x4F, 0xF5, 0xF9, 0x22, 0x7F,
+/*1EC0*/0x01, 0xE4, 0xFE, 0x0F, 0x0E, 0xBE, 0xFF, 0xFB,
+ 0x22, 0x01, 0x20, 0x00, 0x01, 0x04, 0x20, 0x00,
+/*1ED0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1EE0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1EF0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F00*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F10*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F20*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F30*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F40*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F50*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F60*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F70*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F80*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1F90*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FA0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FB0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FC0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FD0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FE0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+/*1FF0*/0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x01, 0x20, 0x11, 0x00, 0x04, 0x20, 0x00, 0x81
+};
+
+int qib_sd7220_ib_load(struct qib_devdata *dd)
+{
+ return qib_sd7220_prog_ld(dd, IB_7220_SERDES, qib_sd7220_ib_img,
+ sizeof(qib_sd7220_ib_img), 0);
+}
+
+int qib_sd7220_ib_vfy(struct qib_devdata *dd)
+{
+ return qib_sd7220_prog_vfy(dd, IB_7220_SERDES, qib_sd7220_ib_img,
+ sizeof(qib_sd7220_ib_img), 0);
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 35/52] IB/qib: Add qib_sdma.c
From: Ralph Campbell @ 2010-05-07 0:01 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_sdma.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_sdma.c | 973 ++++++++++++++++++++++++++++++++++
1 files changed, 973 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_sdma.c
diff --git a/drivers/infiniband/hw/qib/qib_sdma.c b/drivers/infiniband/hw/qib/qib_sdma.c
new file mode 100644
index 0000000..b845688
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_sdma.c
@@ -0,0 +1,973 @@
+/*
+ * Copyright (c) 2007, 2008, 2009, 2010 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/netdevice.h>
+
+#include "qib.h"
+#include "qib_common.h"
+
+/* default pio off, sdma on */
+static ushort sdma_descq_cnt = 256;
+module_param_named(sdma_descq_cnt, sdma_descq_cnt, ushort, S_IRUGO);
+MODULE_PARM_DESC(sdma_descq_cnt, "Number of SDMA descq entries");
+
+/*
+ * Bits defined in the send DMA descriptor.
+ */
+#define SDMA_DESC_LAST (1ULL << 11)
+#define SDMA_DESC_FIRST (1ULL << 12)
+#define SDMA_DESC_DMA_HEAD (1ULL << 13)
+#define SDMA_DESC_USE_LARGE_BUF (1ULL << 14)
+#define SDMA_DESC_INTR (1ULL << 15)
+#define SDMA_DESC_COUNT_LSB 16
+#define SDMA_DESC_GEN_LSB 30
+
+char *qib_sdma_state_names[] = {
+ [qib_sdma_state_s00_hw_down] = "s00_HwDown",
+ [qib_sdma_state_s10_hw_start_up_wait] = "s10_HwStartUpWait",
+ [qib_sdma_state_s20_idle] = "s20_Idle",
+ [qib_sdma_state_s30_sw_clean_up_wait] = "s30_SwCleanUpWait",
+ [qib_sdma_state_s40_hw_clean_up_wait] = "s40_HwCleanUpWait",
+ [qib_sdma_state_s50_hw_halt_wait] = "s50_HwHaltWait",
+ [qib_sdma_state_s99_running] = "s99_Running",
+};
+
+char *qib_sdma_event_names[] = {
+ [qib_sdma_event_e00_go_hw_down] = "e00_GoHwDown",
+ [qib_sdma_event_e10_go_hw_start] = "e10_GoHwStart",
+ [qib_sdma_event_e20_hw_started] = "e20_HwStarted",
+ [qib_sdma_event_e30_go_running] = "e30_GoRunning",
+ [qib_sdma_event_e40_sw_cleaned] = "e40_SwCleaned",
+ [qib_sdma_event_e50_hw_cleaned] = "e50_HwCleaned",
+ [qib_sdma_event_e60_hw_halted] = "e60_HwHalted",
+ [qib_sdma_event_e70_go_idle] = "e70_GoIdle",
+ [qib_sdma_event_e7220_err_halted] = "e7220_ErrHalted",
+ [qib_sdma_event_e7322_err_halted] = "e7322_ErrHalted",
+ [qib_sdma_event_e90_timer_tick] = "e90_TimerTick",
+};
+
+/* declare all statics here rather than keep sorting */
+static int alloc_sdma(struct qib_pportdata *);
+static void sdma_complete(struct kref *);
+static void sdma_finalput(struct qib_sdma_state *);
+static void sdma_get(struct qib_sdma_state *);
+static void sdma_put(struct qib_sdma_state *);
+static void sdma_set_state(struct qib_pportdata *, enum qib_sdma_states);
+static void sdma_start_sw_clean_up(struct qib_pportdata *);
+static void sdma_sw_clean_up_task(unsigned long);
+static void unmap_desc(struct qib_pportdata *, unsigned);
+
+static void sdma_get(struct qib_sdma_state *ss)
+{
+ kref_get(&ss->kref);
+}
+
+static void sdma_complete(struct kref *kref)
+{
+ struct qib_sdma_state *ss =
+ container_of(kref, struct qib_sdma_state, kref);
+
+ complete(&ss->comp);
+}
+
+static void sdma_put(struct qib_sdma_state *ss)
+{
+ kref_put(&ss->kref, sdma_complete);
+}
+
+static void sdma_finalput(struct qib_sdma_state *ss)
+{
+ sdma_put(ss);
+ wait_for_completion(&ss->comp);
+}
+
+/*
+ * Complete all the sdma requests on the active list, in the correct
+ * order, and with appropriate processing. Called when cleaning up
+ * after sdma shutdown, and when new sdma requests are submitted for
+ * a link that is down. This matches what is done for requests
+ * that complete normally, it's just the full list.
+ *
+ * Must be called with sdma_lock held
+ */
+static void clear_sdma_activelist(struct qib_pportdata *ppd)
+{
+ struct qib_sdma_txreq *txp, *txp_next;
+
+ list_for_each_entry_safe(txp, txp_next, &ppd->sdma_activelist, list) {
+ list_del_init(&txp->list);
+ if (txp->flags & QIB_SDMA_TXREQ_F_FREEDESC) {
+ unsigned idx;
+
+ idx = txp->start_idx;
+ while (idx != txp->next_descq_idx) {
+ unmap_desc(ppd, idx);
+ if (++idx == ppd->sdma_descq_cnt)
+ idx = 0;
+ }
+ }
+ if (txp->callback)
+ (*txp->callback)(txp, QIB_SDMA_TXREQ_S_ABORTED);
+ }
+}
+
+static void sdma_sw_clean_up_task(unsigned long opaque)
+{
+ struct qib_pportdata *ppd = (struct qib_pportdata *) opaque;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+ /*
+ * At this point, the following should always be true:
+ * - We are halted, so no more descriptors are getting retired.
+ * - We are not running, so no one is submitting new work.
+ * - Only we can send the e40_sw_cleaned, so we can't start
+ * running again until we say so. So, the active list and
+ * descq are ours to play with.
+ */
+
+ /* Process all retired requests. */
+ qib_sdma_make_progress(ppd);
+
+ clear_sdma_activelist(ppd);
+
+ /*
+ * Resync count of added and removed. It is VERY important that
+ * sdma_descq_removed NEVER decrement - user_sdma depends on it.
+ */
+ ppd->sdma_descq_removed = ppd->sdma_descq_added;
+
+ /*
+ * Reset our notion of head and tail.
+ * Note that the HW registers will be reset when switching states
+ * due to calling __qib_sdma_process_event() below.
+ */
+ ppd->sdma_descq_tail = 0;
+ ppd->sdma_descq_head = 0;
+ ppd->sdma_head_dma[0] = 0;
+ ppd->sdma_generation = 0;
+
+ __qib_sdma_process_event(ppd, qib_sdma_event_e40_sw_cleaned);
+
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+}
+
+/*
+ * This is called when changing to state qib_sdma_state_s10_hw_start_up_wait
+ * as a result of send buffer errors or send DMA descriptor errors.
+ * We want to disarm the buffers in these cases.
+ */
+static void sdma_hw_start_up(struct qib_pportdata *ppd)
+{
+ struct qib_sdma_state *ss = &ppd->sdma_state;
+ unsigned bufno;
+
+ for (bufno = ss->first_sendbuf; bufno < ss->last_sendbuf; ++bufno)
+ ppd->dd->f_sendctrl(ppd, QIB_SENDCTRL_DISARM_BUF(bufno));
+
+ ppd->dd->f_sdma_hw_start_up(ppd);
+}
+
+static void sdma_sw_tear_down(struct qib_pportdata *ppd)
+{
+ struct qib_sdma_state *ss = &ppd->sdma_state;
+
+ /* Releasing this reference means the state machine has stopped. */
+ sdma_put(ss);
+}
+
+static void sdma_start_sw_clean_up(struct qib_pportdata *ppd)
+{
+ tasklet_hi_schedule(&ppd->sdma_sw_clean_up_task);
+}
+
+static void sdma_set_state(struct qib_pportdata *ppd,
+ enum qib_sdma_states next_state)
+{
+ struct qib_sdma_state *ss = &ppd->sdma_state;
+ struct sdma_set_state_action *action = ss->set_state_action;
+ unsigned op = 0;
+
+ /* debugging bookkeeping */
+ ss->previous_state = ss->current_state;
+ ss->previous_op = ss->current_op;
+
+ ss->current_state = next_state;
+
+ if (action[next_state].op_enable)
+ op |= QIB_SDMA_SENDCTRL_OP_ENABLE;
+
+ if (action[next_state].op_intenable)
+ op |= QIB_SDMA_SENDCTRL_OP_INTENABLE;
+
+ if (action[next_state].op_halt)
+ op |= QIB_SDMA_SENDCTRL_OP_HALT;
+
+ if (action[next_state].op_drain)
+ op |= QIB_SDMA_SENDCTRL_OP_DRAIN;
+
+ if (action[next_state].go_s99_running_tofalse)
+ ss->go_s99_running = 0;
+
+ if (action[next_state].go_s99_running_totrue)
+ ss->go_s99_running = 1;
+
+ ss->current_op = op;
+
+ ppd->dd->f_sdma_sendctrl(ppd, ss->current_op);
+}
+
+static void unmap_desc(struct qib_pportdata *ppd, unsigned head)
+{
+ __le64 *descqp = &ppd->sdma_descq[head].qw[0];
+ u64 desc[2];
+ dma_addr_t addr;
+ size_t len;
+
+ desc[0] = le64_to_cpu(descqp[0]);
+ desc[1] = le64_to_cpu(descqp[1]);
+
+ addr = (desc[1] << 32) | (desc[0] >> 32);
+ len = (desc[0] >> 14) & (0x7ffULL << 2);
+ dma_unmap_single(&ppd->dd->pcidev->dev, addr, len, DMA_TO_DEVICE);
+}
+
+static int alloc_sdma(struct qib_pportdata *ppd)
+{
+ ppd->sdma_descq_cnt = sdma_descq_cnt;
+ if (!ppd->sdma_descq_cnt)
+ ppd->sdma_descq_cnt = 256;
+
+ /* Allocate memory for SendDMA descriptor FIFO */
+ ppd->sdma_descq = dma_alloc_coherent(&ppd->dd->pcidev->dev,
+ ppd->sdma_descq_cnt * sizeof(u64[2]), &ppd->sdma_descq_phys,
+ GFP_KERNEL);
+
+ if (!ppd->sdma_descq) {
+ qib_dev_err(ppd->dd, "failed to allocate SendDMA descriptor "
+ "FIFO memory\n");
+ goto bail;
+ }
+
+ /* Allocate memory for DMA of head register to memory */
+ ppd->sdma_head_dma = dma_alloc_coherent(&ppd->dd->pcidev->dev,
+ PAGE_SIZE, &ppd->sdma_head_phys, GFP_KERNEL);
+ if (!ppd->sdma_head_dma) {
+ qib_dev_err(ppd->dd, "failed to allocate SendDMA "
+ "head memory\n");
+ goto cleanup_descq;
+ }
+ ppd->sdma_head_dma[0] = 0;
+ return 0;
+
+cleanup_descq:
+ dma_free_coherent(&ppd->dd->pcidev->dev,
+ ppd->sdma_descq_cnt * sizeof(u64[2]), (void *)ppd->sdma_descq,
+ ppd->sdma_descq_phys);
+ ppd->sdma_descq = NULL;
+ ppd->sdma_descq_phys = 0;
+bail:
+ ppd->sdma_descq_cnt = 0;
+ return -ENOMEM;
+}
+
+static void free_sdma(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+
+ if (ppd->sdma_head_dma) {
+ dma_free_coherent(&dd->pcidev->dev, PAGE_SIZE,
+ (void *)ppd->sdma_head_dma,
+ ppd->sdma_head_phys);
+ ppd->sdma_head_dma = NULL;
+ ppd->sdma_head_phys = 0;
+ }
+
+ if (ppd->sdma_descq) {
+ dma_free_coherent(&dd->pcidev->dev,
+ ppd->sdma_descq_cnt * sizeof(u64[2]),
+ ppd->sdma_descq, ppd->sdma_descq_phys);
+ ppd->sdma_descq = NULL;
+ ppd->sdma_descq_phys = 0;
+ }
+}
+
+static inline void make_sdma_desc(struct qib_pportdata *ppd,
+ u64 *sdmadesc, u64 addr, u64 dwlen,
+ u64 dwoffset)
+{
+
+ WARN_ON(addr & 3);
+ /* SDmaPhyAddr[47:32] */
+ sdmadesc[1] = addr >> 32;
+ /* SDmaPhyAddr[31:0] */
+ sdmadesc[0] = (addr & 0xfffffffcULL) << 32;
+ /* SDmaGeneration[1:0] */
+ sdmadesc[0] |= (ppd->sdma_generation & 3ULL) <<
+ SDMA_DESC_GEN_LSB;
+ /* SDmaDwordCount[10:0] */
+ sdmadesc[0] |= (dwlen & 0x7ffULL) << SDMA_DESC_COUNT_LSB;
+ /* SDmaBufOffset[12:2] */
+ sdmadesc[0] |= dwoffset & 0x7ffULL;
+}
+
+/* sdma_lock must be held */
+int qib_sdma_make_progress(struct qib_pportdata *ppd)
+{
+ struct list_head *lp = NULL;
+ struct qib_sdma_txreq *txp = NULL;
+ struct qib_devdata *dd = ppd->dd;
+ int progress = 0;
+ u16 hwhead;
+ u16 idx = 0;
+
+ hwhead = dd->f_sdma_gethead(ppd);
+
+ /* The reason for some of the complexity of this code is that
+ * not all descriptors have corresponding txps. So, we have to
+ * be able to skip over descs until we wander into the range of
+ * the next txp on the list.
+ */
+
+ if (!list_empty(&ppd->sdma_activelist)) {
+ lp = ppd->sdma_activelist.next;
+ txp = list_entry(lp, struct qib_sdma_txreq, list);
+ idx = txp->start_idx;
+ }
+
+ while (ppd->sdma_descq_head != hwhead) {
+ /* if desc is part of this txp, unmap if needed */
+ if (txp && (txp->flags & QIB_SDMA_TXREQ_F_FREEDESC) &&
+ (idx == ppd->sdma_descq_head)) {
+ unmap_desc(ppd, ppd->sdma_descq_head);
+ if (++idx == ppd->sdma_descq_cnt)
+ idx = 0;
+ }
+
+ /* increment dequed desc count */
+ ppd->sdma_descq_removed++;
+
+ /* advance head, wrap if needed */
+ if (++ppd->sdma_descq_head == ppd->sdma_descq_cnt)
+ ppd->sdma_descq_head = 0;
+
+ /* if now past this txp's descs, do the callback */
+ if (txp && txp->next_descq_idx == ppd->sdma_descq_head) {
+ /* remove from active list */
+ list_del_init(&txp->list);
+ if (txp->callback)
+ (*txp->callback)(txp, QIB_SDMA_TXREQ_S_OK);
+ /* see if there is another txp */
+ if (list_empty(&ppd->sdma_activelist))
+ txp = NULL;
+ else {
+ lp = ppd->sdma_activelist.next;
+ txp = list_entry(lp, struct qib_sdma_txreq,
+ list);
+ idx = txp->start_idx;
+ }
+ }
+ progress = 1;
+ }
+ if (progress)
+ qib_verbs_sdma_desc_avail(ppd, qib_sdma_descq_freecnt(ppd));
+ return progress;
+}
+
+/*
+ * This is called from interrupt context.
+ */
+void qib_sdma_intr(struct qib_pportdata *ppd)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+ __qib_sdma_intr(ppd);
+
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+}
+
+void __qib_sdma_intr(struct qib_pportdata *ppd)
+{
+ if (__qib_sdma_running(ppd))
+ qib_sdma_make_progress(ppd);
+}
+
+int qib_setup_sdma(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+ unsigned long flags;
+ int ret = 0;
+
+ ret = alloc_sdma(ppd);
+ if (ret)
+ goto bail;
+
+ /* set consistent sdma state */
+ ppd->dd->f_sdma_init_early(ppd);
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
+ /* set up reference counting */
+ kref_init(&ppd->sdma_state.kref);
+ init_completion(&ppd->sdma_state.comp);
+
+ ppd->sdma_generation = 0;
+ ppd->sdma_descq_head = 0;
+ ppd->sdma_descq_removed = 0;
+ ppd->sdma_descq_added = 0;
+
+ INIT_LIST_HEAD(&ppd->sdma_activelist);
+
+ tasklet_init(&ppd->sdma_sw_clean_up_task, sdma_sw_clean_up_task,
+ (unsigned long)ppd);
+
+ ret = dd->f_init_sdma_regs(ppd);
+ if (ret)
+ goto bail_alloc;
+
+ qib_sdma_process_event(ppd, qib_sdma_event_e10_go_hw_start);
+
+ return 0;
+
+bail_alloc:
+ qib_teardown_sdma(ppd);
+bail:
+ return ret;
+}
+
+void qib_teardown_sdma(struct qib_pportdata *ppd)
+{
+ qib_sdma_process_event(ppd, qib_sdma_event_e00_go_hw_down);
+
+ /*
+ * This waits for the state machine to exit so it is not
+ * necessary to kill the sdma_sw_clean_up_task to make sure
+ * it is not running.
+ */
+ sdma_finalput(&ppd->sdma_state);
+
+ free_sdma(ppd);
+}
+
+int qib_sdma_running(struct qib_pportdata *ppd)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ ret = __qib_sdma_running(ppd);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
+ return ret;
+}
+
+/*
+ * Complete a request when sdma not running; likely only request
+ * but to simplify the code, always queue it, then process the full
+ * activelist. We process the entire list to ensure that this particular
+ * request does get it's callback, but in the correct order.
+ * Must be called with sdma_lock held
+ */
+static void complete_sdma_err_req(struct qib_pportdata *ppd,
+ struct qib_verbs_txreq *tx)
+{
+ atomic_inc(&tx->qp->s_dma_busy);
+ /* no sdma descriptors, so no unmap_desc */
+ tx->txreq.start_idx = 0;
+ tx->txreq.next_descq_idx = 0;
+ list_add_tail(&tx->txreq.list, &ppd->sdma_activelist);
+ clear_sdma_activelist(ppd);
+}
+
+/*
+ * This function queues one IB packet onto the send DMA queue per call.
+ * The caller is responsible for checking:
+ * 1) The number of send DMA descriptor entries is less than the size of
+ * the descriptor queue.
+ * 2) The IB SGE addresses and lengths are 32-bit aligned
+ * (except possibly the last SGE's length)
+ * 3) The SGE addresses are suitable for passing to dma_map_single().
+ */
+int qib_sdma_verbs_send(struct qib_pportdata *ppd,
+ struct qib_sge_state *ss, u32 dwords,
+ struct qib_verbs_txreq *tx)
+{
+ unsigned long flags;
+ struct qib_sge *sge;
+ struct qib_qp *qp;
+ int ret = 0;
+ u16 tail;
+ __le64 *descqp;
+ u64 sdmadesc[2];
+ u32 dwoffset;
+ dma_addr_t addr;
+
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+retry:
+ if (unlikely(!__qib_sdma_running(ppd))) {
+ complete_sdma_err_req(ppd, tx);
+ goto unlock;
+ }
+
+ if (tx->txreq.sg_count > qib_sdma_descq_freecnt(ppd)) {
+ if (qib_sdma_make_progress(ppd))
+ goto retry;
+ if (ppd->dd->flags & QIB_HAS_SDMA_TIMEOUT)
+ ppd->dd->f_sdma_set_desc_cnt(ppd,
+ ppd->sdma_descq_cnt / 2);
+ goto busy;
+ }
+
+ dwoffset = tx->hdr_dwords;
+ make_sdma_desc(ppd, sdmadesc, (u64) tx->txreq.addr, dwoffset, 0);
+
+ sdmadesc[0] |= SDMA_DESC_FIRST;
+ if (tx->txreq.flags & QIB_SDMA_TXREQ_F_USELARGEBUF)
+ sdmadesc[0] |= SDMA_DESC_USE_LARGE_BUF;
+
+ /* write to the descq */
+ tail = ppd->sdma_descq_tail;
+ descqp = &ppd->sdma_descq[tail].qw[0];
+ *descqp++ = cpu_to_le64(sdmadesc[0]);
+ *descqp++ = cpu_to_le64(sdmadesc[1]);
+
+ /* increment the tail */
+ if (++tail == ppd->sdma_descq_cnt) {
+ tail = 0;
+ descqp = &ppd->sdma_descq[0].qw[0];
+ ++ppd->sdma_generation;
+ }
+
+ tx->txreq.start_idx = tail;
+
+ sge = &ss->sge;
+ while (dwords) {
+ u32 dw;
+ u32 len;
+
+ len = dwords << 2;
+ if (len > sge->length)
+ len = sge->length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ dw = (len + 3) >> 2;
+ addr = dma_map_single(&ppd->dd->pcidev->dev, sge->vaddr,
+ dw << 2, DMA_TO_DEVICE);
+ if (dma_mapping_error(&ppd->dd->pcidev->dev, addr))
+ goto unmap;
+ sdmadesc[0] = 0;
+ make_sdma_desc(ppd, sdmadesc, (u64) addr, dw, dwoffset);
+ /* SDmaUseLargeBuf has to be set in every descriptor */
+ if (tx->txreq.flags & QIB_SDMA_TXREQ_F_USELARGEBUF)
+ sdmadesc[0] |= SDMA_DESC_USE_LARGE_BUF;
+ /* write to the descq */
+ *descqp++ = cpu_to_le64(sdmadesc[0]);
+ *descqp++ = cpu_to_le64(sdmadesc[1]);
+
+ /* increment the tail */
+ if (++tail == ppd->sdma_descq_cnt) {
+ tail = 0;
+ descqp = &ppd->sdma_descq[0].qw[0];
+ ++ppd->sdma_generation;
+ }
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+
+ dwoffset += dw;
+ dwords -= dw;
+ }
+
+ if (!tail)
+ descqp = &ppd->sdma_descq[ppd->sdma_descq_cnt].qw[0];
+ descqp -= 2;
+ descqp[0] |= cpu_to_le64(SDMA_DESC_LAST);
+ if (tx->txreq.flags & QIB_SDMA_TXREQ_F_HEADTOHOST)
+ descqp[0] |= cpu_to_le64(SDMA_DESC_DMA_HEAD);
+ if (tx->txreq.flags & QIB_SDMA_TXREQ_F_INTREQ)
+ descqp[0] |= cpu_to_le64(SDMA_DESC_INTR);
+
+ atomic_inc(&tx->qp->s_dma_busy);
+ tx->txreq.next_descq_idx = tail;
+ ppd->dd->f_sdma_update_tail(ppd, tail);
+ ppd->sdma_descq_added += tx->txreq.sg_count;
+ list_add_tail(&tx->txreq.list, &ppd->sdma_activelist);
+ goto unlock;
+
+unmap:
+ for (;;) {
+ if (!tail)
+ tail = ppd->sdma_descq_cnt - 1;
+ else
+ tail--;
+ if (tail == ppd->sdma_descq_tail)
+ break;
+ unmap_desc(ppd, tail);
+ }
+ qp = tx->qp;
+ qib_put_txreq(tx);
+ spin_lock(&qp->s_lock);
+ if (qp->ibqp.qp_type == IB_QPT_RC) {
+ /* XXX what about error sending RDMA read responses? */
+ if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)
+ qib_error_qp(qp, IB_WC_GENERAL_ERR);
+ } else if (qp->s_wqe)
+ qib_send_complete(qp, qp->s_wqe, IB_WC_GENERAL_ERR);
+ spin_unlock(&qp->s_lock);
+ /* return zero to process the next send work request */
+ goto unlock;
+
+busy:
+ qp = tx->qp;
+ spin_lock(&qp->s_lock);
+ if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
+ struct qib_ibdev *dev;
+
+ /*
+ * If we couldn't queue the DMA request, save the info
+ * and try again later rather than destroying the
+ * buffer and undoing the side effects of the copy.
+ */
+ tx->ss = ss;
+ tx->dwords = dwords;
+ qp->s_tx = tx;
+ dev = &ppd->dd->verbs_dev;
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->iowait)) {
+ struct qib_ibport *ibp;
+
+ ibp = &ppd->ibport_data;
+ ibp->n_dmawait++;
+ qp->s_flags |= QIB_S_WAIT_DMA_DESC;
+ list_add_tail(&qp->iowait, &dev->dmawait);
+ }
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~QIB_S_BUSY;
+ spin_unlock(&qp->s_lock);
+ ret = -EBUSY;
+ } else {
+ spin_unlock(&qp->s_lock);
+ qib_put_txreq(tx);
+ }
+unlock:
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+ return ret;
+}
+
+void qib_sdma_process_event(struct qib_pportdata *ppd,
+ enum qib_sdma_events event)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+ __qib_sdma_process_event(ppd, event);
+
+ if (ppd->sdma_state.current_state == qib_sdma_state_s99_running)
+ qib_verbs_sdma_desc_avail(ppd, qib_sdma_descq_freecnt(ppd));
+
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+}
+
+void __qib_sdma_process_event(struct qib_pportdata *ppd,
+ enum qib_sdma_events event)
+{
+ struct qib_sdma_state *ss = &ppd->sdma_state;
+
+ switch (ss->current_state) {
+ case qib_sdma_state_s00_hw_down:
+ switch (event) {
+ case qib_sdma_event_e00_go_hw_down:
+ break;
+ case qib_sdma_event_e30_go_running:
+ /*
+ * If down, but running requested (usually result
+ * of link up, then we need to start up.
+ * This can happen when hw down is requested while
+ * bringing the link up with traffic active on
+ * 7220, e.g. */
+ ss->go_s99_running = 1;
+ /* fall through and start dma engine */
+ case qib_sdma_event_e10_go_hw_start:
+ /* This reference means the state machine is started */
+ sdma_get(&ppd->sdma_state);
+ sdma_set_state(ppd,
+ qib_sdma_state_s10_hw_start_up_wait);
+ break;
+ case qib_sdma_event_e20_hw_started:
+ break;
+ case qib_sdma_event_e40_sw_cleaned:
+ sdma_sw_tear_down(ppd);
+ break;
+ case qib_sdma_event_e50_hw_cleaned:
+ break;
+ case qib_sdma_event_e60_hw_halted:
+ break;
+ case qib_sdma_event_e70_go_idle:
+ break;
+ case qib_sdma_event_e7220_err_halted:
+ break;
+ case qib_sdma_event_e7322_err_halted:
+ break;
+ case qib_sdma_event_e90_timer_tick:
+ break;
+ }
+ break;
+
+ case qib_sdma_state_s10_hw_start_up_wait:
+ switch (event) {
+ case qib_sdma_event_e00_go_hw_down:
+ sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+ sdma_sw_tear_down(ppd);
+ break;
+ case qib_sdma_event_e10_go_hw_start:
+ break;
+ case qib_sdma_event_e20_hw_started:
+ sdma_set_state(ppd, ss->go_s99_running ?
+ qib_sdma_state_s99_running :
+ qib_sdma_state_s20_idle);
+ break;
+ case qib_sdma_event_e30_go_running:
+ ss->go_s99_running = 1;
+ break;
+ case qib_sdma_event_e40_sw_cleaned:
+ break;
+ case qib_sdma_event_e50_hw_cleaned:
+ break;
+ case qib_sdma_event_e60_hw_halted:
+ break;
+ case qib_sdma_event_e70_go_idle:
+ ss->go_s99_running = 0;
+ break;
+ case qib_sdma_event_e7220_err_halted:
+ break;
+ case qib_sdma_event_e7322_err_halted:
+ break;
+ case qib_sdma_event_e90_timer_tick:
+ break;
+ }
+ break;
+
+ case qib_sdma_state_s20_idle:
+ switch (event) {
+ case qib_sdma_event_e00_go_hw_down:
+ sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+ sdma_sw_tear_down(ppd);
+ break;
+ case qib_sdma_event_e10_go_hw_start:
+ break;
+ case qib_sdma_event_e20_hw_started:
+ break;
+ case qib_sdma_event_e30_go_running:
+ sdma_set_state(ppd, qib_sdma_state_s99_running);
+ ss->go_s99_running = 1;
+ break;
+ case qib_sdma_event_e40_sw_cleaned:
+ break;
+ case qib_sdma_event_e50_hw_cleaned:
+ break;
+ case qib_sdma_event_e60_hw_halted:
+ break;
+ case qib_sdma_event_e70_go_idle:
+ break;
+ case qib_sdma_event_e7220_err_halted:
+ break;
+ case qib_sdma_event_e7322_err_halted:
+ break;
+ case qib_sdma_event_e90_timer_tick:
+ break;
+ }
+ break;
+
+ case qib_sdma_state_s30_sw_clean_up_wait:
+ switch (event) {
+ case qib_sdma_event_e00_go_hw_down:
+ sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+ break;
+ case qib_sdma_event_e10_go_hw_start:
+ break;
+ case qib_sdma_event_e20_hw_started:
+ break;
+ case qib_sdma_event_e30_go_running:
+ ss->go_s99_running = 1;
+ break;
+ case qib_sdma_event_e40_sw_cleaned:
+ sdma_set_state(ppd,
+ qib_sdma_state_s10_hw_start_up_wait);
+ sdma_hw_start_up(ppd);
+ break;
+ case qib_sdma_event_e50_hw_cleaned:
+ break;
+ case qib_sdma_event_e60_hw_halted:
+ break;
+ case qib_sdma_event_e70_go_idle:
+ ss->go_s99_running = 0;
+ break;
+ case qib_sdma_event_e7220_err_halted:
+ break;
+ case qib_sdma_event_e7322_err_halted:
+ break;
+ case qib_sdma_event_e90_timer_tick:
+ break;
+ }
+ break;
+
+ case qib_sdma_state_s40_hw_clean_up_wait:
+ switch (event) {
+ case qib_sdma_event_e00_go_hw_down:
+ sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+ sdma_start_sw_clean_up(ppd);
+ break;
+ case qib_sdma_event_e10_go_hw_start:
+ break;
+ case qib_sdma_event_e20_hw_started:
+ break;
+ case qib_sdma_event_e30_go_running:
+ ss->go_s99_running = 1;
+ break;
+ case qib_sdma_event_e40_sw_cleaned:
+ break;
+ case qib_sdma_event_e50_hw_cleaned:
+ sdma_set_state(ppd,
+ qib_sdma_state_s30_sw_clean_up_wait);
+ sdma_start_sw_clean_up(ppd);
+ break;
+ case qib_sdma_event_e60_hw_halted:
+ break;
+ case qib_sdma_event_e70_go_idle:
+ ss->go_s99_running = 0;
+ break;
+ case qib_sdma_event_e7220_err_halted:
+ break;
+ case qib_sdma_event_e7322_err_halted:
+ break;
+ case qib_sdma_event_e90_timer_tick:
+ break;
+ }
+ break;
+
+ case qib_sdma_state_s50_hw_halt_wait:
+ switch (event) {
+ case qib_sdma_event_e00_go_hw_down:
+ sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+ sdma_start_sw_clean_up(ppd);
+ break;
+ case qib_sdma_event_e10_go_hw_start:
+ break;
+ case qib_sdma_event_e20_hw_started:
+ break;
+ case qib_sdma_event_e30_go_running:
+ ss->go_s99_running = 1;
+ break;
+ case qib_sdma_event_e40_sw_cleaned:
+ break;
+ case qib_sdma_event_e50_hw_cleaned:
+ break;
+ case qib_sdma_event_e60_hw_halted:
+ sdma_set_state(ppd,
+ qib_sdma_state_s40_hw_clean_up_wait);
+ ppd->dd->f_sdma_hw_clean_up(ppd);
+ break;
+ case qib_sdma_event_e70_go_idle:
+ ss->go_s99_running = 0;
+ break;
+ case qib_sdma_event_e7220_err_halted:
+ break;
+ case qib_sdma_event_e7322_err_halted:
+ break;
+ case qib_sdma_event_e90_timer_tick:
+ break;
+ }
+ break;
+
+ case qib_sdma_state_s99_running:
+ switch (event) {
+ case qib_sdma_event_e00_go_hw_down:
+ sdma_set_state(ppd, qib_sdma_state_s00_hw_down);
+ sdma_start_sw_clean_up(ppd);
+ break;
+ case qib_sdma_event_e10_go_hw_start:
+ break;
+ case qib_sdma_event_e20_hw_started:
+ break;
+ case qib_sdma_event_e30_go_running:
+ break;
+ case qib_sdma_event_e40_sw_cleaned:
+ break;
+ case qib_sdma_event_e50_hw_cleaned:
+ break;
+ case qib_sdma_event_e60_hw_halted:
+ sdma_set_state(ppd,
+ qib_sdma_state_s30_sw_clean_up_wait);
+ sdma_start_sw_clean_up(ppd);
+ break;
+ case qib_sdma_event_e70_go_idle:
+ sdma_set_state(ppd, qib_sdma_state_s50_hw_halt_wait);
+ ss->go_s99_running = 0;
+ break;
+ case qib_sdma_event_e7220_err_halted:
+ sdma_set_state(ppd,
+ qib_sdma_state_s30_sw_clean_up_wait);
+ sdma_start_sw_clean_up(ppd);
+ break;
+ case qib_sdma_event_e7322_err_halted:
+ sdma_set_state(ppd, qib_sdma_state_s50_hw_halt_wait);
+ break;
+ case qib_sdma_event_e90_timer_tick:
+ break;
+ }
+ break;
+ }
+
+ ss->last_event = event;
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 36/52] IB/qib: Add qib_srq.c
From: Ralph Campbell @ 2010-05-07 0:02 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_srq.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_srq.c | 374 +++++++++++++++++++++++++++++++++++
1 files changed, 374 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_srq.c
diff --git a/drivers/infiniband/hw/qib/qib_srq.c b/drivers/infiniband/hw/qib/qib_srq.c
new file mode 100644
index 0000000..d79ae33
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_srq.c
@@ -0,0 +1,374 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/err.h>
+#include <linux/vmalloc.h>
+
+#include "qib_verbs.h"
+
+/**
+ * qib_post_srq_receive - post a receive on a shared receive queue
+ * @ibsrq: the SRQ to post the receive on
+ * @wr: the list of work requests to post
+ * @bad_wr: A pointer to the first WR to cause a problem is put here
+ *
+ * This may be called from interrupt context.
+ */
+int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct qib_srq *srq = to_isrq(ibsrq);
+ struct qib_rwq *wq;
+ unsigned long flags;
+ int ret;
+
+ for (; wr; wr = wr->next) {
+ struct qib_rwqe *wqe;
+ u32 next;
+ int i;
+
+ if ((unsigned) wr->num_sge > srq->rq.max_sge) {
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ spin_lock_irqsave(&srq->rq.lock, flags);
+ wq = srq->rq.wq;
+ next = wq->head + 1;
+ if (next >= srq->rq.size)
+ next = 0;
+ if (next == wq->tail) {
+ spin_unlock_irqrestore(&srq->rq.lock, flags);
+ *bad_wr = wr;
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ wqe = get_rwqe_ptr(&srq->rq, wq->head);
+ wqe->wr_id = wr->wr_id;
+ wqe->num_sge = wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++)
+ wqe->sg_list[i] = wr->sg_list[i];
+ /* Make sure queue entry is written before the head index. */
+ smp_wmb();
+ wq->head = next;
+ spin_unlock_irqrestore(&srq->rq.lock, flags);
+ }
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_create_srq - create a shared receive queue
+ * @ibpd: the protection domain of the SRQ to create
+ * @srq_init_attr: the attributes of the SRQ
+ * @udata: data from libibverbs when creating a user SRQ
+ */
+struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata)
+{
+ struct qib_ibdev *dev = to_idev(ibpd->device);
+ struct qib_srq *srq;
+ u32 sz;
+ struct ib_srq *ret;
+
+ if (srq_init_attr->attr.max_sge == 0 ||
+ srq_init_attr->attr.max_sge > ib_qib_max_srq_sges ||
+ srq_init_attr->attr.max_wr == 0 ||
+ srq_init_attr->attr.max_wr > ib_qib_max_srq_wrs) {
+ ret = ERR_PTR(-EINVAL);
+ goto done;
+ }
+
+ srq = kmalloc(sizeof(*srq), GFP_KERNEL);
+ if (!srq) {
+ ret = ERR_PTR(-ENOMEM);
+ goto done;
+ }
+
+ /*
+ * Need to use vmalloc() if we want to support large #s of entries.
+ */
+ srq->rq.size = srq_init_attr->attr.max_wr + 1;
+ srq->rq.max_sge = srq_init_attr->attr.max_sge;
+ sz = sizeof(struct ib_sge) * srq->rq.max_sge +
+ sizeof(struct qib_rwqe);
+ srq->rq.wq = vmalloc_user(sizeof(struct qib_rwq) + srq->rq.size * sz);
+ if (!srq->rq.wq) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_srq;
+ }
+
+ /*
+ * Return the address of the RWQ as the offset to mmap.
+ * See qib_mmap() for details.
+ */
+ if (udata && udata->outlen >= sizeof(__u64)) {
+ int err;
+ u32 s = sizeof(struct qib_rwq) + srq->rq.size * sz;
+
+ srq->ip =
+ qib_create_mmap_info(dev, s, ibpd->uobject->context,
+ srq->rq.wq);
+ if (!srq->ip) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_wq;
+ }
+
+ err = ib_copy_to_udata(udata, &srq->ip->offset,
+ sizeof(srq->ip->offset));
+ if (err) {
+ ret = ERR_PTR(err);
+ goto bail_ip;
+ }
+ } else
+ srq->ip = NULL;
+
+ /*
+ * ib_create_srq() will initialize srq->ibsrq.
+ */
+ spin_lock_init(&srq->rq.lock);
+ srq->rq.wq->head = 0;
+ srq->rq.wq->tail = 0;
+ srq->limit = srq_init_attr->attr.srq_limit;
+
+ spin_lock(&dev->n_srqs_lock);
+ if (dev->n_srqs_allocated == ib_qib_max_srqs) {
+ spin_unlock(&dev->n_srqs_lock);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail_ip;
+ }
+
+ dev->n_srqs_allocated++;
+ spin_unlock(&dev->n_srqs_lock);
+
+ if (srq->ip) {
+ spin_lock_irq(&dev->pending_lock);
+ list_add(&srq->ip->pending_mmaps, &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+
+ ret = &srq->ibsrq;
+ goto done;
+
+bail_ip:
+ kfree(srq->ip);
+bail_wq:
+ vfree(srq->rq.wq);
+bail_srq:
+ kfree(srq);
+done:
+ return ret;
+}
+
+/**
+ * qib_modify_srq - modify a shared receive queue
+ * @ibsrq: the SRQ to modify
+ * @attr: the new attributes of the SRQ
+ * @attr_mask: indicates which attributes to modify
+ * @udata: user data for libibverbs.so
+ */
+int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask,
+ struct ib_udata *udata)
+{
+ struct qib_srq *srq = to_isrq(ibsrq);
+ struct qib_rwq *wq;
+ int ret = 0;
+
+ if (attr_mask & IB_SRQ_MAX_WR) {
+ struct qib_rwq *owq;
+ struct qib_rwqe *p;
+ u32 sz, size, n, head, tail;
+
+ /* Check that the requested sizes are below the limits. */
+ if ((attr->max_wr > ib_qib_max_srq_wrs) ||
+ ((attr_mask & IB_SRQ_LIMIT) ?
+ attr->srq_limit : srq->limit) > attr->max_wr) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ sz = sizeof(struct qib_rwqe) +
+ srq->rq.max_sge * sizeof(struct ib_sge);
+ size = attr->max_wr + 1;
+ wq = vmalloc_user(sizeof(struct qib_rwq) + size * sz);
+ if (!wq) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ /* Check that we can write the offset to mmap. */
+ if (udata && udata->inlen >= sizeof(__u64)) {
+ __u64 offset_addr;
+ __u64 offset = 0;
+
+ ret = ib_copy_from_udata(&offset_addr, udata,
+ sizeof(offset_addr));
+ if (ret)
+ goto bail_free;
+ udata->outbuf =
+ (void __user *) (unsigned long) offset_addr;
+ ret = ib_copy_to_udata(udata, &offset,
+ sizeof(offset));
+ if (ret)
+ goto bail_free;
+ }
+
+ spin_lock_irq(&srq->rq.lock);
+ /*
+ * validate head and tail pointer values and compute
+ * the number of remaining WQEs.
+ */
+ owq = srq->rq.wq;
+ head = owq->head;
+ tail = owq->tail;
+ if (head >= srq->rq.size || tail >= srq->rq.size) {
+ ret = -EINVAL;
+ goto bail_unlock;
+ }
+ n = head;
+ if (n < tail)
+ n += srq->rq.size - tail;
+ else
+ n -= tail;
+ if (size <= n) {
+ ret = -EINVAL;
+ goto bail_unlock;
+ }
+ n = 0;
+ p = wq->wq;
+ while (tail != head) {
+ struct qib_rwqe *wqe;
+ int i;
+
+ wqe = get_rwqe_ptr(&srq->rq, tail);
+ p->wr_id = wqe->wr_id;
+ p->num_sge = wqe->num_sge;
+ for (i = 0; i < wqe->num_sge; i++)
+ p->sg_list[i] = wqe->sg_list[i];
+ n++;
+ p = (struct qib_rwqe *)((char *) p + sz);
+ if (++tail >= srq->rq.size)
+ tail = 0;
+ }
+ srq->rq.wq = wq;
+ srq->rq.size = size;
+ wq->head = n;
+ wq->tail = 0;
+ if (attr_mask & IB_SRQ_LIMIT)
+ srq->limit = attr->srq_limit;
+ spin_unlock_irq(&srq->rq.lock);
+
+ vfree(owq);
+
+ if (srq->ip) {
+ struct qib_mmap_info *ip = srq->ip;
+ struct qib_ibdev *dev = to_idev(srq->ibsrq.device);
+ u32 s = sizeof(struct qib_rwq) + size * sz;
+
+ qib_update_mmap_info(dev, ip, s, wq);
+
+ /*
+ * Return the offset to mmap.
+ * See qib_mmap() for details.
+ */
+ if (udata && udata->inlen >= sizeof(__u64)) {
+ ret = ib_copy_to_udata(udata, &ip->offset,
+ sizeof(ip->offset));
+ if (ret)
+ goto bail;
+ }
+
+ /*
+ * Put user mapping info onto the pending list
+ * unless it already is on the list.
+ */
+ spin_lock_irq(&dev->pending_lock);
+ if (list_empty(&ip->pending_mmaps))
+ list_add(&ip->pending_mmaps,
+ &dev->pending_mmaps);
+ spin_unlock_irq(&dev->pending_lock);
+ }
+ } else if (attr_mask & IB_SRQ_LIMIT) {
+ spin_lock_irq(&srq->rq.lock);
+ if (attr->srq_limit >= srq->rq.size)
+ ret = -EINVAL;
+ else
+ srq->limit = attr->srq_limit;
+ spin_unlock_irq(&srq->rq.lock);
+ }
+ goto bail;
+
+bail_unlock:
+ spin_unlock_irq(&srq->rq.lock);
+bail_free:
+ vfree(wq);
+bail:
+ return ret;
+}
+
+int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
+{
+ struct qib_srq *srq = to_isrq(ibsrq);
+
+ attr->max_wr = srq->rq.size - 1;
+ attr->max_sge = srq->rq.max_sge;
+ attr->srq_limit = srq->limit;
+ return 0;
+}
+
+/**
+ * qib_destroy_srq - destroy a shared receive queue
+ * @ibsrq: the SRQ to destroy
+ */
+int qib_destroy_srq(struct ib_srq *ibsrq)
+{
+ struct qib_srq *srq = to_isrq(ibsrq);
+ struct qib_ibdev *dev = to_idev(ibsrq->device);
+
+ spin_lock(&dev->n_srqs_lock);
+ dev->n_srqs_allocated--;
+ spin_unlock(&dev->n_srqs_lock);
+ if (srq->ip)
+ kref_put(&srq->ip->ref, qib_release_mmap_info);
+ else
+ vfree(srq->rq.wq);
+ kfree(srq);
+
+ return 0;
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 37/52] IB/qib: Add qib_sysfs.c
From: Ralph Campbell @ 2010-05-07 0:02 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_sysfs.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_sysfs.c | 691 +++++++++++++++++++++++++++++++++
1 files changed, 691 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_sysfs.c
diff --git a/drivers/infiniband/hw/qib/qib_sysfs.c b/drivers/infiniband/hw/qib/qib_sysfs.c
new file mode 100644
index 0000000..dab4d9f
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_sysfs.c
@@ -0,0 +1,691 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/ctype.h>
+
+#include "qib.h"
+
+/**
+ * qib_parse_ushort - parse an unsigned short value in an arbitrary base
+ * @str: the string containing the number
+ * @valp: where to put the result
+ *
+ * Returns the number of bytes consumed, or negative value on error.
+ */
+static int qib_parse_ushort(const char *str, unsigned short *valp)
+{
+ unsigned long val;
+ char *end;
+ int ret;
+
+ if (!isdigit(str[0])) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ val = simple_strtoul(str, &end, 0);
+
+ if (val > 0xffff) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ *valp = val;
+
+ ret = end + 1 - str;
+ if (ret == 0)
+ ret = -EINVAL;
+
+bail:
+ return ret;
+}
+
+/* start of per-port functions */
+/*
+ * Get/Set heartbeat enable. OR of 1=enabled, 2=auto
+ */
+static ssize_t show_hrtbt_enb(struct qib_pportdata *ppd, char *buf)
+{
+ struct qib_devdata *dd = ppd->dd;
+ int ret;
+
+ ret = dd->f_get_ib_cfg(ppd, QIB_IB_CFG_HRTBT);
+ ret = scnprintf(buf, PAGE_SIZE, "%d\n", ret);
+ return ret;
+}
+
+static ssize_t store_hrtbt_enb(struct qib_pportdata *ppd, const char *buf,
+ size_t count)
+{
+ struct qib_devdata *dd = ppd->dd;
+ int ret;
+ u16 val;
+
+ ret = qib_parse_ushort(buf, &val);
+
+ /*
+ * Set the "intentional" heartbeat enable per either of
+ * "Enable" and "Auto", as these are normally set together.
+ * This bit is consulted when leaving loopback mode,
+ * because entering loopback mode overrides it and automatically
+ * disables heartbeat.
+ */
+ if (ret >= 0)
+ ret = dd->f_set_ib_cfg(ppd, QIB_IB_CFG_HRTBT, val);
+ if (ret < 0)
+ qib_dev_err(dd, "attempt to set invalid Heartbeat enable\n");
+ return ret < 0 ? ret : count;
+}
+
+static ssize_t store_loopback(struct qib_pportdata *ppd, const char *buf,
+ size_t count)
+{
+ struct qib_devdata *dd = ppd->dd;
+ int ret = count, r;
+
+ r = dd->f_set_ib_loopback(ppd, buf);
+ if (r < 0)
+ ret = r;
+
+ return ret;
+}
+
+static ssize_t store_led_override(struct qib_pportdata *ppd, const char *buf,
+ size_t count)
+{
+ struct qib_devdata *dd = ppd->dd;
+ int ret;
+ u16 val;
+
+ ret = qib_parse_ushort(buf, &val);
+ if (ret > 0)
+ qib_set_led_override(ppd, val);
+ else
+ qib_dev_err(dd, "attempt to set invalid LED override\n");
+ return ret < 0 ? ret : count;
+}
+
+static ssize_t show_status(struct qib_pportdata *ppd, char *buf)
+{
+ ssize_t ret;
+
+ if (!ppd->statusp)
+ ret = -EINVAL;
+ else
+ ret = scnprintf(buf, PAGE_SIZE, "0x%llx\n",
+ (unsigned long long) *(ppd->statusp));
+ return ret;
+}
+
+/*
+ * For userland compatibility, these offsets must remain fixed.
+ * They are strings for QIB_STATUS_*
+ */
+static const char *qib_status_str[] = {
+ "Initted",
+ "",
+ "",
+ "",
+ "",
+ "Present",
+ "IB_link_up",
+ "IB_configured",
+ "",
+ "Fatal_Hardware_Error",
+ NULL,
+};
+
+static ssize_t show_status_str(struct qib_pportdata *ppd, char *buf)
+{
+ int i, any;
+ u64 s;
+ ssize_t ret;
+
+ if (!ppd->statusp) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ s = *(ppd->statusp);
+ *buf = '\0';
+ for (any = i = 0; s && qib_status_str[i]; i++) {
+ if (s & 1) {
+ /* if overflow */
+ if (any && strlcat(buf, " ", PAGE_SIZE) >= PAGE_SIZE)
+ break;
+ if (strlcat(buf, qib_status_str[i], PAGE_SIZE) >=
+ PAGE_SIZE)
+ break;
+ any = 1;
+ }
+ s >>= 1;
+ }
+ if (any)
+ strlcat(buf, "\n", PAGE_SIZE);
+
+ ret = strlen(buf);
+
+bail:
+ return ret;
+}
+
+/* end of per-port functions */
+
+/*
+ * Start of per-port file structures and support code
+ * Because we are fitting into other infrastructure, we have to supply the
+ * full set of kobject/sysfs_ops structures and routines.
+ */
+#define QIB_PORT_ATTR(name, mode, show, store) \
+ static struct qib_port_attr qib_port_attr_##name = \
+ __ATTR(name, mode, show, store)
+
+struct qib_port_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct qib_pportdata *, char *);
+ ssize_t (*store)(struct qib_pportdata *, const char *, size_t);
+};
+
+QIB_PORT_ATTR(loopback, S_IWUSR, NULL, store_loopback);
+QIB_PORT_ATTR(led_override, S_IWUSR, NULL, store_led_override);
+QIB_PORT_ATTR(hrtbt_enable, S_IWUSR | S_IRUGO, show_hrtbt_enb,
+ store_hrtbt_enb);
+QIB_PORT_ATTR(status, S_IRUGO, show_status, NULL);
+QIB_PORT_ATTR(status_str, S_IRUGO, show_status_str, NULL);
+
+static struct attribute *port_default_attributes[] = {
+ &qib_port_attr_loopback.attr,
+ &qib_port_attr_led_override.attr,
+ &qib_port_attr_hrtbt_enable.attr,
+ &qib_port_attr_status.attr,
+ &qib_port_attr_status_str.attr,
+ NULL
+};
+
+static ssize_t qib_portattr_show(struct kobject *kobj,
+ struct attribute *attr, char *buf)
+{
+ struct qib_port_attr *pattr =
+ container_of(attr, struct qib_port_attr, attr);
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, pport_kobj);
+
+ return pattr->show(ppd, buf);
+}
+
+static ssize_t qib_portattr_store(struct kobject *kobj,
+ struct attribute *attr, const char *buf, size_t len)
+{
+ struct qib_port_attr *pattr =
+ container_of(attr, struct qib_port_attr, attr);
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, pport_kobj);
+
+ return pattr->store(ppd, buf, len);
+}
+
+static void qib_port_release(struct kobject *kobj)
+{
+ /* nothing to do since memory is freed by qib_free_devdata() */
+}
+
+static const struct sysfs_ops qib_port_ops = {
+ .show = qib_portattr_show,
+ .store = qib_portattr_store,
+};
+
+static struct kobj_type qib_port_ktype = {
+ .release = qib_port_release,
+ .sysfs_ops = &qib_port_ops,
+ .default_attrs = port_default_attributes
+};
+
+/* Start sl2vl */
+
+#define QIB_SL2VL_ATTR(N) \
+ static struct qib_sl2vl_attr qib_sl2vl_attr_##N = { \
+ .attr = { .name = __stringify(N), .mode = 0444 }, \
+ .sl = N \
+ }
+
+struct qib_sl2vl_attr {
+ struct attribute attr;
+ int sl;
+};
+
+QIB_SL2VL_ATTR(0);
+QIB_SL2VL_ATTR(1);
+QIB_SL2VL_ATTR(2);
+QIB_SL2VL_ATTR(3);
+QIB_SL2VL_ATTR(4);
+QIB_SL2VL_ATTR(5);
+QIB_SL2VL_ATTR(6);
+QIB_SL2VL_ATTR(7);
+QIB_SL2VL_ATTR(8);
+QIB_SL2VL_ATTR(9);
+QIB_SL2VL_ATTR(10);
+QIB_SL2VL_ATTR(11);
+QIB_SL2VL_ATTR(12);
+QIB_SL2VL_ATTR(13);
+QIB_SL2VL_ATTR(14);
+QIB_SL2VL_ATTR(15);
+
+static struct attribute *sl2vl_default_attributes[] = {
+ &qib_sl2vl_attr_0.attr,
+ &qib_sl2vl_attr_1.attr,
+ &qib_sl2vl_attr_2.attr,
+ &qib_sl2vl_attr_3.attr,
+ &qib_sl2vl_attr_4.attr,
+ &qib_sl2vl_attr_5.attr,
+ &qib_sl2vl_attr_6.attr,
+ &qib_sl2vl_attr_7.attr,
+ &qib_sl2vl_attr_8.attr,
+ &qib_sl2vl_attr_9.attr,
+ &qib_sl2vl_attr_10.attr,
+ &qib_sl2vl_attr_11.attr,
+ &qib_sl2vl_attr_12.attr,
+ &qib_sl2vl_attr_13.attr,
+ &qib_sl2vl_attr_14.attr,
+ &qib_sl2vl_attr_15.attr,
+ NULL
+};
+
+static ssize_t sl2vl_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct qib_sl2vl_attr *sattr =
+ container_of(attr, struct qib_sl2vl_attr, attr);
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, sl2vl_kobj);
+ struct qib_ibport *qibp = &ppd->ibport_data;
+
+ return sprintf(buf, "%u\n", qibp->sl_to_vl[sattr->sl]);
+}
+
+static const struct sysfs_ops qib_sl2vl_ops = {
+ .show = sl2vl_attr_show,
+};
+
+static struct kobj_type qib_sl2vl_ktype = {
+ .release = qib_port_release,
+ .sysfs_ops = &qib_sl2vl_ops,
+ .default_attrs = sl2vl_default_attributes
+};
+
+/* End sl2vl */
+
+/* Start diag_counters */
+
+#define QIB_DIAGC_ATTR(N) \
+ static struct qib_diagc_attr qib_diagc_attr_##N = { \
+ .attr = { .name = __stringify(N), .mode = 0444 }, \
+ .counter = offsetof(struct qib_ibport, n_##N) \
+ }
+
+struct qib_diagc_attr {
+ struct attribute attr;
+ size_t counter;
+};
+
+QIB_DIAGC_ATTR(rc_resends);
+QIB_DIAGC_ATTR(rc_acks);
+QIB_DIAGC_ATTR(rc_qacks);
+QIB_DIAGC_ATTR(rc_delayed_comp);
+QIB_DIAGC_ATTR(seq_naks);
+QIB_DIAGC_ATTR(rdma_seq);
+QIB_DIAGC_ATTR(rnr_naks);
+QIB_DIAGC_ATTR(other_naks);
+QIB_DIAGC_ATTR(rc_timeouts);
+QIB_DIAGC_ATTR(loop_pkts);
+QIB_DIAGC_ATTR(pkt_drops);
+QIB_DIAGC_ATTR(dmawait);
+QIB_DIAGC_ATTR(unaligned);
+QIB_DIAGC_ATTR(rc_dupreq);
+QIB_DIAGC_ATTR(rc_seqnak);
+
+static struct attribute *diagc_default_attributes[] = {
+ &qib_diagc_attr_rc_resends.attr,
+ &qib_diagc_attr_rc_acks.attr,
+ &qib_diagc_attr_rc_qacks.attr,
+ &qib_diagc_attr_rc_delayed_comp.attr,
+ &qib_diagc_attr_seq_naks.attr,
+ &qib_diagc_attr_rdma_seq.attr,
+ &qib_diagc_attr_rnr_naks.attr,
+ &qib_diagc_attr_other_naks.attr,
+ &qib_diagc_attr_rc_timeouts.attr,
+ &qib_diagc_attr_loop_pkts.attr,
+ &qib_diagc_attr_pkt_drops.attr,
+ &qib_diagc_attr_dmawait.attr,
+ &qib_diagc_attr_unaligned.attr,
+ &qib_diagc_attr_rc_dupreq.attr,
+ &qib_diagc_attr_rc_seqnak.attr,
+ NULL
+};
+
+static ssize_t diagc_attr_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
+{
+ struct qib_diagc_attr *dattr =
+ container_of(attr, struct qib_diagc_attr, attr);
+ struct qib_pportdata *ppd =
+ container_of(kobj, struct qib_pportdata, diagc_kobj);
+ struct qib_ibport *qibp = &ppd->ibport_data;
+
+ return sprintf(buf, "%u\n", *(u32 *)((char *)qibp + dattr->counter));
+}
+
+static const struct sysfs_ops qib_diagc_ops = {
+ .show = diagc_attr_show,
+};
+
+static struct kobj_type qib_diagc_ktype = {
+ .release = qib_port_release,
+ .sysfs_ops = &qib_diagc_ops,
+ .default_attrs = diagc_default_attributes
+};
+
+/* End diag_counters */
+
+/* end of per-port file structures and support code */
+
+/*
+ * Start of per-unit (or driver, in some cases, but replicated
+ * per unit) functions (these get a device *)
+ */
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+
+ return sprintf(buf, "%x\n", dd_from_dev(dev)->minrev);
+}
+
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+ char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+ int ret;
+
+ if (!dd->boardname)
+ ret = -EINVAL;
+ else
+ ret = scnprintf(buf, PAGE_SIZE, "%s\n", dd->boardname);
+ return ret;
+}
+
+static ssize_t show_version(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ /* The string printed here is already newline-terminated. */
+ return scnprintf(buf, PAGE_SIZE, "%s", (char *)ib_qib_version);
+}
+
+static ssize_t show_boardversion(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+
+ /* The string printed here is already newline-terminated. */
+ return scnprintf(buf, PAGE_SIZE, "%s", dd->boardversion);
+}
+
+
+static ssize_t show_localbus_info(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+
+ /* The string printed here is already newline-terminated. */
+ return scnprintf(buf, PAGE_SIZE, "%s", dd->lbus_info);
+}
+
+
+static ssize_t show_nctxts(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+
+ /* Return the number of user ports (contexts) available. */
+ return scnprintf(buf, PAGE_SIZE, "%u\n", dd->cfgctxts -
+ dd->first_user_ctxt);
+}
+
+static ssize_t show_serial(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+
+ buf[sizeof dd->serial] = '\0';
+ memcpy(buf, dd->serial, sizeof dd->serial);
+ strcat(buf, "\n");
+ return strlen(buf);
+}
+
+static ssize_t store_chip_reset(struct device *device,
+ struct device_attribute *attr, const char *buf,
+ size_t count)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+ int ret;
+
+ if (count < 5 || memcmp(buf, "reset", 5) || !dd->diag_client) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ ret = qib_reset_device(dd->unit);
+bail:
+ return ret < 0 ? ret : count;
+}
+
+static ssize_t show_logged_errs(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+ int idx, count;
+
+ /* force consistency with actual EEPROM */
+ if (qib_update_eeprom_log(dd) != 0)
+ return -ENXIO;
+
+ count = 0;
+ for (idx = 0; idx < QIB_EEP_LOG_CNT; ++idx) {
+ count += scnprintf(buf + count, PAGE_SIZE - count, "%d%c",
+ dd->eep_st_errs[idx],
+ idx == (QIB_EEP_LOG_CNT - 1) ? '\n' : ' ');
+ }
+
+ return count;
+}
+
+/*
+ * Dump tempsense regs. in decimal, to ease shell-scripts.
+ */
+static ssize_t show_tempsense(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct qib_ibdev *dev =
+ container_of(device, struct qib_ibdev, ibdev.dev);
+ struct qib_devdata *dd = dd_from_dev(dev);
+ int ret;
+ int idx;
+ u8 regvals[8];
+
+ ret = -ENXIO;
+ for (idx = 0; idx < 8; ++idx) {
+ if (idx == 6)
+ continue;
+ ret = dd->f_tempsense_rd(dd, idx);
+ if (ret < 0)
+ break;
+ regvals[idx] = ret;
+ }
+ if (idx == 8)
+ ret = scnprintf(buf, PAGE_SIZE, "%d %d %02X %02X %d %d\n",
+ *(signed char *)(regvals),
+ *(signed char *)(regvals + 1),
+ regvals[2], regvals[3],
+ *(signed char *)(regvals + 5),
+ *(signed char *)(regvals + 7));
+ return ret;
+}
+
+/*
+ * end of per-unit (or driver, in some cases, but replicated
+ * per unit) functions
+ */
+
+/* start of per-unit file structures and support code */
+static DEVICE_ATTR(hw_rev, S_IRUGO, show_rev, NULL);
+static DEVICE_ATTR(hca_type, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(board_id, S_IRUGO, show_hca, NULL);
+static DEVICE_ATTR(version, S_IRUGO, show_version, NULL);
+static DEVICE_ATTR(nctxts, S_IRUGO, show_nctxts, NULL);
+static DEVICE_ATTR(serial, S_IRUGO, show_serial, NULL);
+static DEVICE_ATTR(boardversion, S_IRUGO, show_boardversion, NULL);
+static DEVICE_ATTR(logged_errors, S_IRUGO, show_logged_errs, NULL);
+static DEVICE_ATTR(tempsense, S_IRUGO, show_tempsense, NULL);
+static DEVICE_ATTR(localbus_info, S_IRUGO, show_localbus_info, NULL);
+static DEVICE_ATTR(chip_reset, S_IWUSR, NULL, store_chip_reset);
+
+static struct device_attribute *qib_attributes[] = {
+ &dev_attr_hw_rev,
+ &dev_attr_hca_type,
+ &dev_attr_board_id,
+ &dev_attr_version,
+ &dev_attr_nctxts,
+ &dev_attr_serial,
+ &dev_attr_boardversion,
+ &dev_attr_logged_errors,
+ &dev_attr_tempsense,
+ &dev_attr_localbus_info,
+ &dev_attr_chip_reset,
+};
+
+int qib_create_port_files(struct ib_device *ibdev, u8 port_num,
+ struct kobject *kobj)
+{
+ struct qib_pportdata *ppd;
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+ int ret;
+
+ if (!port_num || port_num > dd->num_pports) {
+ qib_dev_err(dd, "Skipping infiniband class with "
+ "invalid port %u\n", port_num);
+ ret = -ENODEV;
+ goto bail;
+ }
+ ppd = &dd->pport[port_num - 1];
+
+ ret = kobject_init_and_add(&ppd->pport_kobj, &qib_port_ktype, kobj,
+ "linkcontrol");
+ if (ret) {
+ qib_dev_err(dd, "Skipping linkcontrol sysfs info, "
+ "(err %d) port %u\n", ret, port_num);
+ goto bail;
+ }
+ kobject_uevent(&ppd->pport_kobj, KOBJ_ADD);
+
+ ret = kobject_init_and_add(&ppd->sl2vl_kobj, &qib_sl2vl_ktype, kobj,
+ "sl2vl");
+ if (ret) {
+ qib_dev_err(dd, "Skipping sl2vl sysfs info, "
+ "(err %d) port %u\n", ret, port_num);
+ goto bail_sl;
+ }
+ kobject_uevent(&ppd->sl2vl_kobj, KOBJ_ADD);
+
+ ret = kobject_init_and_add(&ppd->diagc_kobj, &qib_diagc_ktype, kobj,
+ "diag_counters");
+ if (ret) {
+ qib_dev_err(dd, "Skipping diag_counters sysfs info, "
+ "(err %d) port %u\n", ret, port_num);
+ goto bail_diagc;
+ }
+ kobject_uevent(&ppd->diagc_kobj, KOBJ_ADD);
+
+ return 0;
+
+bail_diagc:
+ kobject_put(&ppd->sl2vl_kobj);
+bail_sl:
+ kobject_put(&ppd->pport_kobj);
+bail:
+ return ret;
+}
+
+/*
+ * Register and create our files in /sys/class/infiniband.
+ */
+int qib_verbs_register_sysfs(struct qib_devdata *dd)
+{
+ struct ib_device *dev = &dd->verbs_dev.ibdev;
+ int i, ret;
+
+ for (i = 0; i < ARRAY_SIZE(qib_attributes); ++i) {
+ ret = device_create_file(&dev->dev, qib_attributes[i]);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * Unregister and remove our files in /sys/class/infiniband.
+ */
+void qib_verbs_unregister_sysfs(struct qib_devdata *dd)
+{
+ struct qib_pportdata *ppd;
+ int i;
+
+ for (i = 0; i < dd->num_pports; i++) {
+ ppd = &dd->pport[i];
+ kobject_put(&ppd->pport_kobj);
+ kobject_put(&ppd->sl2vl_kobj);
+ }
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 38/52] IB/qib: Add qib_twsi.c
From: Ralph Campbell @ 2010-05-07 0:02 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_twsi.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_twsi.c | 498 ++++++++++++++++++++++++++++++++++
1 files changed, 498 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_twsi.c
diff --git a/drivers/infiniband/hw/qib/qib_twsi.c b/drivers/infiniband/hw/qib/qib_twsi.c
new file mode 100644
index 0000000..6f31ca5
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_twsi.c
@@ -0,0 +1,498 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/delay.h>
+#include <linux/pci.h>
+#include <linux/vmalloc.h>
+
+#include "qib.h"
+
+/*
+ * QLogic_IB "Two Wire Serial Interface" driver.
+ * Originally written for a not-quite-i2c serial eeprom, which is
+ * still used on some supported boards. Later boards have added a
+ * variety of other uses, most board-specific, so teh bit-boffing
+ * part has been split off to this file, while the other parts
+ * have been moved to chip-specific files.
+ *
+ * We have also dropped all pretense of fully generic (e.g. pretend
+ * we don't know whether '1' is the higher voltage) interface, as
+ * the restrictions of the generic i2c interface (e.g. no access from
+ * driver itself) make it unsuitable for this use.
+ */
+
+#define READ_CMD 1
+#define WRITE_CMD 0
+
+/**
+ * i2c_wait_for_writes - wait for a write
+ * @dd: the qlogic_ib device
+ *
+ * We use this instead of udelay directly, so we can make sure
+ * that previous register writes have been flushed all the way
+ * to the chip. Since we are delaying anyway, the cost doesn't
+ * hurt, and makes the bit twiddling more regular
+ */
+static void i2c_wait_for_writes(struct qib_devdata *dd)
+{
+ /*
+ * implicit read of EXTStatus is as good as explicit
+ * read of scratch, if all we want to do is flush
+ * writes.
+ */
+ dd->f_gpio_mod(dd, 0, 0, 0);
+ rmb(); /* inlined, so prevent compiler reordering */
+}
+
+/*
+ * QSFP modules are allowed to hold SCL low for 500uSec. Allow twice that
+ * for "almost compliant" modules
+ */
+#define SCL_WAIT_USEC 1000
+
+/* BUF_WAIT is time bus must be free between STOP or ACK and to next START.
+ * Should be 20, but some chips need more.
+ */
+#define TWSI_BUF_WAIT_USEC 60
+
+static void scl_out(struct qib_devdata *dd, u8 bit)
+{
+ u32 mask;
+
+ udelay(1);
+
+ mask = 1UL << dd->gpio_scl_num;
+
+ /* SCL is meant to be bare-drain, so never set "OUT", just DIR */
+ dd->f_gpio_mod(dd, 0, bit ? 0 : mask, mask);
+
+ /*
+ * Allow for slow slaves by simple
+ * delay for falling edge, sampling on rise.
+ */
+ if (!bit)
+ udelay(2);
+ else {
+ int rise_usec;
+ for (rise_usec = SCL_WAIT_USEC; rise_usec > 0; rise_usec -= 2) {
+ if (mask & dd->f_gpio_mod(dd, 0, 0, 0))
+ break;
+ udelay(2);
+ }
+ if (rise_usec <= 0)
+ qib_dev_err(dd, "SCL interface stuck low > %d uSec\n",
+ SCL_WAIT_USEC);
+ }
+ i2c_wait_for_writes(dd);
+}
+
+static void sda_out(struct qib_devdata *dd, u8 bit)
+{
+ u32 mask;
+
+ mask = 1UL << dd->gpio_sda_num;
+
+ /* SDA is meant to be bare-drain, so never set "OUT", just DIR */
+ dd->f_gpio_mod(dd, 0, bit ? 0 : mask, mask);
+
+ i2c_wait_for_writes(dd);
+ udelay(2);
+}
+
+static u8 sda_in(struct qib_devdata *dd, int wait)
+{
+ int bnum;
+ u32 read_val, mask;
+
+ bnum = dd->gpio_sda_num;
+ mask = (1UL << bnum);
+ /* SDA is meant to be bare-drain, so never set "OUT", just DIR */
+ dd->f_gpio_mod(dd, 0, 0, mask);
+ read_val = dd->f_gpio_mod(dd, 0, 0, 0);
+ if (wait)
+ i2c_wait_for_writes(dd);
+ return (read_val & mask) >> bnum;
+}
+
+/**
+ * i2c_ackrcv - see if ack following write is true
+ * @dd: the qlogic_ib device
+ */
+static int i2c_ackrcv(struct qib_devdata *dd)
+{
+ u8 ack_received;
+
+ /* AT ENTRY SCL = LOW */
+ /* change direction, ignore data */
+ ack_received = sda_in(dd, 1);
+ scl_out(dd, 1);
+ ack_received = sda_in(dd, 1) == 0;
+ scl_out(dd, 0);
+ return ack_received;
+}
+
+static void stop_cmd(struct qib_devdata *dd);
+
+/**
+ * rd_byte - read a byte, sending STOP on last, else ACK
+ * @dd: the qlogic_ib device
+ *
+ * Returns byte shifted out of device
+ */
+static int rd_byte(struct qib_devdata *dd, int last)
+{
+ int bit_cntr, data;
+
+ data = 0;
+
+ for (bit_cntr = 7; bit_cntr >= 0; --bit_cntr) {
+ data <<= 1;
+ scl_out(dd, 1);
+ data |= sda_in(dd, 0);
+ scl_out(dd, 0);
+ }
+ if (last) {
+ scl_out(dd, 1);
+ stop_cmd(dd);
+ } else {
+ sda_out(dd, 0);
+ scl_out(dd, 1);
+ scl_out(dd, 0);
+ sda_out(dd, 1);
+ }
+ return data;
+}
+
+/**
+ * wr_byte - write a byte, one bit at a time
+ * @dd: the qlogic_ib device
+ * @data: the byte to write
+ *
+ * Returns 0 if we got the following ack, otherwise 1
+ */
+static int wr_byte(struct qib_devdata *dd, u8 data)
+{
+ int bit_cntr;
+ u8 bit;
+
+ for (bit_cntr = 7; bit_cntr >= 0; bit_cntr--) {
+ bit = (data >> bit_cntr) & 1;
+ sda_out(dd, bit);
+ scl_out(dd, 1);
+ scl_out(dd, 0);
+ }
+ return (!i2c_ackrcv(dd)) ? 1 : 0;
+}
+
+/*
+ * issue TWSI start sequence:
+ * (both clock/data high, clock high, data low while clock is high)
+ */
+static void start_seq(struct qib_devdata *dd)
+{
+ sda_out(dd, 1);
+ scl_out(dd, 1);
+ sda_out(dd, 0);
+ udelay(1);
+ scl_out(dd, 0);
+}
+
+/**
+ * stop_seq - transmit the stop sequence
+ * @dd: the qlogic_ib device
+ *
+ * (both clock/data low, clock high, data high while clock is high)
+ */
+static void stop_seq(struct qib_devdata *dd)
+{
+ scl_out(dd, 0);
+ sda_out(dd, 0);
+ scl_out(dd, 1);
+ sda_out(dd, 1);
+}
+
+/**
+ * stop_cmd - transmit the stop condition
+ * @dd: the qlogic_ib device
+ *
+ * (both clock/data low, clock high, data high while clock is high)
+ */
+static void stop_cmd(struct qib_devdata *dd)
+{
+ stop_seq(dd);
+ udelay(TWSI_BUF_WAIT_USEC);
+}
+
+/**
+ * qib_twsi_reset - reset I2C communication
+ * @dd: the qlogic_ib device
+ */
+
+int qib_twsi_reset(struct qib_devdata *dd)
+{
+ int clock_cycles_left = 9;
+ int was_high = 0;
+ u32 pins, mask;
+
+ /* Both SCL and SDA should be high. If not, there
+ * is something wrong.
+ */
+ mask = (1UL << dd->gpio_scl_num) | (1UL << dd->gpio_sda_num);
+
+ /*
+ * Force pins to desired innocuous state.
+ * This is the default power-on state with out=0 and dir=0,
+ * So tri-stated and should be floating high (barring HW problems)
+ */
+ dd->f_gpio_mod(dd, 0, 0, mask);
+
+ /*
+ * Clock nine times to get all listeners into a sane state.
+ * If SDA does not go high at any point, we are wedged.
+ * One vendor recommends then issuing START followed by STOP.
+ * we cannot use our "normal" functions to do that, because
+ * if SCL drops between them, another vendor's part will
+ * wedge, dropping SDA and keeping it low forever, at the end of
+ * the next transaction (even if it was not the device addressed).
+ * So our START and STOP take place with SCL held high.
+ */
+ while (clock_cycles_left--) {
+ scl_out(dd, 0);
+ scl_out(dd, 1);
+ /* Note if SDA is high, but keep clocking to sync slave */
+ was_high |= sda_in(dd, 0);
+ }
+
+ if (was_high) {
+ /*
+ * We saw a high, which we hope means the slave is sync'd.
+ * Issue START, STOP, pause for T_BUF.
+ */
+
+ pins = dd->f_gpio_mod(dd, 0, 0, 0);
+ if ((pins & mask) != mask)
+ qib_dev_err(dd, "GPIO pins not at rest: %d\n",
+ pins & mask);
+ /* Drop SDA to issue START */
+ udelay(1); /* Guarantee .6 uSec setup */
+ sda_out(dd, 0);
+ udelay(1); /* Guarantee .6 uSec hold */
+ /* At this point, SCL is high, SDA low. Raise SDA for STOP */
+ sda_out(dd, 1);
+ udelay(TWSI_BUF_WAIT_USEC);
+ }
+
+ return !was_high;
+}
+
+#define QIB_TWSI_START 0x100
+#define QIB_TWSI_STOP 0x200
+
+/* Write byte to TWSI, optionally prefixed with START or suffixed with
+ * STOP.
+ * returns 0 if OK (ACK received), else != 0
+ */
+static int qib_twsi_wr(struct qib_devdata *dd, int data, int flags)
+{
+ int ret = 1;
+ if (flags & QIB_TWSI_START)
+ start_seq(dd);
+
+ ret = wr_byte(dd, data); /* Leaves SCL low (from i2c_ackrcv()) */
+
+ if (flags & QIB_TWSI_STOP)
+ stop_cmd(dd);
+ return ret;
+}
+
+/* Added functionality for IBA7220-based cards */
+#define QIB_TEMP_DEV 0x98
+
+/*
+ * qib_twsi_blk_rd
+ * Formerly called qib_eeprom_internal_read, and only used for eeprom,
+ * but now the general interface for data transfer from twsi devices.
+ * One vestige of its former role is that it recognizes a device
+ * QIB_TWSI_NO_DEV and does the correct operation for the legacy part,
+ * which responded to all TWSI device codes, interpreting them as
+ * address within device. On all other devices found on board handled by
+ * this driver, the device is followed by a one-byte "address" which selects
+ * the "register" or "offset" within the device from which data should
+ * be read.
+ */
+int qib_twsi_blk_rd(struct qib_devdata *dd, int dev, int addr,
+ void *buffer, int len)
+{
+ int ret;
+ u8 *bp = buffer;
+
+ ret = 1;
+
+ if (dev == QIB_TWSI_NO_DEV) {
+ /* legacy not-really-I2C */
+ addr = (addr << 1) | READ_CMD;
+ ret = qib_twsi_wr(dd, addr, QIB_TWSI_START);
+ } else {
+ /* Actual I2C */
+ ret = qib_twsi_wr(dd, dev | WRITE_CMD, QIB_TWSI_START);
+ if (ret) {
+ stop_cmd(dd);
+ ret = 1;
+ goto bail;
+ }
+ /*
+ * SFF spec claims we do _not_ stop after the addr
+ * but simply issue a start with the "read" dev-addr.
+ * Since we are implicitely waiting for ACK here,
+ * we need t_buf (nominally 20uSec) before that start,
+ * and cannot rely on the delay built in to the STOP
+ */
+ ret = qib_twsi_wr(dd, addr, 0);
+ udelay(TWSI_BUF_WAIT_USEC);
+
+ if (ret) {
+ qib_dev_err(dd,
+ "Failed to write interface read addr %02X\n",
+ addr);
+ ret = 1;
+ goto bail;
+ }
+ ret = qib_twsi_wr(dd, dev | READ_CMD, QIB_TWSI_START);
+ }
+ if (ret) {
+ stop_cmd(dd);
+ ret = 1;
+ goto bail;
+ }
+
+ /*
+ * block devices keeps clocking data out as long as we ack,
+ * automatically incrementing the address. Some have "pages"
+ * whose boundaries will not be crossed, but the handling
+ * of these is left to the caller, who is in a better
+ * position to know.
+ */
+ while (len-- > 0) {
+ /*
+ * Get and store data, sending ACK if length remaining,
+ * else STOP
+ */
+ *bp++ = rd_byte(dd, !len);
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/*
+ * qib_twsi_blk_wr
+ * Formerly called qib_eeprom_internal_write, and only used for eeprom,
+ * but now the general interface for data transfer to twsi devices.
+ * One vestige of its former role is that it recognizes a device
+ * QIB_TWSI_NO_DEV and does the correct operation for the legacy part,
+ * which responded to all TWSI device codes, interpreting them as
+ * address within device. On all other devices found on board handled by
+ * this driver, the device is followed by a one-byte "address" which selects
+ * the "register" or "offset" within the device to which data should
+ * be written.
+ */
+int qib_twsi_blk_wr(struct qib_devdata *dd, int dev, int addr,
+ const void *buffer, int len)
+{
+ int sub_len;
+ const u8 *bp = buffer;
+ int max_wait_time, i;
+ int ret;
+ ret = 1;
+
+ while (len > 0) {
+ if (dev == QIB_TWSI_NO_DEV) {
+ if (qib_twsi_wr(dd, (addr << 1) | WRITE_CMD,
+ QIB_TWSI_START)) {
+ goto failed_write;
+ }
+ } else {
+ /* Real I2C */
+ if (qib_twsi_wr(dd, dev | WRITE_CMD, QIB_TWSI_START))
+ goto failed_write;
+ ret = qib_twsi_wr(dd, addr, 0);
+ if (ret) {
+ qib_dev_err(dd, "Failed to write interface"
+ " write addr %02X\n", addr);
+ goto failed_write;
+ }
+ }
+
+ sub_len = min(len, 4);
+ addr += sub_len;
+ len -= sub_len;
+
+ for (i = 0; i < sub_len; i++)
+ if (qib_twsi_wr(dd, *bp++, 0))
+ goto failed_write;
+
+ stop_cmd(dd);
+
+ /*
+ * Wait for write complete by waiting for a successful
+ * read (the chip replies with a zero after the write
+ * cmd completes, and before it writes to the eeprom.
+ * The startcmd for the read will fail the ack until
+ * the writes have completed. We do this inline to avoid
+ * the debug prints that are in the real read routine
+ * if the startcmd fails.
+ * We also use the proper device address, so it doesn't matter
+ * whether we have real eeprom_dev. Legacy likes any address.
+ */
+ max_wait_time = 100;
+ while (qib_twsi_wr(dd, dev | READ_CMD, QIB_TWSI_START)) {
+ stop_cmd(dd);
+ if (!--max_wait_time)
+ goto failed_write;
+ }
+ /* now read (and ignore) the resulting byte */
+ rd_byte(dd, 1);
+ }
+
+ ret = 0;
+ goto bail;
+
+failed_write:
+ stop_cmd(dd);
+ ret = 1;
+
+bail:
+ return ret;
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 39/52] IB/qib: Add qib_tx.c
From: Ralph Campbell @ 2010-05-07 0:02 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_tx.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_tx.c | 557 ++++++++++++++++++++++++++++++++++++
1 files changed, 557 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_tx.c
diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c
new file mode 100644
index 0000000..f7eb1dd
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_tx.c
@@ -0,0 +1,557 @@
+/*
+ * Copyright (c) 2008, 2009, 2010 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+
+#include "qib.h"
+
+static unsigned qib_hol_timeout_ms = 3000;
+module_param_named(hol_timeout_ms, qib_hol_timeout_ms, uint, S_IRUGO);
+MODULE_PARM_DESC(hol_timeout_ms,
+ "duration of user app suspension after link failure");
+
+unsigned qib_sdma_fetch_arb = 1;
+module_param_named(fetch_arb, qib_sdma_fetch_arb, uint, S_IRUGO);
+MODULE_PARM_DESC(fetch_arb, "IBA7220: change SDMA descriptor arbitration");
+
+/**
+ * qib_disarm_piobufs - cancel a range of PIO buffers
+ * @dd: the qlogic_ib device
+ * @first: the first PIO buffer to cancel
+ * @cnt: the number of PIO buffers to cancel
+ *
+ * Cancel a range of PIO buffers. Used at user process close,
+ * in case it died while writing to a PIO buffer.
+ */
+void qib_disarm_piobufs(struct qib_devdata *dd, unsigned first, unsigned cnt)
+{
+ unsigned long flags;
+ unsigned i;
+ unsigned last;
+
+ last = first + cnt;
+ spin_lock_irqsave(&dd->pioavail_lock, flags);
+ for (i = first; i < last; i++) {
+ __clear_bit(i, dd->pio_need_disarm);
+ dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(i));
+ }
+ spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+}
+
+/*
+ * This is called by a user process when it sees the DISARM_BUFS event
+ * bit is set.
+ */
+int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *rcd)
+{
+ struct qib_devdata *dd = rcd->dd;
+ unsigned i;
+ unsigned last;
+ unsigned n = 0;
+
+ last = rcd->pio_base + rcd->piocnt;
+ /*
+ * Don't need uctxt_lock here, since user has called in to us.
+ * Clear at start in case more interrupts set bits while we
+ * are disarming
+ */
+ if (rcd->user_event_mask) {
+ /*
+ * subctxt_cnt is 0 if not shared, so do base
+ * separately, first, then remaining subctxt, if any
+ */
+ clear_bit(_QIB_EVENT_DISARM_BUFS_BIT, &rcd->user_event_mask[0]);
+ for (i = 1; i < rcd->subctxt_cnt; i++)
+ clear_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+ &rcd->user_event_mask[i]);
+ }
+ spin_lock_irq(&dd->pioavail_lock);
+ for (i = rcd->pio_base; i < last; i++) {
+ if (__test_and_clear_bit(i, dd->pio_need_disarm)) {
+ n++;
+ dd->f_sendctrl(rcd->ppd, QIB_SENDCTRL_DISARM_BUF(i));
+ }
+ }
+ spin_unlock_irq(&dd->pioavail_lock);
+ return 0;
+}
+
+static struct qib_pportdata *is_sdma_buf(struct qib_devdata *dd, unsigned i)
+{
+ struct qib_pportdata *ppd;
+ unsigned pidx;
+
+ for (pidx = 0; pidx < dd->num_pports; pidx++) {
+ ppd = dd->pport + pidx;
+ if (i >= ppd->sdma_state.first_sendbuf &&
+ i < ppd->sdma_state.last_sendbuf)
+ return ppd;
+ }
+ return NULL;
+}
+
+/*
+ * Return true if send buffer is being used by a user context.
+ * Sets _QIB_EVENT_DISARM_BUFS_BIT in user_event_mask as a side effect
+ */
+static int find_ctxt(struct qib_devdata *dd, unsigned bufn)
+{
+ struct qib_ctxtdata *rcd;
+ unsigned ctxt;
+ int ret = 0;
+
+ spin_lock(&dd->uctxt_lock);
+ for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; ctxt++) {
+ rcd = dd->rcd[ctxt];
+ if (!rcd || bufn < rcd->pio_base ||
+ bufn >= rcd->pio_base + rcd->piocnt)
+ continue;
+ if (rcd->user_event_mask) {
+ int i;
+ /*
+ * subctxt_cnt is 0 if not shared, so do base
+ * separately, first, then remaining subctxt, if any
+ */
+ set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+ &rcd->user_event_mask[0]);
+ for (i = 1; i < rcd->subctxt_cnt; i++)
+ set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+ &rcd->user_event_mask[i]);
+ }
+ ret = 1;
+ break;
+ }
+ spin_unlock(&dd->uctxt_lock);
+
+ return ret;
+}
+
+/*
+ * Disarm a set of send buffers. If the buffer might be actively being
+ * written to, mark the buffer to be disarmed later when it is not being
+ * written to.
+ *
+ * This should only be called from the IRQ error handler.
+ */
+void qib_disarm_piobufs_set(struct qib_devdata *dd, unsigned long *mask,
+ unsigned cnt)
+{
+ struct qib_pportdata *ppd, *pppd[dd->num_pports];
+ unsigned i;
+ unsigned long flags;
+
+ for (i = 0; i < dd->num_pports; i++)
+ pppd[i] = NULL;
+
+ for (i = 0; i < cnt; i++) {
+ int which;
+ if (!test_bit(i, mask))
+ continue;
+ /*
+ * If the buffer is owned by the DMA hardware,
+ * reset the DMA engine.
+ */
+ ppd = is_sdma_buf(dd, i);
+ if (ppd) {
+ pppd[ppd->port] = ppd;
+ continue;
+ }
+ /*
+ * If the kernel is writing the buffer or the buffer is
+ * owned by a user process, we can't clear it yet.
+ */
+ spin_lock_irqsave(&dd->pioavail_lock, flags);
+ if (test_bit(i, dd->pio_writing) ||
+ (!test_bit(i << 1, dd->pioavailkernel) &&
+ find_ctxt(dd, i))) {
+ __set_bit(i, dd->pio_need_disarm);
+ which = 0;
+ } else {
+ which = 1;
+ dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(i));
+ }
+ spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+ }
+
+ /* do cancel_sends once per port that had sdma piobufs in error */
+ for (i = 0; i < dd->num_pports; i++)
+ if (pppd[i])
+ qib_cancel_sends(pppd[i]);
+}
+
+/**
+ * update_send_bufs - update shadow copy of the PIO availability map
+ * @dd: the qlogic_ib device
+ *
+ * called whenever our local copy indicates we have run out of send buffers
+ */
+static void update_send_bufs(struct qib_devdata *dd)
+{
+ unsigned long flags;
+ unsigned i;
+ const unsigned piobregs = dd->pioavregs;
+
+ /*
+ * If the generation (check) bits have changed, then we update the
+ * busy bit for the corresponding PIO buffer. This algorithm will
+ * modify positions to the value they already have in some cases
+ * (i.e., no change), but it's faster than changing only the bits
+ * that have changed.
+ *
+ * We would like to do this atomicly, to avoid spinlocks in the
+ * critical send path, but that's not really possible, given the
+ * type of changes, and that this routine could be called on
+ * multiple cpu's simultaneously, so we lock in this routine only,
+ * to avoid conflicting updates; all we change is the shadow, and
+ * it's a single 64 bit memory location, so by definition the update
+ * is atomic in terms of what other cpu's can see in testing the
+ * bits. The spin_lock overhead isn't too bad, since it only
+ * happens when all buffers are in use, so only cpu overhead, not
+ * latency or bandwidth is affected.
+ */
+ if (!dd->pioavailregs_dma)
+ return;
+ spin_lock_irqsave(&dd->pioavail_lock, flags);
+ for (i = 0; i < piobregs; i++) {
+ u64 pchbusy, pchg, piov, pnew;
+
+ piov = le64_to_cpu(dd->pioavailregs_dma[i]);
+ pchg = dd->pioavailkernel[i] &
+ ~(dd->pioavailshadow[i] ^ piov);
+ pchbusy = pchg << QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT;
+ if (pchg && (pchbusy & dd->pioavailshadow[i])) {
+ pnew = dd->pioavailshadow[i] & ~pchbusy;
+ pnew |= piov & pchbusy;
+ dd->pioavailshadow[i] = pnew;
+ }
+ }
+ spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+}
+
+/*
+ * Debugging code and stats updates if no pio buffers available.
+ */
+static noinline void no_send_bufs(struct qib_devdata *dd)
+{
+ dd->upd_pio_shadow = 1;
+
+ /* not atomic, but if we lose a stat count in a while, that's OK */
+ qib_stats.sps_nopiobufs++;
+}
+
+/*
+ * Common code for normal driver send buffer allocation, and reserved
+ * allocation.
+ *
+ * Do appropriate marking as busy, etc.
+ * Returns buffer pointer if one is found, otherwise NULL.
+ */
+u32 __iomem *qib_getsendbuf_range(struct qib_devdata *dd, u32 *pbufnum,
+ u32 first, u32 last)
+{
+ unsigned i, j, updated = 0;
+ unsigned nbufs;
+ unsigned long flags;
+ unsigned long *shadow = dd->pioavailshadow;
+ u32 __iomem *buf;
+
+ if (!(dd->flags & QIB_PRESENT))
+ return NULL;
+
+ nbufs = last - first + 1; /* number in range to check */
+ if (dd->upd_pio_shadow) {
+ /*
+ * Minor optimization. If we had no buffers on last call,
+ * start out by doing the update; continue and do scan even
+ * if no buffers were updated, to be paranoid.
+ */
+ update_send_bufs(dd);
+ updated++;
+ }
+ i = first;
+rescan:
+ /*
+ * While test_and_set_bit() is atomic, we do that and then the
+ * change_bit(), and the pair is not. See if this is the cause
+ * of the remaining armlaunch errors.
+ */
+ spin_lock_irqsave(&dd->pioavail_lock, flags);
+ for (j = 0; j < nbufs; j++, i++) {
+ if (i > last)
+ i = first;
+ if (__test_and_set_bit((2 * i) + 1, shadow))
+ continue;
+ /* flip generation bit */
+ __change_bit(2 * i, shadow);
+ /* remember that the buffer can be written to now */
+ __set_bit(i, dd->pio_writing);
+ break;
+ }
+ spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+
+ if (j == nbufs) {
+ if (!updated) {
+ /*
+ * First time through; shadow exhausted, but may be
+ * buffers available, try an update and then rescan.
+ */
+ update_send_bufs(dd);
+ updated++;
+ i = first;
+ goto rescan;
+ }
+ no_send_bufs(dd);
+ buf = NULL;
+ } else {
+ if (i < dd->piobcnt2k)
+ buf = (u32 __iomem *)(dd->pio2kbase +
+ i * dd->palign);
+ else
+ buf = (u32 __iomem *)(dd->pio4kbase +
+ (i - dd->piobcnt2k) * dd->align4k);
+ if (pbufnum)
+ *pbufnum = i;
+ dd->upd_pio_shadow = 0;
+ }
+
+ return buf;
+}
+
+/*
+ * Record that the caller is finished writing to the buffer so we don't
+ * disarm it while it is being written and disarm it now if needed.
+ */
+void qib_sendbuf_done(struct qib_devdata *dd, unsigned n)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dd->pioavail_lock, flags);
+ __clear_bit(n, dd->pio_writing);
+ if (__test_and_clear_bit(n, dd->pio_need_disarm))
+ dd->f_sendctrl(dd->pport, QIB_SENDCTRL_DISARM_BUF(n));
+ spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+}
+
+/**
+ * qib_chg_pioavailkernel - change which send buffers are available for kernel
+ * @dd: the qlogic_ib device
+ * @start: the starting send buffer number
+ * @len: the number of send buffers
+ * @avail: true if the buffers are available for kernel use, false otherwise
+ */
+void qib_chg_pioavailkernel(struct qib_devdata *dd, unsigned start,
+ unsigned len, u32 avail, struct qib_ctxtdata *rcd)
+{
+ unsigned long flags;
+ unsigned end;
+ unsigned ostart = start;
+
+ /* There are two bits per send buffer (busy and generation) */
+ start *= 2;
+ end = start + len * 2;
+
+ spin_lock_irqsave(&dd->pioavail_lock, flags);
+ /* Set or clear the busy bit in the shadow. */
+ while (start < end) {
+ if (avail) {
+ unsigned long dma;
+ int i;
+
+ /*
+ * The BUSY bit will never be set, because we disarm
+ * the user buffers before we hand them back to the
+ * kernel. We do have to make sure the generation
+ * bit is set correctly in shadow, since it could
+ * have changed many times while allocated to user.
+ * We can't use the bitmap functions on the full
+ * dma array because it is always little-endian, so
+ * we have to flip to host-order first.
+ * BITS_PER_LONG is slightly wrong, since it's
+ * always 64 bits per register in chip...
+ * We only work on 64 bit kernels, so that's OK.
+ */
+ i = start / BITS_PER_LONG;
+ __clear_bit(QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT + start,
+ dd->pioavailshadow);
+ dma = (unsigned long)
+ le64_to_cpu(dd->pioavailregs_dma[i]);
+ if (test_bit((QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT +
+ start) % BITS_PER_LONG, &dma))
+ __set_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT +
+ start, dd->pioavailshadow);
+ else
+ __clear_bit(QLOGIC_IB_SENDPIOAVAIL_CHECK_SHIFT
+ + start, dd->pioavailshadow);
+ __set_bit(start, dd->pioavailkernel);
+ } else {
+ __set_bit(start + QLOGIC_IB_SENDPIOAVAIL_BUSY_SHIFT,
+ dd->pioavailshadow);
+ __clear_bit(start, dd->pioavailkernel);
+ }
+ start += 2;
+ }
+
+ spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+
+ dd->f_txchk_change(dd, ostart, len, avail, rcd);
+}
+
+/*
+ * Flush all sends that might be in the ready to send state, as well as any
+ * that are in the process of being sent. Used whenever we need to be
+ * sure the send side is idle. Cleans up all buffer state by canceling
+ * all pio buffers, and issuing an abort, which cleans up anything in the
+ * launch fifo. The cancel is superfluous on some chip versions, but
+ * it's safer to always do it.
+ * PIOAvail bits are updated by the chip as if a normal send had happened.
+ */
+void qib_cancel_sends(struct qib_pportdata *ppd)
+{
+ struct qib_devdata *dd = ppd->dd;
+ struct qib_ctxtdata *rcd;
+ unsigned long flags;
+ unsigned ctxt;
+ unsigned i;
+ unsigned last;
+
+ /*
+ * Tell PSM to disarm buffers again before trying to reuse them.
+ * We need to be sure the rcd doesn't change out from under us
+ * while we do so. We hold the two locks sequentially. We might
+ * needlessly set some need_disarm bits as a result, if the
+ * context is closed after we release the uctxt_lock, but that's
+ * fairly benign, and safer than nesting the locks.
+ */
+ for (ctxt = dd->first_user_ctxt; ctxt < dd->cfgctxts; ctxt++) {
+ spin_lock_irqsave(&dd->uctxt_lock, flags);
+ rcd = dd->rcd[ctxt];
+ if (rcd && rcd->ppd == ppd) {
+ last = rcd->pio_base + rcd->piocnt;
+ if (rcd->user_event_mask) {
+ /*
+ * subctxt_cnt is 0 if not shared, so do base
+ * separately, first, then remaining subctxt,
+ * if any
+ */
+ set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+ &rcd->user_event_mask[0]);
+ for (i = 1; i < rcd->subctxt_cnt; i++)
+ set_bit(_QIB_EVENT_DISARM_BUFS_BIT,
+ &rcd->user_event_mask[i]);
+ }
+ i = rcd->pio_base;
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+ spin_lock_irqsave(&dd->pioavail_lock, flags);
+ for (; i < last; i++)
+ __set_bit(i, dd->pio_need_disarm);
+ spin_unlock_irqrestore(&dd->pioavail_lock, flags);
+ } else
+ spin_unlock_irqrestore(&dd->uctxt_lock, flags);
+ }
+
+ if (!(dd->flags & QIB_HAS_SEND_DMA))
+ dd->f_sendctrl(ppd, QIB_SENDCTRL_DISARM_ALL |
+ QIB_SENDCTRL_FLUSH);
+}
+
+/*
+ * Force an update of in-memory copy of the pioavail registers, when
+ * needed for any of a variety of reasons.
+ * If already off, this routine is a nop, on the assumption that the
+ * caller (or set of callers) will "do the right thing".
+ * This is a per-device operation, so just the first port.
+ */
+void qib_force_pio_avail_update(struct qib_devdata *dd)
+{
+ dd->f_sendctrl(dd->pport, QIB_SENDCTRL_AVAIL_BLIP);
+}
+
+void qib_hol_down(struct qib_pportdata *ppd)
+{
+ /*
+ * Cancel sends when the link goes DOWN so that we aren't doing it
+ * at INIT when we might be trying to send SMI packets.
+ */
+ if (!(ppd->lflags & QIBL_IB_AUTONEG_INPROG))
+ qib_cancel_sends(ppd);
+}
+
+/*
+ * Link is at INIT.
+ * We start the HoL timer so we can detect stuck packets blocking SMP replies.
+ * Timer may already be running, so use mod_timer, not add_timer.
+ */
+void qib_hol_init(struct qib_pportdata *ppd)
+{
+ if (ppd->hol_state != QIB_HOL_INIT) {
+ ppd->hol_state = QIB_HOL_INIT;
+ mod_timer(&ppd->hol_timer,
+ jiffies + msecs_to_jiffies(qib_hol_timeout_ms));
+ }
+}
+
+/*
+ * Link is up, continue any user processes, and ensure timer
+ * is a nop, if running. Let timer keep running, if set; it
+ * will nop when it sees the link is up.
+ */
+void qib_hol_up(struct qib_pportdata *ppd)
+{
+ ppd->hol_state = QIB_HOL_UP;
+}
+
+/*
+ * This is only called via the timer.
+ */
+void qib_hol_event(unsigned long opaque)
+{
+ struct qib_pportdata *ppd = (struct qib_pportdata *)opaque;
+
+ /* If hardware error, etc, skip. */
+ if (!(ppd->dd->flags & QIB_INITTED))
+ return;
+
+ if (ppd->hol_state != QIB_HOL_UP) {
+ /*
+ * Try to flush sends in case a stuck packet is blocking
+ * SMP replies.
+ */
+ qib_hol_down(ppd);
+ mod_timer(&ppd->hol_timer,
+ jiffies + msecs_to_jiffies(qib_hol_timeout_ms));
+ }
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 40/52] IB/qib: Add qib_uc.c
From: Ralph Campbell @ 2010-05-07 0:02 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_uc.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_uc.c | 555 ++++++++++++++++++++++++++++++++++++
1 files changed, 555 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_uc.c
diff --git a/drivers/infiniband/hw/qib/qib_uc.c b/drivers/infiniband/hw/qib/qib_uc.c
new file mode 100644
index 0000000..6c7fe78
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_uc.c
@@ -0,0 +1,555 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
+ * All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "qib.h"
+
+/* cut down ridiculously long IB macro names */
+#define OP(x) IB_OPCODE_UC_##x
+
+/**
+ * qib_make_uc_req - construct a request packet (SEND, RDMA write)
+ * @qp: a pointer to the QP
+ *
+ * Return 1 if constructed; otherwise, return 0.
+ */
+int qib_make_uc_req(struct qib_qp *qp)
+{
+ struct qib_other_headers *ohdr;
+ struct qib_swqe *wqe;
+ unsigned long flags;
+ u32 hwords;
+ u32 bth0;
+ u32 len;
+ u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+ int ret = 0;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_SEND_OK)) {
+ if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= QIB_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ goto done;
+ }
+
+ ohdr = &qp->s_hdr.u.oth;
+ if (qp->remote_ah_attr.ah_flags & IB_AH_GRH)
+ ohdr = &qp->s_hdr.u.l.oth;
+
+ /* header size in 32-bit words LRH+BTH = (8+12)/4. */
+ hwords = 5;
+ bth0 = 0;
+
+ /* Get the next send request. */
+ wqe = get_swqe_ptr(qp, qp->s_cur);
+ qp->s_wqe = NULL;
+ switch (qp->s_state) {
+ default:
+ if (!(ib_qib_state_ops[qp->state] &
+ QIB_PROCESS_NEXT_SEND_OK))
+ goto bail;
+ /* Check if send work queue is empty. */
+ if (qp->s_cur == qp->s_head)
+ goto bail;
+ /*
+ * Start a new request.
+ */
+ wqe->psn = qp->s_next_psn;
+ qp->s_psn = qp->s_next_psn;
+ qp->s_sge.sge = wqe->sg_list[0];
+ qp->s_sge.sg_list = wqe->sg_list + 1;
+ qp->s_sge.num_sge = wqe->wr.num_sge;
+ qp->s_sge.total_len = wqe->length;
+ len = wqe->length;
+ qp->s_len = len;
+ switch (wqe->wr.opcode) {
+ case IB_WR_SEND:
+ case IB_WR_SEND_WITH_IMM:
+ if (len > pmtu) {
+ qp->s_state = OP(SEND_FIRST);
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_SEND)
+ qp->s_state = OP(SEND_ONLY);
+ else {
+ qp->s_state =
+ OP(SEND_ONLY_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ }
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ qp->s_wqe = wqe;
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case IB_WR_RDMA_WRITE:
+ case IB_WR_RDMA_WRITE_WITH_IMM:
+ ohdr->u.rc.reth.vaddr =
+ cpu_to_be64(wqe->wr.wr.rdma.remote_addr);
+ ohdr->u.rc.reth.rkey =
+ cpu_to_be32(wqe->wr.wr.rdma.rkey);
+ ohdr->u.rc.reth.length = cpu_to_be32(len);
+ hwords += sizeof(struct ib_reth) / 4;
+ if (len > pmtu) {
+ qp->s_state = OP(RDMA_WRITE_FIRST);
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ qp->s_state = OP(RDMA_WRITE_ONLY);
+ else {
+ qp->s_state =
+ OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE);
+ /* Immediate data comes after the RETH */
+ ohdr->u.rc.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ }
+ qp->s_wqe = wqe;
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ default:
+ goto bail;
+ }
+ break;
+
+ case OP(SEND_FIRST):
+ qp->s_state = OP(SEND_MIDDLE);
+ /* FALLTHROUGH */
+ case OP(SEND_MIDDLE):
+ len = qp->s_len;
+ if (len > pmtu) {
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_SEND)
+ qp->s_state = OP(SEND_LAST);
+ else {
+ qp->s_state = OP(SEND_LAST_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ }
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ qp->s_wqe = wqe;
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+
+ case OP(RDMA_WRITE_FIRST):
+ qp->s_state = OP(RDMA_WRITE_MIDDLE);
+ /* FALLTHROUGH */
+ case OP(RDMA_WRITE_MIDDLE):
+ len = qp->s_len;
+ if (len > pmtu) {
+ len = pmtu;
+ break;
+ }
+ if (wqe->wr.opcode == IB_WR_RDMA_WRITE)
+ qp->s_state = OP(RDMA_WRITE_LAST);
+ else {
+ qp->s_state =
+ OP(RDMA_WRITE_LAST_WITH_IMMEDIATE);
+ /* Immediate data comes after the BTH */
+ ohdr->u.imm_data = wqe->wr.ex.imm_data;
+ hwords += 1;
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ }
+ qp->s_wqe = wqe;
+ if (++qp->s_cur >= qp->s_size)
+ qp->s_cur = 0;
+ break;
+ }
+ qp->s_len -= len;
+ qp->s_hdrwords = hwords;
+ qp->s_cur_sge = &qp->s_sge;
+ qp->s_cur_size = len;
+ qib_make_ruc_header(qp, ohdr, bth0 | (qp->s_state << 24),
+ qp->s_next_psn++ & QIB_PSN_MASK);
+done:
+ ret = 1;
+ goto unlock;
+
+bail:
+ qp->s_flags &= ~QIB_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
+}
+
+/**
+ * qib_uc_rcv - handle an incoming UC packet
+ * @ibp: the port the packet came in on
+ * @hdr: the header of the packet
+ * @has_grh: true if the packet has a GRH
+ * @data: the packet data
+ * @tlen: the length of the packet
+ * @qp: the QP for this packet.
+ *
+ * This is called from qib_qp_rcv() to process an incoming UC packet
+ * for the given QP.
+ * Called at interrupt level.
+ */
+void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+{
+ struct qib_other_headers *ohdr;
+ unsigned long flags;
+ u32 opcode;
+ u32 hdrsize;
+ u32 psn;
+ u32 pad;
+ struct ib_wc wc;
+ u32 pmtu = ib_mtu_enum_to_int(qp->path_mtu);
+ struct ib_reth *reth;
+ int ret;
+
+ /* Check for GRH */
+ if (!has_grh) {
+ ohdr = &hdr->u.oth;
+ hdrsize = 8 + 12; /* LRH + BTH */
+ } else {
+ ohdr = &hdr->u.l.oth;
+ hdrsize = 8 + 40 + 12; /* LRH + GRH + BTH */
+ }
+
+ opcode = be32_to_cpu(ohdr->bth[0]);
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (qib_ruc_check_hdr(ibp, hdr, has_grh, qp, opcode))
+ goto sunlock;
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ psn = be32_to_cpu(ohdr->bth[2]);
+ opcode >>= 24;
+ memset(&wc, 0, sizeof wc);
+
+ /* Prevent simultaneous processing after APM on different CPUs */
+ spin_lock(&qp->r_lock);
+
+ /* Compare the PSN verses the expected PSN. */
+ if (unlikely(qib_cmp24(psn, qp->r_psn) != 0)) {
+ /*
+ * Handle a sequence error.
+ * Silently drop any current message.
+ */
+ qp->r_psn = psn;
+inv:
+ if (qp->r_state == OP(SEND_FIRST) ||
+ qp->r_state == OP(SEND_MIDDLE)) {
+ set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
+ qp->r_sge.num_sge = 0;
+ } else
+ while (qp->r_sge.num_sge) {
+ atomic_dec(&qp->r_sge.sge.mr->refcount);
+ if (--qp->r_sge.num_sge)
+ qp->r_sge.sge = *qp->r_sge.sg_list++;
+ }
+ qp->r_state = OP(SEND_LAST);
+ switch (opcode) {
+ case OP(SEND_FIRST):
+ case OP(SEND_ONLY):
+ case OP(SEND_ONLY_WITH_IMMEDIATE):
+ goto send_first;
+
+ case OP(RDMA_WRITE_FIRST):
+ case OP(RDMA_WRITE_ONLY):
+ case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE):
+ goto rdma_first;
+
+ default:
+ goto drop;
+ }
+ }
+
+ /* Check for opcode sequence errors. */
+ switch (qp->r_state) {
+ case OP(SEND_FIRST):
+ case OP(SEND_MIDDLE):
+ if (opcode == OP(SEND_MIDDLE) ||
+ opcode == OP(SEND_LAST) ||
+ opcode == OP(SEND_LAST_WITH_IMMEDIATE))
+ break;
+ goto inv;
+
+ case OP(RDMA_WRITE_FIRST):
+ case OP(RDMA_WRITE_MIDDLE):
+ if (opcode == OP(RDMA_WRITE_MIDDLE) ||
+ opcode == OP(RDMA_WRITE_LAST) ||
+ opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE))
+ break;
+ goto inv;
+
+ default:
+ if (opcode == OP(SEND_FIRST) ||
+ opcode == OP(SEND_ONLY) ||
+ opcode == OP(SEND_ONLY_WITH_IMMEDIATE) ||
+ opcode == OP(RDMA_WRITE_FIRST) ||
+ opcode == OP(RDMA_WRITE_ONLY) ||
+ opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
+ break;
+ goto inv;
+ }
+
+ if (qp->state == IB_QPS_RTR && !(qp->r_flags & QIB_R_COMM_EST)) {
+ qp->r_flags |= QIB_R_COMM_EST;
+ if (qp->ibqp.event_handler) {
+ struct ib_event ev;
+
+ ev.device = qp->ibqp.device;
+ ev.element.qp = &qp->ibqp;
+ ev.event = IB_EVENT_COMM_EST;
+ qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
+ }
+ }
+
+ /* OK, process the packet. */
+ switch (opcode) {
+ case OP(SEND_FIRST):
+ case OP(SEND_ONLY):
+ case OP(SEND_ONLY_WITH_IMMEDIATE):
+send_first:
+ if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
+ qp->r_sge = qp->s_rdma_read_sge;
+ else {
+ ret = qib_get_rwqe(qp, 0);
+ if (ret < 0)
+ goto op_err;
+ if (!ret)
+ goto drop;
+ /*
+ * qp->s_rdma_read_sge will be the owner
+ * of the mr references.
+ */
+ qp->s_rdma_read_sge = qp->r_sge;
+ }
+ qp->r_rcv_len = 0;
+ if (opcode == OP(SEND_ONLY))
+ goto send_last;
+ else if (opcode == OP(SEND_ONLY_WITH_IMMEDIATE))
+ goto send_last_imm;
+ /* FALLTHROUGH */
+ case OP(SEND_MIDDLE):
+ /* Check for invalid length PMTU or posted rwqe len. */
+ if (unlikely(tlen != (hdrsize + pmtu + 4)))
+ goto rewind;
+ qp->r_rcv_len += pmtu;
+ if (unlikely(qp->r_rcv_len > qp->r_len))
+ goto rewind;
+ qib_copy_sge(&qp->r_sge, data, pmtu, 0);
+ break;
+
+ case OP(SEND_LAST_WITH_IMMEDIATE):
+send_last_imm:
+ wc.ex.imm_data = ohdr->u.imm_data;
+ hdrsize += 4;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ /* FALLTHROUGH */
+ case OP(SEND_LAST):
+send_last:
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /* Check for invalid length. */
+ /* XXX LAST len should be >= 1 */
+ if (unlikely(tlen < (hdrsize + pad + 4)))
+ goto rewind;
+ /* Don't count the CRC. */
+ tlen -= (hdrsize + pad + 4);
+ wc.byte_len = tlen + qp->r_rcv_len;
+ if (unlikely(wc.byte_len > qp->r_len))
+ goto rewind;
+ wc.opcode = IB_WC_RECV;
+last_imm:
+ qib_copy_sge(&qp->r_sge, data, tlen, 0);
+ while (qp->s_rdma_read_sge.num_sge) {
+ atomic_dec(&qp->s_rdma_read_sge.sge.mr->refcount);
+ if (--qp->s_rdma_read_sge.num_sge)
+ qp->s_rdma_read_sge.sge =
+ *qp->s_rdma_read_sge.sg_list++;
+ }
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = qp->remote_qpn;
+ wc.slid = qp->remote_ah_attr.dlid;
+ wc.sl = qp->remote_ah_attr.sl;
+ /* Signal completion event if the solicited bit is set. */
+ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ (ohdr->bth[0] &
+ cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+ break;
+
+ case OP(RDMA_WRITE_FIRST):
+ case OP(RDMA_WRITE_ONLY):
+ case OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE): /* consume RWQE */
+rdma_first:
+ if (unlikely(!(qp->qp_access_flags &
+ IB_ACCESS_REMOTE_WRITE))) {
+ goto drop;
+ }
+ reth = &ohdr->u.rc.reth;
+ hdrsize += sizeof(*reth);
+ qp->r_len = be32_to_cpu(reth->length);
+ qp->r_rcv_len = 0;
+ qp->r_sge.sg_list = NULL;
+ if (qp->r_len != 0) {
+ u32 rkey = be32_to_cpu(reth->rkey);
+ u64 vaddr = be64_to_cpu(reth->vaddr);
+ int ok;
+
+ /* Check rkey */
+ ok = qib_rkey_ok(qp, &qp->r_sge.sge, qp->r_len,
+ vaddr, rkey, IB_ACCESS_REMOTE_WRITE);
+ if (unlikely(!ok))
+ goto drop;
+ qp->r_sge.num_sge = 1;
+ } else {
+ qp->r_sge.num_sge = 0;
+ qp->r_sge.sge.mr = NULL;
+ qp->r_sge.sge.vaddr = NULL;
+ qp->r_sge.sge.length = 0;
+ qp->r_sge.sge.sge_length = 0;
+ }
+ if (opcode == OP(RDMA_WRITE_ONLY))
+ goto rdma_last;
+ else if (opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
+ goto rdma_last_imm;
+ /* FALLTHROUGH */
+ case OP(RDMA_WRITE_MIDDLE):
+ /* Check for invalid length PMTU or posted rwqe len. */
+ if (unlikely(tlen != (hdrsize + pmtu + 4)))
+ goto drop;
+ qp->r_rcv_len += pmtu;
+ if (unlikely(qp->r_rcv_len > qp->r_len))
+ goto drop;
+ qib_copy_sge(&qp->r_sge, data, pmtu, 1);
+ break;
+
+ case OP(RDMA_WRITE_LAST_WITH_IMMEDIATE):
+rdma_last_imm:
+ wc.ex.imm_data = ohdr->u.imm_data;
+ hdrsize += 4;
+ wc.wc_flags = IB_WC_WITH_IMM;
+
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /* Check for invalid length. */
+ /* XXX LAST len should be >= 1 */
+ if (unlikely(tlen < (hdrsize + pad + 4)))
+ goto drop;
+ /* Don't count the CRC. */
+ tlen -= (hdrsize + pad + 4);
+ if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
+ goto drop;
+ if (test_and_clear_bit(QIB_R_REWIND_SGE, &qp->r_aflags))
+ while (qp->s_rdma_read_sge.num_sge) {
+ atomic_dec(&qp->s_rdma_read_sge.sge.mr->
+ refcount);
+ if (--qp->s_rdma_read_sge.num_sge)
+ qp->s_rdma_read_sge.sge =
+ *qp->s_rdma_read_sge.sg_list++;
+ }
+ else {
+ ret = qib_get_rwqe(qp, 1);
+ if (ret < 0)
+ goto op_err;
+ if (!ret)
+ goto drop;
+ }
+ wc.byte_len = qp->r_len;
+ wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
+ goto last_imm;
+
+ case OP(RDMA_WRITE_LAST):
+rdma_last:
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ /* Check for invalid length. */
+ /* XXX LAST len should be >= 1 */
+ if (unlikely(tlen < (hdrsize + pad + 4)))
+ goto drop;
+ /* Don't count the CRC. */
+ tlen -= (hdrsize + pad + 4);
+ if (unlikely(tlen + qp->r_rcv_len != qp->r_len))
+ goto drop;
+ qib_copy_sge(&qp->r_sge, data, tlen, 1);
+ while (qp->r_sge.num_sge) {
+ atomic_dec(&qp->r_sge.sge.mr->refcount);
+ if (--qp->r_sge.num_sge)
+ qp->r_sge.sge = *qp->r_sge.sg_list++;
+ }
+ break;
+
+ default:
+ /* Drop packet for unknown opcodes. */
+ goto drop;
+ }
+ qp->r_psn++;
+ qp->r_state = opcode;
+ spin_unlock(&qp->r_lock);
+ return;
+
+rewind:
+ set_bit(QIB_R_REWIND_SGE, &qp->r_aflags);
+ qp->r_sge.num_sge = 0;
+drop:
+ ibp->n_pkt_drops++;
+ spin_unlock(&qp->r_lock);
+ return;
+
+op_err:
+ qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ spin_unlock(&qp->r_lock);
+ return;
+
+sunlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 41/52] IB/qib: Add qib_ud.c
From: Ralph Campbell @ 2010-05-07 0:02 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_ud.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_ud.c | 607 ++++++++++++++++++++++++++++++++++++
1 files changed, 607 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_ud.c
diff --git a/drivers/infiniband/hw/qib/qib_ud.c b/drivers/infiniband/hw/qib/qib_ud.c
new file mode 100644
index 0000000..c838cda
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_ud.c
@@ -0,0 +1,607 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_smi.h>
+
+#include "qib.h"
+#include "qib_mad.h"
+
+/**
+ * qib_ud_loopback - handle send on loopback QPs
+ * @sqp: the sending QP
+ * @swqe: the send work request
+ *
+ * This is called from qib_make_ud_req() to forward a WQE addressed
+ * to the same HCA.
+ * Note that the receive interrupt handler may be calling qib_ud_rcv()
+ * while this is being called.
+ */
+static void qib_ud_loopback(struct qib_qp *sqp, struct qib_swqe *swqe)
+{
+ struct qib_ibport *ibp = to_iport(sqp->ibqp.device, sqp->port_num);
+ struct qib_pportdata *ppd;
+ struct qib_qp *qp;
+ struct ib_ah_attr *ah_attr;
+ unsigned long flags;
+ struct qib_sge_state ssge;
+ struct qib_sge *sge;
+ struct ib_wc wc;
+ u32 length;
+
+ qp = qib_lookup_qpn(ibp, swqe->wr.wr.ud.remote_qpn);
+ if (!qp) {
+ ibp->n_pkt_drops++;
+ return;
+ }
+ if (qp->ibqp.qp_type != sqp->ibqp.qp_type ||
+ !(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
+ ibp->n_pkt_drops++;
+ goto drop;
+ }
+
+ ah_attr = &to_iah(swqe->wr.wr.ud.ah)->attr;
+ ppd = ppd_from_ibp(ibp);
+
+ if (qp->ibqp.qp_num > 1) {
+ u16 pkey1;
+ u16 pkey2;
+ u16 lid;
+
+ pkey1 = qib_get_pkey(ibp, sqp->s_pkey_index);
+ pkey2 = qib_get_pkey(ibp, qp->s_pkey_index);
+ if (unlikely(!qib_pkey_ok(pkey1, pkey2))) {
+ lid = ppd->lid | (ah_attr->src_path_bits &
+ ((1 << ppd->lmc) - 1));
+ qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY, pkey1,
+ ah_attr->sl,
+ sqp->ibqp.qp_num, qp->ibqp.qp_num,
+ cpu_to_be16(lid),
+ cpu_to_be16(ah_attr->dlid));
+ goto drop;
+ }
+ }
+
+ /*
+ * Check that the qkey matches (except for QP0, see 9.6.1.4.1).
+ * Qkeys with the high order bit set mean use the
+ * qkey from the QP context instead of the WR (see 10.2.5).
+ */
+ if (qp->ibqp.qp_num) {
+ u32 qkey;
+
+ qkey = (int)swqe->wr.wr.ud.remote_qkey < 0 ?
+ sqp->qkey : swqe->wr.wr.ud.remote_qkey;
+ if (unlikely(qkey != qp->qkey)) {
+ u16 lid;
+
+ lid = ppd->lid | (ah_attr->src_path_bits &
+ ((1 << ppd->lmc) - 1));
+ qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey,
+ ah_attr->sl,
+ sqp->ibqp.qp_num, qp->ibqp.qp_num,
+ cpu_to_be16(lid),
+ cpu_to_be16(ah_attr->dlid));
+ goto drop;
+ }
+ }
+
+ /*
+ * A GRH is expected to preceed the data even if not
+ * present on the wire.
+ */
+ length = swqe->length;
+ memset(&wc, 0, sizeof wc);
+ wc.byte_len = length + sizeof(struct ib_grh);
+
+ if (swqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
+ wc.wc_flags = IB_WC_WITH_IMM;
+ wc.ex.imm_data = swqe->wr.ex.imm_data;
+ }
+
+ spin_lock_irqsave(&qp->r_lock, flags);
+
+ /*
+ * Get the next work request entry to find where to put the data.
+ */
+ if (qp->r_flags & QIB_R_REUSE_SGE)
+ qp->r_flags &= ~QIB_R_REUSE_SGE;
+ else {
+ int ret;
+
+ ret = qib_get_rwqe(qp, 0);
+ if (ret < 0) {
+ qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ goto bail_unlock;
+ }
+ if (!ret) {
+ if (qp->ibqp.qp_num == 0)
+ ibp->n_vl15_dropped++;
+ goto bail_unlock;
+ }
+ }
+ /* Silently drop packets which are too big. */
+ if (unlikely(wc.byte_len > qp->r_len)) {
+ qp->r_flags |= QIB_R_REUSE_SGE;
+ ibp->n_pkt_drops++;
+ goto bail_unlock;
+ }
+
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ qib_copy_sge(&qp->r_sge, &ah_attr->grh,
+ sizeof(struct ib_grh), 1);
+ wc.wc_flags |= IB_WC_GRH;
+ } else
+ qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+ ssge.sg_list = swqe->sg_list + 1;
+ ssge.sge = *swqe->sg_list;
+ ssge.num_sge = swqe->wr.num_sge;
+ sge = &ssge.sge;
+ while (length) {
+ u32 len = sge->length;
+
+ if (len > length)
+ len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ qib_copy_sge(&qp->r_sge, sge->vaddr, len, 1);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (--ssge.num_sge)
+ *sge = *ssge.sg_list++;
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ length -= len;
+ }
+ while (qp->r_sge.num_sge) {
+ atomic_dec(&qp->r_sge.sge.mr->refcount);
+ if (--qp->r_sge.num_sge)
+ qp->r_sge.sge = *qp->r_sge.sg_list++;
+ }
+ if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+ goto bail_unlock;
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.opcode = IB_WC_RECV;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = sqp->ibqp.qp_num;
+ wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ?
+ swqe->wr.wr.ud.pkey_index : 0;
+ wc.slid = ppd->lid | (ah_attr->src_path_bits & ((1 << ppd->lmc) - 1));
+ wc.sl = ah_attr->sl;
+ wc.dlid_path_bits = ah_attr->dlid & ((1 << ppd->lmc) - 1);
+ wc.port_num = qp->port_num;
+ /* Signal completion event if the solicited bit is set. */
+ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ swqe->wr.send_flags & IB_SEND_SOLICITED);
+ ibp->n_loop_pkts++;
+bail_unlock:
+ spin_unlock_irqrestore(&qp->r_lock, flags);
+drop:
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+}
+
+/**
+ * qib_make_ud_req - construct a UD request packet
+ * @qp: the QP
+ *
+ * Return 1 if constructed; otherwise, return 0.
+ */
+int qib_make_ud_req(struct qib_qp *qp)
+{
+ struct qib_other_headers *ohdr;
+ struct ib_ah_attr *ah_attr;
+ struct qib_pportdata *ppd;
+ struct qib_ibport *ibp;
+ struct qib_swqe *wqe;
+ unsigned long flags;
+ u32 nwords;
+ u32 extra_bytes;
+ u32 bth0;
+ u16 lrh0;
+ u16 lid;
+ int ret = 0;
+ int next_cur;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_NEXT_SEND_OK)) {
+ if (!(ib_qib_state_ops[qp->state] & QIB_FLUSH_SEND))
+ goto bail;
+ /* We are in the error state, flush the work request. */
+ if (qp->s_last == qp->s_head)
+ goto bail;
+ /* If DMAs are in progress, we can't flush immediately. */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= QIB_S_WAIT_DMA;
+ goto bail;
+ }
+ wqe = get_swqe_ptr(qp, qp->s_last);
+ qib_send_complete(qp, wqe, IB_WC_WR_FLUSH_ERR);
+ goto done;
+ }
+
+ if (qp->s_cur == qp->s_head)
+ goto bail;
+
+ wqe = get_swqe_ptr(qp, qp->s_cur);
+ next_cur = qp->s_cur + 1;
+ if (next_cur >= qp->s_size)
+ next_cur = 0;
+
+ /* Construct the header. */
+ ibp = to_iport(qp->ibqp.device, qp->port_num);
+ ppd = ppd_from_ibp(ibp);
+ ah_attr = &to_iah(wqe->wr.wr.ud.ah)->attr;
+ if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE) {
+ if (ah_attr->dlid != QIB_PERMISSIVE_LID)
+ ibp->n_multicast_xmit++;
+ else
+ ibp->n_unicast_xmit++;
+ } else {
+ ibp->n_unicast_xmit++;
+ lid = ah_attr->dlid & ~((1 << ppd->lmc) - 1);
+ if (unlikely(lid == ppd->lid)) {
+ /*
+ * If DMAs are in progress, we can't generate
+ * a completion for the loopback packet since
+ * it would be out of order.
+ * XXX Instead of waiting, we could queue a
+ * zero length descriptor so we get a callback.
+ */
+ if (atomic_read(&qp->s_dma_busy)) {
+ qp->s_flags |= QIB_S_WAIT_DMA;
+ goto bail;
+ }
+ qp->s_cur = next_cur;
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ qib_ud_loopback(qp, wqe);
+ spin_lock_irqsave(&qp->s_lock, flags);
+ qib_send_complete(qp, wqe, IB_WC_SUCCESS);
+ goto done;
+ }
+ }
+
+ qp->s_cur = next_cur;
+ extra_bytes = -wqe->length & 3;
+ nwords = (wqe->length + extra_bytes) >> 2;
+
+ /* header size in 32-bit words LRH+BTH+DETH = (8+12+8)/4. */
+ qp->s_hdrwords = 7;
+ qp->s_cur_size = wqe->length;
+ qp->s_cur_sge = &qp->s_sge;
+ qp->s_srate = ah_attr->static_rate;
+ qp->s_wqe = wqe;
+ qp->s_sge.sge = wqe->sg_list[0];
+ qp->s_sge.sg_list = wqe->sg_list + 1;
+ qp->s_sge.num_sge = wqe->wr.num_sge;
+ qp->s_sge.total_len = wqe->length;
+
+ if (ah_attr->ah_flags & IB_AH_GRH) {
+ /* Header size in 32-bit words. */
+ qp->s_hdrwords += qib_make_grh(ibp, &qp->s_hdr.u.l.grh,
+ &ah_attr->grh,
+ qp->s_hdrwords, nwords);
+ lrh0 = QIB_LRH_GRH;
+ ohdr = &qp->s_hdr.u.l.oth;
+ /*
+ * Don't worry about sending to locally attached multicast
+ * QPs. It is unspecified by the spec. what happens.
+ */
+ } else {
+ /* Header size in 32-bit words. */
+ lrh0 = QIB_LRH_BTH;
+ ohdr = &qp->s_hdr.u.oth;
+ }
+ if (wqe->wr.opcode == IB_WR_SEND_WITH_IMM) {
+ qp->s_hdrwords++;
+ ohdr->u.ud.imm_data = wqe->wr.ex.imm_data;
+ bth0 = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE << 24;
+ } else
+ bth0 = IB_OPCODE_UD_SEND_ONLY << 24;
+ lrh0 |= ah_attr->sl << 4;
+ if (qp->ibqp.qp_type == IB_QPT_SMI)
+ lrh0 |= 0xF000; /* Set VL (see ch. 13.5.3.1) */
+ else
+ lrh0 |= ibp->sl_to_vl[ah_attr->sl] << 12;
+ qp->s_hdr.lrh[0] = cpu_to_be16(lrh0);
+ qp->s_hdr.lrh[1] = cpu_to_be16(ah_attr->dlid); /* DEST LID */
+ qp->s_hdr.lrh[2] = cpu_to_be16(qp->s_hdrwords + nwords + SIZE_OF_CRC);
+ lid = ppd->lid;
+ if (lid) {
+ lid |= ah_attr->src_path_bits & ((1 << ppd->lmc) - 1);
+ qp->s_hdr.lrh[3] = cpu_to_be16(lid);
+ } else
+ qp->s_hdr.lrh[3] = IB_LID_PERMISSIVE;
+ if (wqe->wr.send_flags & IB_SEND_SOLICITED)
+ bth0 |= IB_BTH_SOLICITED;
+ bth0 |= extra_bytes << 20;
+ bth0 |= qp->ibqp.qp_type == IB_QPT_SMI ? QIB_DEFAULT_P_KEY :
+ qib_get_pkey(ibp, qp->ibqp.qp_type == IB_QPT_GSI ?
+ wqe->wr.wr.ud.pkey_index : qp->s_pkey_index);
+ ohdr->bth[0] = cpu_to_be32(bth0);
+ /*
+ * Use the multicast QP if the destination LID is a multicast LID.
+ */
+ ohdr->bth[1] = ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
+ ah_attr->dlid != QIB_PERMISSIVE_LID ?
+ cpu_to_be32(QIB_MULTICAST_QPN) :
+ cpu_to_be32(wqe->wr.wr.ud.remote_qpn);
+ ohdr->bth[2] = cpu_to_be32(qp->s_next_psn++ & QIB_PSN_MASK);
+ /*
+ * Qkeys with the high order bit set mean use the
+ * qkey from the QP context instead of the WR (see 10.2.5).
+ */
+ ohdr->u.ud.deth[0] = cpu_to_be32((int)wqe->wr.wr.ud.remote_qkey < 0 ?
+ qp->qkey : wqe->wr.wr.ud.remote_qkey);
+ ohdr->u.ud.deth[1] = cpu_to_be32(qp->ibqp.qp_num);
+
+done:
+ ret = 1;
+ goto unlock;
+
+bail:
+ qp->s_flags &= ~QIB_S_BUSY;
+unlock:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
+}
+
+static unsigned qib_lookup_pkey(struct qib_ibport *ibp, u16 pkey)
+{
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_devdata *dd = ppd->dd;
+ unsigned ctxt = ppd->hw_pidx;
+ unsigned i;
+
+ pkey &= 0x7fff; /* remove limited/full membership bit */
+
+ for (i = 0; i < ARRAY_SIZE(dd->rcd[ctxt]->pkeys); ++i)
+ if ((dd->rcd[ctxt]->pkeys[i] & 0x7fff) == pkey)
+ return i;
+
+ /*
+ * Should not get here, this means hardware failed to validate pkeys.
+ * Punt and return index 0.
+ */
+ return 0;
+}
+
+/**
+ * qib_ud_rcv - receive an incoming UD packet
+ * @ibp: the port the packet came in on
+ * @hdr: the packet header
+ * @has_grh: true if the packet has a GRH
+ * @data: the packet data
+ * @tlen: the packet length
+ * @qp: the QP the packet came on
+ *
+ * This is called from qib_qp_rcv() to process an incoming UD packet
+ * for the given QP.
+ * Called at interrupt level.
+ */
+void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+{
+ struct qib_other_headers *ohdr;
+ int opcode;
+ u32 hdrsize;
+ u32 pad;
+ struct ib_wc wc;
+ u32 qkey;
+ u32 src_qp;
+ u16 dlid;
+
+ /* Check for GRH */
+ if (!has_grh) {
+ ohdr = &hdr->u.oth;
+ hdrsize = 8 + 12 + 8; /* LRH + BTH + DETH */
+ } else {
+ ohdr = &hdr->u.l.oth;
+ hdrsize = 8 + 40 + 12 + 8; /* LRH + GRH + BTH + DETH */
+ }
+ qkey = be32_to_cpu(ohdr->u.ud.deth[0]);
+ src_qp = be32_to_cpu(ohdr->u.ud.deth[1]) & QIB_QPN_MASK;
+
+ /* Get the number of bytes the message was padded by. */
+ pad = (be32_to_cpu(ohdr->bth[0]) >> 20) & 3;
+ if (unlikely(tlen < (hdrsize + pad + 4))) {
+ /* Drop incomplete packets. */
+ ibp->n_pkt_drops++;
+ goto bail;
+ }
+ tlen -= hdrsize + pad + 4;
+
+ /*
+ * Check that the permissive LID is only used on QP0
+ * and the QKEY matches (see 9.6.1.4.1 and 9.6.1.5.1).
+ */
+ if (qp->ibqp.qp_num) {
+ if (unlikely(hdr->lrh[1] == IB_LID_PERMISSIVE ||
+ hdr->lrh[3] == IB_LID_PERMISSIVE)) {
+ ibp->n_pkt_drops++;
+ goto bail;
+ }
+ if (qp->ibqp.qp_num > 1) {
+ u16 pkey1, pkey2;
+
+ pkey1 = be32_to_cpu(ohdr->bth[0]);
+ pkey2 = qib_get_pkey(ibp, qp->s_pkey_index);
+ if (unlikely(!qib_pkey_ok(pkey1, pkey2))) {
+ qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_PKEY,
+ pkey1,
+ (be16_to_cpu(hdr->lrh[0]) >> 4) &
+ 0xF,
+ src_qp, qp->ibqp.qp_num,
+ hdr->lrh[3], hdr->lrh[1]);
+ goto bail;
+ }
+ }
+ if (unlikely(qkey != qp->qkey)) {
+ qib_bad_pqkey(ibp, IB_NOTICE_TRAP_BAD_QKEY, qkey,
+ (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF,
+ src_qp, qp->ibqp.qp_num,
+ hdr->lrh[3], hdr->lrh[1]);
+ goto bail;
+ }
+ /* Drop invalid MAD packets (see 13.5.3.1). */
+ if (unlikely(qp->ibqp.qp_num == 1 &&
+ (tlen != 256 ||
+ (be16_to_cpu(hdr->lrh[0]) >> 12) == 15))) {
+ ibp->n_pkt_drops++;
+ goto bail;
+ }
+ } else {
+ struct ib_smp *smp;
+
+ /* Drop invalid MAD packets (see 13.5.3.1). */
+ if (tlen != 256 || (be16_to_cpu(hdr->lrh[0]) >> 12) != 15) {
+ ibp->n_pkt_drops++;
+ goto bail;
+ }
+ smp = (struct ib_smp *) data;
+ if ((hdr->lrh[1] == IB_LID_PERMISSIVE ||
+ hdr->lrh[3] == IB_LID_PERMISSIVE) &&
+ smp->mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
+ ibp->n_pkt_drops++;
+ goto bail;
+ }
+ }
+
+ /*
+ * The opcode is in the low byte when its in network order
+ * (top byte when in host order).
+ */
+ opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ if (qp->ibqp.qp_num > 1 &&
+ opcode == IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE) {
+ wc.ex.imm_data = ohdr->u.ud.imm_data;
+ wc.wc_flags = IB_WC_WITH_IMM;
+ hdrsize += sizeof(u32);
+ } else if (opcode == IB_OPCODE_UD_SEND_ONLY) {
+ wc.ex.imm_data = 0;
+ wc.wc_flags = 0;
+ } else {
+ ibp->n_pkt_drops++;
+ goto bail;
+ }
+
+ /*
+ * A GRH is expected to preceed the data even if not
+ * present on the wire.
+ */
+ wc.byte_len = tlen + sizeof(struct ib_grh);
+
+ /*
+ * We need to serialize getting a receive work queue entry and
+ * generating a completion for it against QPs sending to this QP
+ * locally.
+ */
+ spin_lock(&qp->r_lock);
+
+ /*
+ * Get the next work request entry to find where to put the data.
+ */
+ if (qp->r_flags & QIB_R_REUSE_SGE)
+ qp->r_flags &= ~QIB_R_REUSE_SGE;
+ else {
+ int ret;
+
+ ret = qib_get_rwqe(qp, 0);
+ if (ret < 0) {
+ qib_rc_error(qp, IB_WC_LOC_QP_OP_ERR);
+ goto bail_unlock;
+ }
+ if (!ret) {
+ if (qp->ibqp.qp_num == 0)
+ ibp->n_vl15_dropped++;
+ goto bail_unlock;
+ }
+ }
+ /* Silently drop packets which are too big. */
+ if (unlikely(wc.byte_len > qp->r_len)) {
+ qp->r_flags |= QIB_R_REUSE_SGE;
+ ibp->n_pkt_drops++;
+ goto bail_unlock;
+ }
+ if (has_grh) {
+ qib_copy_sge(&qp->r_sge, &hdr->u.l.grh,
+ sizeof(struct ib_grh), 1);
+ wc.wc_flags |= IB_WC_GRH;
+ } else
+ qib_skip_sge(&qp->r_sge, sizeof(struct ib_grh), 1);
+ qib_copy_sge(&qp->r_sge, data, wc.byte_len - sizeof(struct ib_grh), 1);
+ while (qp->r_sge.num_sge) {
+ atomic_dec(&qp->r_sge.sge.mr->refcount);
+ if (--qp->r_sge.num_sge)
+ qp->r_sge.sge = *qp->r_sge.sg_list++;
+ }
+ if (!test_and_clear_bit(QIB_R_WRID_VALID, &qp->r_aflags))
+ goto bail_unlock;
+ wc.wr_id = qp->r_wr_id;
+ wc.status = IB_WC_SUCCESS;
+ wc.opcode = IB_WC_RECV;
+ wc.vendor_err = 0;
+ wc.qp = &qp->ibqp;
+ wc.src_qp = src_qp;
+ wc.pkey_index = qp->ibqp.qp_type == IB_QPT_GSI ?
+ qib_lookup_pkey(ibp, be32_to_cpu(ohdr->bth[0])) : 0;
+ wc.slid = be16_to_cpu(hdr->lrh[3]);
+ wc.sl = (be16_to_cpu(hdr->lrh[0]) >> 4) & 0xF;
+ dlid = be16_to_cpu(hdr->lrh[1]);
+ /*
+ * Save the LMC lower bits if the destination LID is a unicast LID.
+ */
+ wc.dlid_path_bits = dlid >= QIB_MULTICAST_LID_BASE ? 0 :
+ dlid & ((1 << ppd_from_ibp(ibp)->lmc) - 1);
+ wc.port_num = qp->port_num;
+ /* Signal completion event if the solicited bit is set. */
+ qib_cq_enter(to_icq(qp->ibqp.recv_cq), &wc,
+ (ohdr->bth[0] &
+ cpu_to_be32(IB_BTH_SOLICITED)) != 0);
+bail_unlock:
+ spin_unlock(&qp->r_lock);
+bail:;
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 42/52] IB/qib: Add qib_user_pages.c
From: Ralph Campbell @ 2010-05-07 0:02 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_user_pages.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_user_pages.c | 157 ++++++++++++++++++++++++++++
1 files changed, 157 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_user_pages.c
diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
new file mode 100644
index 0000000..d7a26c1
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/mm.h>
+#include <linux/device.h>
+
+#include "qib.h"
+
+static void __qib_release_user_pages(struct page **p, size_t num_pages,
+ int dirty)
+{
+ size_t i;
+
+ for (i = 0; i < num_pages; i++) {
+ if (dirty)
+ set_page_dirty_lock(p[i]);
+ put_page(p[i]);
+ }
+}
+
+/*
+ * Call with current->mm->mmap_sem held.
+ */
+static int __get_user_pages(unsigned long start_page, size_t num_pages,
+ struct page **p, struct vm_area_struct **vma)
+{
+ unsigned long lock_limit;
+ size_t got;
+ int ret;
+
+ lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+
+ if (num_pages > lock_limit && !capable(CAP_IPC_LOCK)) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ for (got = 0; got < num_pages; got += ret) {
+ ret = get_user_pages(current, current->mm,
+ start_page + got * PAGE_SIZE,
+ num_pages - got, 1, 1,
+ p + got, vma);
+ if (ret < 0)
+ goto bail_release;
+ }
+
+ current->mm->locked_vm += num_pages;
+
+ ret = 0;
+ goto bail;
+
+bail_release:
+ __qib_release_user_pages(p, got, 0);
+bail:
+ return ret;
+}
+
+/**
+ * qib_map_page - a safety wrapper around pci_map_page()
+ *
+ * A dma_addr of all 0's is interpreted by the chip as "disabled".
+ * Unfortunately, it can also be a valid dma_addr returned on some
+ * architectures.
+ *
+ * The powerpc iommu assigns dma_addrs in ascending order, so we don't
+ * have to bother with retries or mapping a dummy page to insure we
+ * don't just get the same mapping again.
+ *
+ * I'm sure we won't be so lucky with other iommu's, so FIXME.
+ */
+dma_addr_t qib_map_page(struct pci_dev *hwdev, struct page *page,
+ unsigned long offset, size_t size, int direction)
+{
+ dma_addr_t phys;
+
+ phys = pci_map_page(hwdev, page, offset, size, direction);
+
+ if (phys == 0) {
+ pci_unmap_page(hwdev, phys, size, direction);
+ phys = pci_map_page(hwdev, page, offset, size, direction);
+ /*
+ * FIXME: If we get 0 again, we should keep this page,
+ * map another, then free the 0 page.
+ */
+ }
+
+ return phys;
+}
+
+/**
+ * qib_get_user_pages - lock user pages into memory
+ * @start_page: the start page
+ * @num_pages: the number of pages
+ * @p: the output page structures
+ *
+ * This function takes a given start page (page aligned user virtual
+ * address) and pins it and the following specified number of pages. For
+ * now, num_pages is always 1, but that will probably change at some point
+ * (because caller is doing expected sends on a single virtually contiguous
+ * buffer, so we can do all pages at once).
+ */
+int qib_get_user_pages(unsigned long start_page, size_t num_pages,
+ struct page **p)
+{
+ int ret;
+
+ down_write(¤t->mm->mmap_sem);
+
+ ret = __get_user_pages(start_page, num_pages, p, NULL);
+
+ up_write(¤t->mm->mmap_sem);
+
+ return ret;
+}
+
+void qib_release_user_pages(struct page **p, size_t num_pages)
+{
+ if (current->mm) /* during close after signal, mm can be NULL */
+ down_write(¤t->mm->mmap_sem);
+
+ __qib_release_user_pages(p, num_pages, 1);
+
+ if (current->mm) {
+ current->mm->locked_vm -= num_pages;
+ up_write(¤t->mm->mmap_sem);
+ }
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 43/52] IB/qib: Add qib_user_sdma.c
From: Ralph Campbell @ 2010-05-07 0:02 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_user_sdma.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_user_sdma.c | 897 +++++++++++++++++++++++++++++
1 files changed, 897 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_user_sdma.c
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
new file mode 100644
index 0000000..4c19e06
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -0,0 +1,897 @@
+/*
+ * Copyright (c) 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/mm.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/uio.h>
+#include <linux/rbtree.h>
+#include <linux/spinlock.h>
+#include <linux/delay.h>
+
+#include "qib.h"
+#include "qib_user_sdma.h"
+
+/* minimum size of header */
+#define QIB_USER_SDMA_MIN_HEADER_LENGTH 64
+/* expected size of headers (for dma_pool) */
+#define QIB_USER_SDMA_EXP_HEADER_LENGTH 64
+/* attempt to drain the queue for 5secs */
+#define QIB_USER_SDMA_DRAIN_TIMEOUT 500
+
+struct qib_user_sdma_pkt {
+ u8 naddr; /* dimension of addr (1..3) ... */
+ u32 counter; /* sdma pkts queued counter for this entry */
+ u64 added; /* global descq number of entries */
+
+ struct {
+ u32 offset; /* offset for kvaddr, addr */
+ u32 length; /* length in page */
+ u8 put_page; /* should we put_page? */
+ u8 dma_mapped; /* is page dma_mapped? */
+ struct page *page; /* may be NULL (coherent mem) */
+ void *kvaddr; /* FIXME: only for pio hack */
+ dma_addr_t addr;
+ } addr[4]; /* max pages, any more and we coalesce */
+ struct list_head list; /* list element */
+};
+
+struct qib_user_sdma_queue {
+ /*
+ * pkts sent to dma engine are queued on this
+ * list head. the type of the elements of this
+ * list are struct qib_user_sdma_pkt...
+ */
+ struct list_head sent;
+
+ /* headers with expected length are allocated from here... */
+ char header_cache_name[64];
+ struct dma_pool *header_cache;
+
+ /* packets are allocated from the slab cache... */
+ char pkt_slab_name[64];
+ struct kmem_cache *pkt_slab;
+
+ /* as packets go on the queued queue, they are counted... */
+ u32 counter;
+ u32 sent_counter;
+
+ /* dma page table */
+ struct rb_root dma_pages_root;
+
+ /* protect everything above... */
+ struct mutex lock;
+};
+
+struct qib_user_sdma_queue *
+qib_user_sdma_queue_create(struct device *dev, int unit, int ctxt, int sctxt)
+{
+ struct qib_user_sdma_queue *pq =
+ kmalloc(sizeof(struct qib_user_sdma_queue), GFP_KERNEL);
+
+ if (!pq)
+ goto done;
+
+ pq->counter = 0;
+ pq->sent_counter = 0;
+ INIT_LIST_HEAD(&pq->sent);
+
+ mutex_init(&pq->lock);
+
+ snprintf(pq->pkt_slab_name, sizeof(pq->pkt_slab_name),
+ "qib-user-sdma-pkts-%u-%02u.%02u", unit, ctxt, sctxt);
+ pq->pkt_slab = kmem_cache_create(pq->pkt_slab_name,
+ sizeof(struct qib_user_sdma_pkt),
+ 0, 0, NULL);
+
+ if (!pq->pkt_slab)
+ goto err_kfree;
+
+ snprintf(pq->header_cache_name, sizeof(pq->header_cache_name),
+ "qib-user-sdma-headers-%u-%02u.%02u", unit, ctxt, sctxt);
+ pq->header_cache = dma_pool_create(pq->header_cache_name,
+ dev,
+ QIB_USER_SDMA_EXP_HEADER_LENGTH,
+ 4, 0);
+ if (!pq->header_cache)
+ goto err_slab;
+
+ pq->dma_pages_root = RB_ROOT;
+
+ goto done;
+
+err_slab:
+ kmem_cache_destroy(pq->pkt_slab);
+err_kfree:
+ kfree(pq);
+ pq = NULL;
+
+done:
+ return pq;
+}
+
+static void qib_user_sdma_init_frag(struct qib_user_sdma_pkt *pkt,
+ int i, size_t offset, size_t len,
+ int put_page, int dma_mapped,
+ struct page *page,
+ void *kvaddr, dma_addr_t dma_addr)
+{
+ pkt->addr[i].offset = offset;
+ pkt->addr[i].length = len;
+ pkt->addr[i].put_page = put_page;
+ pkt->addr[i].dma_mapped = dma_mapped;
+ pkt->addr[i].page = page;
+ pkt->addr[i].kvaddr = kvaddr;
+ pkt->addr[i].addr = dma_addr;
+}
+
+static void qib_user_sdma_init_header(struct qib_user_sdma_pkt *pkt,
+ u32 counter, size_t offset,
+ size_t len, int dma_mapped,
+ struct page *page,
+ void *kvaddr, dma_addr_t dma_addr)
+{
+ pkt->naddr = 1;
+ pkt->counter = counter;
+ qib_user_sdma_init_frag(pkt, 0, offset, len, 0, dma_mapped, page,
+ kvaddr, dma_addr);
+}
+
+/* we've too many pages in the iovec, coalesce to a single page */
+static int qib_user_sdma_coalesce(const struct qib_devdata *dd,
+ struct qib_user_sdma_pkt *pkt,
+ const struct iovec *iov,
+ unsigned long niov)
+{
+ int ret = 0;
+ struct page *page = alloc_page(GFP_KERNEL);
+ void *mpage_save;
+ char *mpage;
+ int i;
+ int len = 0;
+ dma_addr_t dma_addr;
+
+ if (!page) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ mpage = kmap(page);
+ mpage_save = mpage;
+ for (i = 0; i < niov; i++) {
+ int cfur;
+
+ cfur = copy_from_user(mpage,
+ iov[i].iov_base, iov[i].iov_len);
+ if (cfur) {
+ ret = -EFAULT;
+ goto free_unmap;
+ }
+
+ mpage += iov[i].iov_len;
+ len += iov[i].iov_len;
+ }
+
+ dma_addr = dma_map_page(&dd->pcidev->dev, page, 0, len,
+ DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ ret = -ENOMEM;
+ goto free_unmap;
+ }
+
+ qib_user_sdma_init_frag(pkt, 1, 0, len, 0, 1, page, mpage_save,
+ dma_addr);
+ pkt->naddr = 2;
+
+ goto done;
+
+free_unmap:
+ kunmap(page);
+ __free_page(page);
+done:
+ return ret;
+}
+
+/*
+ * How many pages in this iovec element?
+ */
+static int qib_user_sdma_num_pages(const struct iovec *iov)
+{
+ const unsigned long addr = (unsigned long) iov->iov_base;
+ const unsigned long len = iov->iov_len;
+ const unsigned long spage = addr & PAGE_MASK;
+ const unsigned long epage = (addr + len - 1) & PAGE_MASK;
+
+ return 1 + ((epage - spage) >> PAGE_SHIFT);
+}
+
+/*
+ * Truncate length to page boundry.
+ */
+static int qib_user_sdma_page_length(unsigned long addr, unsigned long len)
+{
+ const unsigned long offset = addr & ~PAGE_MASK;
+
+ return ((offset + len) > PAGE_SIZE) ? (PAGE_SIZE - offset) : len;
+}
+
+static void qib_user_sdma_free_pkt_frag(struct device *dev,
+ struct qib_user_sdma_queue *pq,
+ struct qib_user_sdma_pkt *pkt,
+ int frag)
+{
+ const int i = frag;
+
+ if (pkt->addr[i].page) {
+ if (pkt->addr[i].dma_mapped)
+ dma_unmap_page(dev,
+ pkt->addr[i].addr,
+ pkt->addr[i].length,
+ DMA_TO_DEVICE);
+
+ if (pkt->addr[i].kvaddr)
+ kunmap(pkt->addr[i].page);
+
+ if (pkt->addr[i].put_page)
+ put_page(pkt->addr[i].page);
+ else
+ __free_page(pkt->addr[i].page);
+ } else if (pkt->addr[i].kvaddr)
+ /* free coherent mem from cache... */
+ dma_pool_free(pq->header_cache,
+ pkt->addr[i].kvaddr, pkt->addr[i].addr);
+}
+
+/* return number of pages pinned... */
+static int qib_user_sdma_pin_pages(const struct qib_devdata *dd,
+ struct qib_user_sdma_pkt *pkt,
+ unsigned long addr, int tlen, int npages)
+{
+ struct page *pages[2];
+ int j;
+ int ret;
+
+ ret = get_user_pages(current, current->mm, addr,
+ npages, 0, 1, pages, NULL);
+
+ if (ret != npages) {
+ int i;
+
+ for (i = 0; i < ret; i++)
+ put_page(pages[i]);
+
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ for (j = 0; j < npages; j++) {
+ /* map the pages... */
+ const int flen = qib_user_sdma_page_length(addr, tlen);
+ dma_addr_t dma_addr =
+ dma_map_page(&dd->pcidev->dev,
+ pages[j], 0, flen, DMA_TO_DEVICE);
+ unsigned long fofs = addr & ~PAGE_MASK;
+
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ qib_user_sdma_init_frag(pkt, pkt->naddr, fofs, flen, 1, 1,
+ pages[j], kmap(pages[j]), dma_addr);
+
+ pkt->naddr++;
+ addr += flen;
+ tlen -= flen;
+ }
+
+done:
+ return ret;
+}
+
+static int qib_user_sdma_pin_pkt(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
+ struct qib_user_sdma_pkt *pkt,
+ const struct iovec *iov,
+ unsigned long niov)
+{
+ int ret = 0;
+ unsigned long idx;
+
+ for (idx = 0; idx < niov; idx++) {
+ const int npages = qib_user_sdma_num_pages(iov + idx);
+ const unsigned long addr = (unsigned long) iov[idx].iov_base;
+
+ ret = qib_user_sdma_pin_pages(dd, pkt, addr,
+ iov[idx].iov_len, npages);
+ if (ret < 0)
+ goto free_pkt;
+ }
+
+ goto done;
+
+free_pkt:
+ for (idx = 0; idx < pkt->naddr; idx++)
+ qib_user_sdma_free_pkt_frag(&dd->pcidev->dev, pq, pkt, idx);
+
+done:
+ return ret;
+}
+
+static int qib_user_sdma_init_payload(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
+ struct qib_user_sdma_pkt *pkt,
+ const struct iovec *iov,
+ unsigned long niov, int npages)
+{
+ int ret = 0;
+
+ if (npages >= ARRAY_SIZE(pkt->addr))
+ ret = qib_user_sdma_coalesce(dd, pkt, iov, niov);
+ else
+ ret = qib_user_sdma_pin_pkt(dd, pq, pkt, iov, niov);
+
+ return ret;
+}
+
+/* free a packet list -- return counter value of last packet */
+static void qib_user_sdma_free_pkt_list(struct device *dev,
+ struct qib_user_sdma_queue *pq,
+ struct list_head *list)
+{
+ struct qib_user_sdma_pkt *pkt, *pkt_next;
+
+ list_for_each_entry_safe(pkt, pkt_next, list, list) {
+ int i;
+
+ for (i = 0; i < pkt->naddr; i++)
+ qib_user_sdma_free_pkt_frag(dev, pq, pkt, i);
+
+ kmem_cache_free(pq->pkt_slab, pkt);
+ }
+}
+
+/*
+ * copy headers, coalesce etc -- pq->lock must be held
+ *
+ * we queue all the packets to list, returning the
+ * number of bytes total. list must be empty initially,
+ * as, if there is an error we clean it...
+ */
+static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
+ struct qib_user_sdma_queue *pq,
+ struct list_head *list,
+ const struct iovec *iov,
+ unsigned long niov,
+ int maxpkts)
+{
+ unsigned long idx = 0;
+ int ret = 0;
+ int npkts = 0;
+ struct page *page = NULL;
+ __le32 *pbc;
+ dma_addr_t dma_addr;
+ struct qib_user_sdma_pkt *pkt = NULL;
+ size_t len;
+ size_t nw;
+ u32 counter = pq->counter;
+ int dma_mapped = 0;
+
+ while (idx < niov && npkts < maxpkts) {
+ const unsigned long addr = (unsigned long) iov[idx].iov_base;
+ const unsigned long idx_save = idx;
+ unsigned pktnw;
+ unsigned pktnwc;
+ int nfrags = 0;
+ int npages = 0;
+ int cfur;
+
+ dma_mapped = 0;
+ len = iov[idx].iov_len;
+ nw = len >> 2;
+ page = NULL;
+
+ pkt = kmem_cache_alloc(pq->pkt_slab, GFP_KERNEL);
+ if (!pkt) {
+ ret = -ENOMEM;
+ goto free_list;
+ }
+
+ if (len < QIB_USER_SDMA_MIN_HEADER_LENGTH ||
+ len > PAGE_SIZE || len & 3 || addr & 3) {
+ ret = -EINVAL;
+ goto free_pkt;
+ }
+
+ if (len == QIB_USER_SDMA_EXP_HEADER_LENGTH)
+ pbc = dma_pool_alloc(pq->header_cache, GFP_KERNEL,
+ &dma_addr);
+ else
+ pbc = NULL;
+
+ if (!pbc) {
+ page = alloc_page(GFP_KERNEL);
+ if (!page) {
+ ret = -ENOMEM;
+ goto free_pkt;
+ }
+ pbc = kmap(page);
+ }
+
+ cfur = copy_from_user(pbc, iov[idx].iov_base, len);
+ if (cfur) {
+ ret = -EFAULT;
+ goto free_pbc;
+ }
+
+ /*
+ * This assignment is a bit strange. it's because the
+ * the pbc counts the number of 32 bit words in the full
+ * packet _except_ the first word of the pbc itself...
+ */
+ pktnwc = nw - 1;
+
+ /*
+ * pktnw computation yields the number of 32 bit words
+ * that the caller has indicated in the PBC. note that
+ * this is one less than the total number of words that
+ * goes to the send DMA engine as the first 32 bit word
+ * of the PBC itself is not counted. Armed with this count,
+ * we can verify that the packet is consistent with the
+ * iovec lengths.
+ */
+ pktnw = le32_to_cpu(*pbc) & QIB_PBC_LENGTH_MASK;
+ if (pktnw < pktnwc || pktnw > pktnwc + (PAGE_SIZE >> 2)) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
+ idx++;
+ while (pktnwc < pktnw && idx < niov) {
+ const size_t slen = iov[idx].iov_len;
+ const unsigned long faddr =
+ (unsigned long) iov[idx].iov_base;
+
+ if (slen & 3 || faddr & 3 || !slen ||
+ slen > PAGE_SIZE) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
+ npages++;
+ if ((faddr & PAGE_MASK) !=
+ ((faddr + slen - 1) & PAGE_MASK))
+ npages++;
+
+ pktnwc += slen >> 2;
+ idx++;
+ nfrags++;
+ }
+
+ if (pktnwc != pktnw) {
+ ret = -EINVAL;
+ goto free_pbc;
+ }
+
+ if (page) {
+ dma_addr = dma_map_page(&dd->pcidev->dev,
+ page, 0, len, DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev, dma_addr)) {
+ ret = -ENOMEM;
+ goto free_pbc;
+ }
+
+ dma_mapped = 1;
+ }
+
+ qib_user_sdma_init_header(pkt, counter, 0, len, dma_mapped,
+ page, pbc, dma_addr);
+
+ if (nfrags) {
+ ret = qib_user_sdma_init_payload(dd, pq, pkt,
+ iov + idx_save + 1,
+ nfrags, npages);
+ if (ret < 0)
+ goto free_pbc_dma;
+ }
+
+ counter++;
+ npkts++;
+
+ list_add_tail(&pkt->list, list);
+ }
+
+ ret = idx;
+ goto done;
+
+free_pbc_dma:
+ if (dma_mapped)
+ dma_unmap_page(&dd->pcidev->dev, dma_addr, len, DMA_TO_DEVICE);
+free_pbc:
+ if (page) {
+ kunmap(page);
+ __free_page(page);
+ } else
+ dma_pool_free(pq->header_cache, pbc, dma_addr);
+free_pkt:
+ kmem_cache_free(pq->pkt_slab, pkt);
+free_list:
+ qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, list);
+done:
+ return ret;
+}
+
+static void qib_user_sdma_set_complete_counter(struct qib_user_sdma_queue *pq,
+ u32 c)
+{
+ pq->sent_counter = c;
+}
+
+/* try to clean out queue -- needs pq->lock */
+static int qib_user_sdma_queue_clean(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq)
+{
+ struct qib_devdata *dd = ppd->dd;
+ struct list_head free_list;
+ struct qib_user_sdma_pkt *pkt;
+ struct qib_user_sdma_pkt *pkt_prev;
+ int ret = 0;
+
+ INIT_LIST_HEAD(&free_list);
+
+ list_for_each_entry_safe(pkt, pkt_prev, &pq->sent, list) {
+ s64 descd = ppd->sdma_descq_removed - pkt->added;
+
+ if (descd < 0)
+ break;
+
+ list_move_tail(&pkt->list, &free_list);
+
+ /* one more packet cleaned */
+ ret++;
+ }
+
+ if (!list_empty(&free_list)) {
+ u32 counter;
+
+ pkt = list_entry(free_list.prev,
+ struct qib_user_sdma_pkt, list);
+ counter = pkt->counter;
+
+ qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
+ qib_user_sdma_set_complete_counter(pq, counter);
+ }
+
+ return ret;
+}
+
+void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq)
+{
+ if (!pq)
+ return;
+
+ kmem_cache_destroy(pq->pkt_slab);
+ dma_pool_destroy(pq->header_cache);
+ kfree(pq);
+}
+
+/* clean descriptor queue, returns > 0 if some elements cleaned */
+static int qib_user_sdma_hwqueue_clean(struct qib_pportdata *ppd)
+{
+ int ret;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+ ret = qib_sdma_make_progress(ppd);
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
+ return ret;
+}
+
+/* we're in close, drain packets so that we can cleanup successfully... */
+void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq)
+{
+ struct qib_devdata *dd = ppd->dd;
+ int i;
+
+ if (!pq)
+ return;
+
+ for (i = 0; i < QIB_USER_SDMA_DRAIN_TIMEOUT; i++) {
+ mutex_lock(&pq->lock);
+ if (list_empty(&pq->sent)) {
+ mutex_unlock(&pq->lock);
+ break;
+ }
+ qib_user_sdma_hwqueue_clean(ppd);
+ qib_user_sdma_queue_clean(ppd, pq);
+ mutex_unlock(&pq->lock);
+ msleep(10);
+ }
+
+ if (!list_empty(&pq->sent)) {
+ struct list_head free_list;
+
+ qib_dev_err(dd, "user sdma lists not empty: forcing!\n");
+ INIT_LIST_HEAD(&free_list);
+ mutex_lock(&pq->lock);
+ list_splice_init(&pq->sent, &free_list);
+ qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &free_list);
+ mutex_unlock(&pq->lock);
+ }
+}
+
+static inline __le64 qib_sdma_make_desc0(struct qib_pportdata *ppd,
+ u64 addr, u64 dwlen, u64 dwoffset)
+{
+ u8 tmpgen;
+
+ tmpgen = ppd->sdma_generation;
+
+ return cpu_to_le64(/* SDmaPhyAddr[31:0] */
+ ((addr & 0xfffffffcULL) << 32) |
+ /* SDmaGeneration[1:0] */
+ ((tmpgen & 3ULL) << 30) |
+ /* SDmaDwordCount[10:0] */
+ ((dwlen & 0x7ffULL) << 16) |
+ /* SDmaBufOffset[12:2] */
+ (dwoffset & 0x7ffULL));
+}
+
+static inline __le64 qib_sdma_make_first_desc0(__le64 descq)
+{
+ return descq | cpu_to_le64(1ULL << 12);
+}
+
+static inline __le64 qib_sdma_make_last_desc0(__le64 descq)
+{
+ /* last */ /* dma head */
+ return descq | cpu_to_le64(1ULL << 11 | 1ULL << 13);
+}
+
+static inline __le64 qib_sdma_make_desc1(u64 addr)
+{
+ /* SDmaPhyAddr[47:32] */
+ return cpu_to_le64(addr >> 32);
+}
+
+static void qib_user_sdma_send_frag(struct qib_pportdata *ppd,
+ struct qib_user_sdma_pkt *pkt, int idx,
+ unsigned ofs, u16 tail)
+{
+ const u64 addr = (u64) pkt->addr[idx].addr +
+ (u64) pkt->addr[idx].offset;
+ const u64 dwlen = (u64) pkt->addr[idx].length / 4;
+ __le64 *descqp;
+ __le64 descq0;
+
+ descqp = &ppd->sdma_descq[tail].qw[0];
+
+ descq0 = qib_sdma_make_desc0(ppd, addr, dwlen, ofs);
+ if (idx == 0)
+ descq0 = qib_sdma_make_first_desc0(descq0);
+ if (idx == pkt->naddr - 1)
+ descq0 = qib_sdma_make_last_desc0(descq0);
+
+ descqp[0] = descq0;
+ descqp[1] = qib_sdma_make_desc1(addr);
+}
+
+/* pq->lock must be held, get packets on the wire... */
+static int qib_user_sdma_push_pkts(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq,
+ struct list_head *pktlist)
+{
+ struct qib_devdata *dd = ppd->dd;
+ int ret = 0;
+ unsigned long flags;
+ u16 tail;
+ u8 generation;
+ u64 descq_added;
+
+ if (list_empty(pktlist))
+ return 0;
+
+ if (unlikely(!(ppd->lflags & QIBL_LINKACTIVE)))
+ return -ECOMM;
+
+ spin_lock_irqsave(&ppd->sdma_lock, flags);
+
+ /* keep a copy for restoring purposes in case of problems */
+ generation = ppd->sdma_generation;
+ descq_added = ppd->sdma_descq_added;
+
+ if (unlikely(!__qib_sdma_running(ppd))) {
+ ret = -ECOMM;
+ goto unlock;
+ }
+
+ tail = ppd->sdma_descq_tail;
+ while (!list_empty(pktlist)) {
+ struct qib_user_sdma_pkt *pkt =
+ list_entry(pktlist->next, struct qib_user_sdma_pkt,
+ list);
+ int i;
+ unsigned ofs = 0;
+ u16 dtail = tail;
+
+ if (pkt->naddr > qib_sdma_descq_freecnt(ppd))
+ goto unlock_check_tail;
+
+ for (i = 0; i < pkt->naddr; i++) {
+ qib_user_sdma_send_frag(ppd, pkt, i, ofs, tail);
+ ofs += pkt->addr[i].length >> 2;
+
+ if (++tail == ppd->sdma_descq_cnt) {
+ tail = 0;
+ ++ppd->sdma_generation;
+ }
+ }
+
+ if ((ofs << 2) > ppd->ibmaxlen) {
+ ret = -EMSGSIZE;
+ goto unlock;
+ }
+
+ /*
+ * If the packet is >= 2KB mtu equivalent, we have to use
+ * the large buffers, and have to mark each descriptor as
+ * part of a large buffer packet.
+ */
+ if (ofs > dd->piosize2kmax_dwords) {
+ for (i = 0; i < pkt->naddr; i++) {
+ ppd->sdma_descq[dtail].qw[0] |=
+ cpu_to_le64(1ULL << 14);
+ if (++dtail == ppd->sdma_descq_cnt)
+ dtail = 0;
+ }
+ }
+
+ ppd->sdma_descq_added += pkt->naddr;
+ pkt->added = ppd->sdma_descq_added;
+ list_move_tail(&pkt->list, &pq->sent);
+ ret++;
+ }
+
+unlock_check_tail:
+ /* advance the tail on the chip if necessary */
+ if (ppd->sdma_descq_tail != tail)
+ dd->f_sdma_update_tail(ppd, tail);
+
+unlock:
+ if (unlikely(ret < 0)) {
+ ppd->sdma_generation = generation;
+ ppd->sdma_descq_added = descq_added;
+ }
+ spin_unlock_irqrestore(&ppd->sdma_lock, flags);
+
+ return ret;
+}
+
+int qib_user_sdma_writev(struct qib_ctxtdata *rcd,
+ struct qib_user_sdma_queue *pq,
+ const struct iovec *iov,
+ unsigned long dim)
+{
+ struct qib_devdata *dd = rcd->dd;
+ struct qib_pportdata *ppd = rcd->ppd;
+ int ret = 0;
+ struct list_head list;
+ int npkts = 0;
+
+ INIT_LIST_HEAD(&list);
+
+ mutex_lock(&pq->lock);
+
+ /* why not -ECOMM like qib_user_sdma_push_pkts() below? */
+ if (!qib_sdma_running(ppd))
+ goto done_unlock;
+
+ if (ppd->sdma_descq_added != ppd->sdma_descq_removed) {
+ qib_user_sdma_hwqueue_clean(ppd);
+ qib_user_sdma_queue_clean(ppd, pq);
+ }
+
+ while (dim) {
+ const int mxp = 8;
+
+ down_write(¤t->mm->mmap_sem);
+ ret = qib_user_sdma_queue_pkts(dd, pq, &list, iov, dim, mxp);
+ up_write(¤t->mm->mmap_sem);
+
+ if (ret <= 0)
+ goto done_unlock;
+ else {
+ dim -= ret;
+ iov += ret;
+ }
+
+ /* force packets onto the sdma hw queue... */
+ if (!list_empty(&list)) {
+ /*
+ * Lazily clean hw queue. the 4 is a guess of about
+ * how many sdma descriptors a packet will take (it
+ * doesn't have to be perfect).
+ */
+ if (qib_sdma_descq_freecnt(ppd) < ret * 4) {
+ qib_user_sdma_hwqueue_clean(ppd);
+ qib_user_sdma_queue_clean(ppd, pq);
+ }
+
+ ret = qib_user_sdma_push_pkts(ppd, pq, &list);
+ if (ret < 0)
+ goto done_unlock;
+ else {
+ npkts += ret;
+ pq->counter += ret;
+
+ if (!list_empty(&list))
+ goto done_unlock;
+ }
+ }
+ }
+
+done_unlock:
+ if (!list_empty(&list))
+ qib_user_sdma_free_pkt_list(&dd->pcidev->dev, pq, &list);
+ mutex_unlock(&pq->lock);
+
+ return (ret < 0) ? ret : npkts;
+}
+
+int qib_user_sdma_make_progress(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq)
+{
+ int ret = 0;
+
+ mutex_lock(&pq->lock);
+ qib_user_sdma_hwqueue_clean(ppd);
+ ret = qib_user_sdma_queue_clean(ppd, pq);
+ mutex_unlock(&pq->lock);
+
+ return ret;
+}
+
+u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq)
+{
+ return pq ? pq->sent_counter : 0;
+}
+
+u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq)
+{
+ return pq ? pq->counter : 0;
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 44/52] IB/qib: Add qib_user_sdma.h
From: Ralph Campbell @ 2010-05-07 0:02 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_user_sdma.h file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_user_sdma.h | 52 +++++++++++++++++++++++++++++
1 files changed, 52 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_user_sdma.h
diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.h b/drivers/infiniband/hw/qib/qib_user_sdma.h
new file mode 100644
index 0000000..ce8cbaf
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include <linux/device.h>
+
+struct qib_user_sdma_queue;
+
+struct qib_user_sdma_queue *
+qib_user_sdma_queue_create(struct device *dev, int unit, int port, int sport);
+void qib_user_sdma_queue_destroy(struct qib_user_sdma_queue *pq);
+
+int qib_user_sdma_writev(struct qib_ctxtdata *pd,
+ struct qib_user_sdma_queue *pq,
+ const struct iovec *iov,
+ unsigned long dim);
+
+int qib_user_sdma_make_progress(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq);
+
+void qib_user_sdma_queue_drain(struct qib_pportdata *ppd,
+ struct qib_user_sdma_queue *pq);
+
+u32 qib_user_sdma_complete_counter(const struct qib_user_sdma_queue *pq);
+u32 qib_user_sdma_inflight_counter(struct qib_user_sdma_queue *pq);
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 45/52] IB/qib: Add qib_verbs.c
From: Ralph Campbell @ 2010-05-07 0:02 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_verbs.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_verbs.c | 2248 +++++++++++++++++++++++++++++++++
1 files changed, 2248 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_verbs.c
diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c
new file mode 100644
index 0000000..cda8f41
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_verbs.c
@@ -0,0 +1,2248 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
+ * All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_mad.h>
+#include <rdma/ib_user_verbs.h>
+#include <linux/io.h>
+#include <linux/utsname.h>
+#include <linux/rculist.h>
+#include <linux/mm.h>
+
+#include "qib.h"
+#include "qib_common.h"
+
+static unsigned int ib_qib_qp_table_size = 251;
+module_param_named(qp_table_size, ib_qib_qp_table_size, uint, S_IRUGO);
+MODULE_PARM_DESC(qp_table_size, "QP table size");
+
+unsigned int ib_qib_lkey_table_size = 16;
+module_param_named(lkey_table_size, ib_qib_lkey_table_size, uint,
+ S_IRUGO);
+MODULE_PARM_DESC(lkey_table_size,
+ "LKEY table size in bits (2^n, 1 <= n <= 23)");
+
+static unsigned int ib_qib_max_pds = 0xFFFF;
+module_param_named(max_pds, ib_qib_max_pds, uint, S_IRUGO);
+MODULE_PARM_DESC(max_pds,
+ "Maximum number of protection domains to support");
+
+static unsigned int ib_qib_max_ahs = 0xFFFF;
+module_param_named(max_ahs, ib_qib_max_ahs, uint, S_IRUGO);
+MODULE_PARM_DESC(max_ahs, "Maximum number of address handles to support");
+
+unsigned int ib_qib_max_cqes = 0x2FFFF;
+module_param_named(max_cqes, ib_qib_max_cqes, uint, S_IRUGO);
+MODULE_PARM_DESC(max_cqes,
+ "Maximum number of completion queue entries to support");
+
+unsigned int ib_qib_max_cqs = 0x1FFFF;
+module_param_named(max_cqs, ib_qib_max_cqs, uint, S_IRUGO);
+MODULE_PARM_DESC(max_cqs, "Maximum number of completion queues to support");
+
+unsigned int ib_qib_max_qp_wrs = 0x3FFF;
+module_param_named(max_qp_wrs, ib_qib_max_qp_wrs, uint, S_IRUGO);
+MODULE_PARM_DESC(max_qp_wrs, "Maximum number of QP WRs to support");
+
+unsigned int ib_qib_max_qps = 16384;
+module_param_named(max_qps, ib_qib_max_qps, uint, S_IRUGO);
+MODULE_PARM_DESC(max_qps, "Maximum number of QPs to support");
+
+unsigned int ib_qib_max_sges = 0x60;
+module_param_named(max_sges, ib_qib_max_sges, uint, S_IRUGO);
+MODULE_PARM_DESC(max_sges, "Maximum number of SGEs to support");
+
+unsigned int ib_qib_max_mcast_grps = 16384;
+module_param_named(max_mcast_grps, ib_qib_max_mcast_grps, uint, S_IRUGO);
+MODULE_PARM_DESC(max_mcast_grps,
+ "Maximum number of multicast groups to support");
+
+unsigned int ib_qib_max_mcast_qp_attached = 16;
+module_param_named(max_mcast_qp_attached, ib_qib_max_mcast_qp_attached,
+ uint, S_IRUGO);
+MODULE_PARM_DESC(max_mcast_qp_attached,
+ "Maximum number of attached QPs to support");
+
+unsigned int ib_qib_max_srqs = 1024;
+module_param_named(max_srqs, ib_qib_max_srqs, uint, S_IRUGO);
+MODULE_PARM_DESC(max_srqs, "Maximum number of SRQs to support");
+
+unsigned int ib_qib_max_srq_sges = 128;
+module_param_named(max_srq_sges, ib_qib_max_srq_sges, uint, S_IRUGO);
+MODULE_PARM_DESC(max_srq_sges, "Maximum number of SRQ SGEs to support");
+
+unsigned int ib_qib_max_srq_wrs = 0x1FFFF;
+module_param_named(max_srq_wrs, ib_qib_max_srq_wrs, uint, S_IRUGO);
+MODULE_PARM_DESC(max_srq_wrs, "Maximum number of SRQ WRs support");
+
+static unsigned int ib_qib_disable_sma;
+module_param_named(disable_sma, ib_qib_disable_sma, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(disable_sma, "Disable the SMA");
+
+/*
+ * Note that it is OK to post send work requests in the SQE and ERR
+ * states; qib_do_send() will process them and generate error
+ * completions as per IB 1.2 C10-96.
+ */
+const int ib_qib_state_ops[IB_QPS_ERR + 1] = {
+ [IB_QPS_RESET] = 0,
+ [IB_QPS_INIT] = QIB_POST_RECV_OK,
+ [IB_QPS_RTR] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK,
+ [IB_QPS_RTS] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
+ QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK |
+ QIB_PROCESS_NEXT_SEND_OK,
+ [IB_QPS_SQD] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
+ QIB_POST_SEND_OK | QIB_PROCESS_SEND_OK,
+ [IB_QPS_SQE] = QIB_POST_RECV_OK | QIB_PROCESS_RECV_OK |
+ QIB_POST_SEND_OK | QIB_FLUSH_SEND,
+ [IB_QPS_ERR] = QIB_POST_RECV_OK | QIB_FLUSH_RECV |
+ QIB_POST_SEND_OK | QIB_FLUSH_SEND,
+};
+
+struct qib_ucontext {
+ struct ib_ucontext ibucontext;
+};
+
+static inline struct qib_ucontext *to_iucontext(struct ib_ucontext
+ *ibucontext)
+{
+ return container_of(ibucontext, struct qib_ucontext, ibucontext);
+}
+
+/*
+ * Translate ib_wr_opcode into ib_wc_opcode.
+ */
+const enum ib_wc_opcode ib_qib_wc_opcode[] = {
+ [IB_WR_RDMA_WRITE] = IB_WC_RDMA_WRITE,
+ [IB_WR_RDMA_WRITE_WITH_IMM] = IB_WC_RDMA_WRITE,
+ [IB_WR_SEND] = IB_WC_SEND,
+ [IB_WR_SEND_WITH_IMM] = IB_WC_SEND,
+ [IB_WR_RDMA_READ] = IB_WC_RDMA_READ,
+ [IB_WR_ATOMIC_CMP_AND_SWP] = IB_WC_COMP_SWAP,
+ [IB_WR_ATOMIC_FETCH_AND_ADD] = IB_WC_FETCH_ADD
+};
+
+/*
+ * System image GUID.
+ */
+__be64 ib_qib_sys_image_guid;
+
+/**
+ * qib_copy_sge - copy data to SGE memory
+ * @ss: the SGE state
+ * @data: the data to copy
+ * @length: the length of the data
+ */
+void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length, int release)
+{
+ struct qib_sge *sge = &ss->sge;
+
+ while (length) {
+ u32 len = sge->length;
+
+ if (len > length)
+ len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ memcpy(sge->vaddr, data, len);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (release)
+ atomic_dec(&sge->mr->refcount);
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ data += len;
+ length -= len;
+ }
+}
+
+/**
+ * qib_skip_sge - skip over SGE memory - XXX almost dup of prev func
+ * @ss: the SGE state
+ * @length: the number of bytes to skip
+ */
+void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release)
+{
+ struct qib_sge *sge = &ss->sge;
+
+ while (length) {
+ u32 len = sge->length;
+
+ if (len > length)
+ len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (release)
+ atomic_dec(&sge->mr->refcount);
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ length -= len;
+ }
+}
+
+/*
+ * Count the number of DMA descriptors needed to send length bytes of data.
+ * Don't modify the qib_sge_state to get the count.
+ * Return zero if any of the segments is not aligned.
+ */
+static u32 qib_count_sge(struct qib_sge_state *ss, u32 length)
+{
+ struct qib_sge *sg_list = ss->sg_list;
+ struct qib_sge sge = ss->sge;
+ u8 num_sge = ss->num_sge;
+ u32 ndesc = 1; /* count the header */
+
+ while (length) {
+ u32 len = sge.length;
+
+ if (len > length)
+ len = length;
+ if (len > sge.sge_length)
+ len = sge.sge_length;
+ BUG_ON(len == 0);
+ if (((long) sge.vaddr & (sizeof(u32) - 1)) ||
+ (len != length && (len & (sizeof(u32) - 1)))) {
+ ndesc = 0;
+ break;
+ }
+ ndesc++;
+ sge.vaddr += len;
+ sge.length -= len;
+ sge.sge_length -= len;
+ if (sge.sge_length == 0) {
+ if (--num_sge)
+ sge = *sg_list++;
+ } else if (sge.length == 0 && sge.mr->lkey) {
+ if (++sge.n >= QIB_SEGSZ) {
+ if (++sge.m >= sge.mr->mapsz)
+ break;
+ sge.n = 0;
+ }
+ sge.vaddr =
+ sge.mr->map[sge.m]->segs[sge.n].vaddr;
+ sge.length =
+ sge.mr->map[sge.m]->segs[sge.n].length;
+ }
+ length -= len;
+ }
+ return ndesc;
+}
+
+/*
+ * Copy from the SGEs to the data buffer.
+ */
+static void qib_copy_from_sge(void *data, struct qib_sge_state *ss, u32 length)
+{
+ struct qib_sge *sge = &ss->sge;
+
+ while (length) {
+ u32 len = sge->length;
+
+ if (len > length)
+ len = length;
+ if (len > sge->sge_length)
+ len = sge->sge_length;
+ BUG_ON(len == 0);
+ memcpy(data, sge->vaddr, len);
+ sge->vaddr += len;
+ sge->length -= len;
+ sge->sge_length -= len;
+ if (sge->sge_length == 0) {
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ break;
+ sge->n = 0;
+ }
+ sge->vaddr =
+ sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length =
+ sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+ data += len;
+ length -= len;
+ }
+}
+
+/**
+ * qib_post_one_send - post one RC, UC, or UD send work request
+ * @qp: the QP to post on
+ * @wr: the work request to send
+ */
+static int qib_post_one_send(struct qib_qp *qp, struct ib_send_wr *wr)
+{
+ struct qib_swqe *wqe;
+ u32 next;
+ int i;
+ int j;
+ int acc;
+ int ret;
+ unsigned long flags;
+ struct qib_lkey_table *rkt;
+ struct qib_pd *pd;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+
+ /* Check that state is OK to post send. */
+ if (unlikely(!(ib_qib_state_ops[qp->state] & QIB_POST_SEND_OK)))
+ goto bail_inval;
+
+ /* IB spec says that num_sge == 0 is OK. */
+ if (wr->num_sge > qp->s_max_sge)
+ goto bail_inval;
+
+ /*
+ * Don't allow RDMA reads or atomic operations on UC or
+ * undefined operations.
+ * Make sure buffer is large enough to hold the result for atomics.
+ */
+ if (wr->opcode == IB_WR_FAST_REG_MR) {
+ if (qib_fast_reg_mr(qp, wr))
+ goto bail_inval;
+ } else if (qp->ibqp.qp_type == IB_QPT_UC) {
+ if ((unsigned) wr->opcode >= IB_WR_RDMA_READ)
+ goto bail_inval;
+ } else if (qp->ibqp.qp_type != IB_QPT_RC) {
+ /* Check IB_QPT_SMI, IB_QPT_GSI, IB_QPT_UD opcode */
+ if (wr->opcode != IB_WR_SEND &&
+ wr->opcode != IB_WR_SEND_WITH_IMM)
+ goto bail_inval;
+ /* Check UD destination address PD */
+ if (qp->ibqp.pd != wr->wr.ud.ah->pd)
+ goto bail_inval;
+ } else if ((unsigned) wr->opcode > IB_WR_ATOMIC_FETCH_AND_ADD)
+ goto bail_inval;
+ else if (wr->opcode >= IB_WR_ATOMIC_CMP_AND_SWP &&
+ (wr->num_sge == 0 ||
+ wr->sg_list[0].length < sizeof(u64) ||
+ wr->sg_list[0].addr & (sizeof(u64) - 1)))
+ goto bail_inval;
+ else if (wr->opcode >= IB_WR_RDMA_READ && !qp->s_max_rd_atomic)
+ goto bail_inval;
+
+ next = qp->s_head + 1;
+ if (next >= qp->s_size)
+ next = 0;
+ if (next == qp->s_last) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ rkt = &to_idev(qp->ibqp.device)->lk_table;
+ pd = to_ipd(qp->ibqp.pd);
+ wqe = get_swqe_ptr(qp, qp->s_head);
+ wqe->wr = *wr;
+ wqe->length = 0;
+ j = 0;
+ if (wr->num_sge) {
+ acc = wr->opcode >= IB_WR_RDMA_READ ?
+ IB_ACCESS_LOCAL_WRITE : 0;
+ for (i = 0; i < wr->num_sge; i++) {
+ u32 length = wr->sg_list[i].length;
+ int ok;
+
+ if (length == 0)
+ continue;
+ ok = qib_lkey_ok(rkt, pd, &wqe->sg_list[j],
+ &wr->sg_list[i], acc);
+ if (!ok)
+ goto bail_inval_free;
+ wqe->length += length;
+ j++;
+ }
+ wqe->wr.num_sge = j;
+ }
+ if (qp->ibqp.qp_type == IB_QPT_UC ||
+ qp->ibqp.qp_type == IB_QPT_RC) {
+ if (wqe->length > 0x80000000U)
+ goto bail_inval_free;
+ } else if (wqe->length > (dd_from_ibdev(qp->ibqp.device)->pport +
+ qp->port_num - 1)->ibmtu)
+ goto bail_inval_free;
+ else
+ atomic_inc(&to_iah(wr->wr.ud.ah)->refcount);
+ wqe->ssn = qp->s_ssn++;
+ qp->s_head = next;
+
+ ret = 0;
+ goto bail;
+
+bail_inval_free:
+ while (j) {
+ struct qib_sge *sge = &wqe->sg_list[--j];
+
+ atomic_dec(&sge->mr->refcount);
+ }
+bail_inval:
+ ret = -EINVAL;
+bail:
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
+}
+
+/**
+ * qib_post_send - post a send on a QP
+ * @ibqp: the QP to post the send on
+ * @wr: the list of work requests to post
+ * @bad_wr: the first bad WR is put here
+ *
+ * This may be called from interrupt context.
+ */
+static int qib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
+ struct ib_send_wr **bad_wr)
+{
+ struct qib_qp *qp = to_iqp(ibqp);
+ int err = 0;
+
+ for (; wr; wr = wr->next) {
+ err = qib_post_one_send(qp, wr);
+ if (err) {
+ *bad_wr = wr;
+ goto bail;
+ }
+ }
+
+ /* Try to do the send work in the caller's context. */
+ qib_do_send(&qp->s_work);
+
+bail:
+ return err;
+}
+
+/**
+ * qib_post_receive - post a receive on a QP
+ * @ibqp: the QP to post the receive on
+ * @wr: the WR to post
+ * @bad_wr: the first bad WR is put here
+ *
+ * This may be called from interrupt context.
+ */
+static int qib_post_receive(struct ib_qp *ibqp, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr)
+{
+ struct qib_qp *qp = to_iqp(ibqp);
+ struct qib_rwq *wq = qp->r_rq.wq;
+ unsigned long flags;
+ int ret;
+
+ /* Check that state is OK to post receive. */
+ if (!(ib_qib_state_ops[qp->state] & QIB_POST_RECV_OK) || !wq) {
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ for (; wr; wr = wr->next) {
+ struct qib_rwqe *wqe;
+ u32 next;
+ int i;
+
+ if ((unsigned) wr->num_sge > qp->r_rq.max_sge) {
+ *bad_wr = wr;
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ spin_lock_irqsave(&qp->r_rq.lock, flags);
+ next = wq->head + 1;
+ if (next >= qp->r_rq.size)
+ next = 0;
+ if (next == wq->tail) {
+ spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+ *bad_wr = wr;
+ ret = -ENOMEM;
+ goto bail;
+ }
+
+ wqe = get_rwqe_ptr(&qp->r_rq, wq->head);
+ wqe->wr_id = wr->wr_id;
+ wqe->num_sge = wr->num_sge;
+ for (i = 0; i < wr->num_sge; i++)
+ wqe->sg_list[i] = wr->sg_list[i];
+ /* Make sure queue entry is written before the head index. */
+ smp_wmb();
+ wq->head = next;
+ spin_unlock_irqrestore(&qp->r_rq.lock, flags);
+ }
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_qp_rcv - processing an incoming packet on a QP
+ * @rcd: the context pointer
+ * @hdr: the packet header
+ * @has_grh: true if the packet has a GRH
+ * @data: the packet data
+ * @tlen: the packet length
+ * @qp: the QP the packet came on
+ *
+ * This is called from qib_ib_rcv() to process an incoming packet
+ * for the given QP.
+ * Called at interrupt level.
+ */
+static void qib_qp_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp)
+{
+ struct qib_ibport *ibp = &rcd->ppd->ibport_data;
+
+ /* Check for valid receive state. */
+ if (!(ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK)) {
+ ibp->n_pkt_drops++;
+ return;
+ }
+
+ switch (qp->ibqp.qp_type) {
+ case IB_QPT_SMI:
+ case IB_QPT_GSI:
+ if (ib_qib_disable_sma)
+ break;
+ /* FALLTHROUGH */
+ case IB_QPT_UD:
+ qib_ud_rcv(ibp, hdr, has_grh, data, tlen, qp);
+ break;
+
+ case IB_QPT_RC:
+ qib_rc_rcv(rcd, hdr, has_grh, data, tlen, qp);
+ break;
+
+ case IB_QPT_UC:
+ qib_uc_rcv(ibp, hdr, has_grh, data, tlen, qp);
+ break;
+
+ default:
+ break;
+ }
+}
+
+/**
+ * qib_ib_rcv - process an incoming packet
+ * @rcd: the context pointer
+ * @rhdr: the header of the packet
+ * @data: the packet payload
+ * @tlen: the packet length
+ *
+ * This is called from qib_kreceive() to process an incoming packet at
+ * interrupt level. Tlen is the length of the header + data + CRC in bytes.
+ */
+void qib_ib_rcv(struct qib_ctxtdata *rcd, void *rhdr, void *data, u32 tlen)
+{
+ struct qib_pportdata *ppd = rcd->ppd;
+ struct qib_ibport *ibp = &ppd->ibport_data;
+ struct qib_ib_header *hdr = rhdr;
+ struct qib_other_headers *ohdr;
+ struct qib_qp *qp;
+ u32 qp_num;
+ int lnh;
+ u8 opcode;
+ u16 lid;
+
+ /* 24 == LRH+BTH+CRC */
+ if (unlikely(tlen < 24))
+ goto drop;
+
+ /* Check for a valid destination LID (see ch. 7.11.1). */
+ lid = be16_to_cpu(hdr->lrh[1]);
+ if (lid < QIB_MULTICAST_LID_BASE) {
+ lid &= ~((1 << ppd->lmc) - 1);
+ if (unlikely(lid != ppd->lid))
+ goto drop;
+ }
+
+ /* Check for GRH */
+ lnh = be16_to_cpu(hdr->lrh[0]) & 3;
+ if (lnh == QIB_LRH_BTH)
+ ohdr = &hdr->u.oth;
+ else if (lnh == QIB_LRH_GRH) {
+ u32 vtf;
+
+ ohdr = &hdr->u.l.oth;
+ if (hdr->u.l.grh.next_hdr != IB_GRH_NEXT_HDR)
+ goto drop;
+ vtf = be32_to_cpu(hdr->u.l.grh.version_tclass_flow);
+ if ((vtf >> IB_GRH_VERSION_SHIFT) != IB_GRH_VERSION)
+ goto drop;
+ } else
+ goto drop;
+
+ opcode = be32_to_cpu(ohdr->bth[0]) >> 24;
+ ibp->opstats[opcode & 0x7f].n_bytes += tlen;
+ ibp->opstats[opcode & 0x7f].n_packets++;
+
+ /* Get the destination QP number. */
+ qp_num = be32_to_cpu(ohdr->bth[1]) & QIB_QPN_MASK;
+ if (qp_num == QIB_MULTICAST_QPN) {
+ struct qib_mcast *mcast;
+ struct qib_mcast_qp *p;
+
+ if (lnh != QIB_LRH_GRH)
+ goto drop;
+ mcast = qib_mcast_find(ibp, &hdr->u.l.grh.dgid);
+ if (mcast == NULL)
+ goto drop;
+ ibp->n_multicast_rcv++;
+ list_for_each_entry_rcu(p, &mcast->qp_list, list)
+ qib_qp_rcv(rcd, hdr, 1, data, tlen, p->qp);
+ /*
+ * Notify qib_multicast_detach() if it is waiting for us
+ * to finish.
+ */
+ if (atomic_dec_return(&mcast->refcount) <= 1)
+ wake_up(&mcast->wait);
+ } else {
+ qp = qib_lookup_qpn(ibp, qp_num);
+ if (!qp)
+ goto drop;
+ ibp->n_unicast_rcv++;
+ qib_qp_rcv(rcd, hdr, lnh == QIB_LRH_GRH, data, tlen, qp);
+ /*
+ * Notify qib_destroy_qp() if it is waiting
+ * for us to finish.
+ */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
+ return;
+
+drop:
+ ibp->n_pkt_drops++;
+}
+
+/*
+ * This is called from a timer to check for QPs
+ * which need kernel memory in order to send a packet.
+ */
+static void mem_timer(unsigned long data)
+{
+ struct qib_ibdev *dev = (struct qib_ibdev *) data;
+ struct list_head *list = &dev->memwait;
+ struct qib_qp *qp = NULL;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->pending_lock, flags);
+ if (!list_empty(list)) {
+ qp = list_entry(list->next, struct qib_qp, iowait);
+ list_del_init(&qp->iowait);
+ atomic_inc(&qp->refcount);
+ if (!list_empty(list))
+ mod_timer(&dev->mem_timer, jiffies + 1);
+ }
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+
+ if (qp) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (qp->s_flags & QIB_S_WAIT_KMEM) {
+ qp->s_flags &= ~QIB_S_WAIT_KMEM;
+ qib_schedule_send(qp);
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
+}
+
+static void update_sge(struct qib_sge_state *ss, u32 length)
+{
+ struct qib_sge *sge = &ss->sge;
+
+ sge->vaddr += length;
+ sge->length -= length;
+ sge->sge_length -= length;
+ if (sge->sge_length == 0) {
+ if (--ss->num_sge)
+ *sge = *ss->sg_list++;
+ } else if (sge->length == 0 && sge->mr->lkey) {
+ if (++sge->n >= QIB_SEGSZ) {
+ if (++sge->m >= sge->mr->mapsz)
+ return;
+ sge->n = 0;
+ }
+ sge->vaddr = sge->mr->map[sge->m]->segs[sge->n].vaddr;
+ sge->length = sge->mr->map[sge->m]->segs[sge->n].length;
+ }
+}
+
+#ifdef __LITTLE_ENDIAN
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+ return data >> shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+ return data << shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+ data <<= ((sizeof(u32) - n) * BITS_PER_BYTE);
+ data >>= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+ return data;
+}
+#else
+static inline u32 get_upper_bits(u32 data, u32 shift)
+{
+ return data << shift;
+}
+
+static inline u32 set_upper_bits(u32 data, u32 shift)
+{
+ return data >> shift;
+}
+
+static inline u32 clear_upper_bytes(u32 data, u32 n, u32 off)
+{
+ data >>= ((sizeof(u32) - n) * BITS_PER_BYTE);
+ data <<= ((sizeof(u32) - n - off) * BITS_PER_BYTE);
+ return data;
+}
+#endif
+
+static void copy_io(u32 __iomem *piobuf, struct qib_sge_state *ss,
+ u32 length, unsigned flush_wc)
+{
+ u32 extra = 0;
+ u32 data = 0;
+ u32 last;
+
+ while (1) {
+ u32 len = ss->sge.length;
+ u32 off;
+
+ if (len > length)
+ len = length;
+ if (len > ss->sge.sge_length)
+ len = ss->sge.sge_length;
+ BUG_ON(len == 0);
+ /* If the source address is not aligned, try to align it. */
+ off = (unsigned long)ss->sge.vaddr & (sizeof(u32) - 1);
+ if (off) {
+ u32 *addr = (u32 *)((unsigned long)ss->sge.vaddr &
+ ~(sizeof(u32) - 1));
+ u32 v = get_upper_bits(*addr, off * BITS_PER_BYTE);
+ u32 y;
+
+ y = sizeof(u32) - off;
+ if (len > y)
+ len = y;
+ if (len + extra >= sizeof(u32)) {
+ data |= set_upper_bits(v, extra *
+ BITS_PER_BYTE);
+ len = sizeof(u32) - extra;
+ if (len == length) {
+ last = data;
+ break;
+ }
+ __raw_writel(data, piobuf);
+ piobuf++;
+ extra = 0;
+ data = 0;
+ } else {
+ /* Clear unused upper bytes */
+ data |= clear_upper_bytes(v, len, extra);
+ if (len == length) {
+ last = data;
+ break;
+ }
+ extra += len;
+ }
+ } else if (extra) {
+ /* Source address is aligned. */
+ u32 *addr = (u32 *) ss->sge.vaddr;
+ int shift = extra * BITS_PER_BYTE;
+ int ushift = 32 - shift;
+ u32 l = len;
+
+ while (l >= sizeof(u32)) {
+ u32 v = *addr;
+
+ data |= set_upper_bits(v, shift);
+ __raw_writel(data, piobuf);
+ data = get_upper_bits(v, ushift);
+ piobuf++;
+ addr++;
+ l -= sizeof(u32);
+ }
+ /*
+ * We still have 'extra' number of bytes leftover.
+ */
+ if (l) {
+ u32 v = *addr;
+
+ if (l + extra >= sizeof(u32)) {
+ data |= set_upper_bits(v, shift);
+ len -= l + extra - sizeof(u32);
+ if (len == length) {
+ last = data;
+ break;
+ }
+ __raw_writel(data, piobuf);
+ piobuf++;
+ extra = 0;
+ data = 0;
+ } else {
+ /* Clear unused upper bytes */
+ data |= clear_upper_bytes(v, l, extra);
+ if (len == length) {
+ last = data;
+ break;
+ }
+ extra += l;
+ }
+ } else if (len == length) {
+ last = data;
+ break;
+ }
+ } else if (len == length) {
+ u32 w;
+
+ /*
+ * Need to round up for the last dword in the
+ * packet.
+ */
+ w = (len + 3) >> 2;
+ qib_pio_copy(piobuf, ss->sge.vaddr, w - 1);
+ piobuf += w - 1;
+ last = ((u32 *) ss->sge.vaddr)[w - 1];
+ break;
+ } else {
+ u32 w = len >> 2;
+
+ qib_pio_copy(piobuf, ss->sge.vaddr, w);
+ piobuf += w;
+
+ extra = len & (sizeof(u32) - 1);
+ if (extra) {
+ u32 v = ((u32 *) ss->sge.vaddr)[w];
+
+ /* Clear unused upper bytes */
+ data = clear_upper_bytes(v, extra, 0);
+ }
+ }
+ update_sge(ss, len);
+ length -= len;
+ }
+ /* Update address before sending packet. */
+ update_sge(ss, length);
+ if (flush_wc) {
+ /* must flush early everything before trigger word */
+ qib_flush_wc();
+ __raw_writel(last, piobuf);
+ /* be sure trigger word is written */
+ qib_flush_wc();
+ } else
+ __raw_writel(last, piobuf);
+}
+
+static struct qib_verbs_txreq *get_txreq(struct qib_ibdev *dev,
+ struct qib_qp *qp, int *retp)
+{
+ struct qib_verbs_txreq *tx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ spin_lock(&dev->pending_lock);
+
+ if (!list_empty(&dev->txreq_free)) {
+ struct list_head *l = dev->txreq_free.next;
+
+ list_del(l);
+ tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
+ *retp = 0;
+ } else {
+ if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK &&
+ list_empty(&qp->iowait)) {
+ dev->n_txwait++;
+ qp->s_flags |= QIB_S_WAIT_TX;
+ list_add_tail(&qp->iowait, &dev->txwait);
+ }
+ tx = NULL;
+ qp->s_flags &= ~QIB_S_BUSY;
+ *retp = -EBUSY;
+ }
+
+ spin_unlock(&dev->pending_lock);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ return tx;
+}
+
+void qib_put_txreq(struct qib_verbs_txreq *tx)
+{
+ struct qib_ibdev *dev;
+ struct qib_qp *qp;
+ unsigned long flags;
+
+ qp = tx->qp;
+ dev = to_idev(qp->ibqp.device);
+
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ if (tx->mr) {
+ atomic_dec(&tx->mr->refcount);
+ tx->mr = NULL;
+ }
+ if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF) {
+ tx->txreq.flags &= ~QIB_SDMA_TXREQ_F_FREEBUF;
+ dma_unmap_single(&dd_from_dev(dev)->pcidev->dev,
+ tx->txreq.addr, tx->hdr_dwords << 2,
+ DMA_TO_DEVICE);
+ kfree(tx->align_buf);
+ }
+
+ spin_lock_irqsave(&dev->pending_lock, flags);
+
+ /* Put struct back on free list */
+ list_add(&tx->txreq.list, &dev->txreq_free);
+
+ if (!list_empty(&dev->txwait)) {
+ /* Wake up first QP wanting a free struct */
+ qp = list_entry(dev->txwait.next, struct qib_qp, iowait);
+ list_del_init(&qp->iowait);
+ atomic_inc(&qp->refcount);
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (qp->s_flags & QIB_S_WAIT_TX) {
+ qp->s_flags &= ~QIB_S_WAIT_TX;
+ qib_schedule_send(qp);
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ } else
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+}
+
+/*
+ * This is called when there are send DMA descriptors that might be
+ * available.
+ *
+ * This is called with ppd->sdma_lock held.
+ */
+void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail)
+{
+ struct qib_qp *qp, *nqp;
+ struct qib_qp *qps[20];
+ struct qib_ibdev *dev;
+ unsigned i, n;
+
+ n = 0;
+ dev = &ppd->dd->verbs_dev;
+ spin_lock(&dev->pending_lock);
+
+ /* Search wait list for first QP wanting DMA descriptors. */
+ list_for_each_entry_safe(qp, nqp, &dev->dmawait, iowait) {
+ if (qp->port_num != ppd->port)
+ continue;
+ if (n == ARRAY_SIZE(qps))
+ break;
+ if (qp->s_tx->txreq.sg_count > avail)
+ break;
+ avail -= qp->s_tx->txreq.sg_count;
+ list_del_init(&qp->iowait);
+ atomic_inc(&qp->refcount);
+ qps[n++] = qp;
+ }
+
+ spin_unlock(&dev->pending_lock);
+
+ for (i = 0; i < n; i++) {
+ qp = qps[i];
+ spin_lock(&qp->s_lock);
+ if (qp->s_flags & QIB_S_WAIT_DMA_DESC) {
+ qp->s_flags &= ~QIB_S_WAIT_DMA_DESC;
+ qib_schedule_send(qp);
+ }
+ spin_unlock(&qp->s_lock);
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
+}
+
+/*
+ * This is called with ppd->sdma_lock held.
+ */
+static void sdma_complete(struct qib_sdma_txreq *cookie, int status)
+{
+ struct qib_verbs_txreq *tx =
+ container_of(cookie, struct qib_verbs_txreq, txreq);
+ struct qib_qp *qp = tx->qp;
+
+ spin_lock(&qp->s_lock);
+ if (tx->wqe)
+ qib_send_complete(qp, tx->wqe, IB_WC_SUCCESS);
+ else if (qp->ibqp.qp_type == IB_QPT_RC) {
+ struct qib_ib_header *hdr;
+
+ if (tx->txreq.flags & QIB_SDMA_TXREQ_F_FREEBUF)
+ hdr = &tx->align_buf->hdr;
+ else {
+ struct qib_ibdev *dev = to_idev(qp->ibqp.device);
+
+ hdr = &dev->pio_hdrs[tx->hdr_inx].hdr;
+ }
+ qib_rc_send_complete(qp, hdr);
+ }
+ if (atomic_dec_and_test(&qp->s_dma_busy)) {
+ if (qp->state == IB_QPS_RESET)
+ wake_up(&qp->wait_dma);
+ else if (qp->s_flags & QIB_S_WAIT_DMA) {
+ qp->s_flags &= ~QIB_S_WAIT_DMA;
+ qib_schedule_send(qp);
+ }
+ }
+ spin_unlock(&qp->s_lock);
+
+ qib_put_txreq(tx);
+}
+
+static int wait_kmem(struct qib_ibdev *dev, struct qib_qp *qp)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->iowait)) {
+ if (list_empty(&dev->memwait))
+ mod_timer(&dev->mem_timer, jiffies + 1);
+ qp->s_flags |= QIB_S_WAIT_KMEM;
+ list_add_tail(&qp->iowait, &dev->memwait);
+ }
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~QIB_S_BUSY;
+ ret = -EBUSY;
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ return ret;
+}
+
+static int qib_verbs_send_dma(struct qib_qp *qp, struct qib_ib_header *hdr,
+ u32 hdrwords, struct qib_sge_state *ss, u32 len,
+ u32 plen, u32 dwords)
+{
+ struct qib_ibdev *dev = to_idev(qp->ibqp.device);
+ struct qib_devdata *dd = dd_from_dev(dev);
+ struct qib_ibport *ibp = to_iport(qp->ibqp.device, qp->port_num);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_verbs_txreq *tx;
+ struct qib_pio_header *phdr;
+ u32 control;
+ u32 ndesc;
+ int ret;
+
+ tx = qp->s_tx;
+ if (tx) {
+ qp->s_tx = NULL;
+ /* resend previously constructed packet */
+ ret = qib_sdma_verbs_send(ppd, tx->ss, tx->dwords, tx);
+ goto bail;
+ }
+
+ tx = get_txreq(dev, qp, &ret);
+ if (!tx)
+ goto bail;
+
+ control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
+ be16_to_cpu(hdr->lrh[0]) >> 12);
+ tx->qp = qp;
+ atomic_inc(&qp->refcount);
+ tx->wqe = qp->s_wqe;
+ tx->mr = qp->s_rdma_mr;
+ if (qp->s_rdma_mr)
+ qp->s_rdma_mr = NULL;
+ tx->txreq.callback = sdma_complete;
+ if (dd->flags & QIB_HAS_SDMA_TIMEOUT)
+ tx->txreq.flags = QIB_SDMA_TXREQ_F_HEADTOHOST;
+ else
+ tx->txreq.flags = QIB_SDMA_TXREQ_F_INTREQ;
+ if (plen + 1 > dd->piosize2kmax_dwords)
+ tx->txreq.flags |= QIB_SDMA_TXREQ_F_USELARGEBUF;
+
+ if (len) {
+ /*
+ * Don't try to DMA if it takes more descriptors than
+ * the queue holds.
+ */
+ ndesc = qib_count_sge(ss, len);
+ if (ndesc >= ppd->sdma_descq_cnt)
+ ndesc = 0;
+ } else
+ ndesc = 1;
+ if (ndesc) {
+ phdr = &dev->pio_hdrs[tx->hdr_inx];
+ phdr->pbc[0] = cpu_to_le32(plen);
+ phdr->pbc[1] = cpu_to_le32(control);
+ memcpy(&phdr->hdr, hdr, hdrwords << 2);
+ tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEDESC;
+ tx->txreq.sg_count = ndesc;
+ tx->txreq.addr = dev->pio_hdrs_phys +
+ tx->hdr_inx * sizeof(struct qib_pio_header);
+ tx->hdr_dwords = hdrwords + 2; /* add PBC length */
+ ret = qib_sdma_verbs_send(ppd, ss, dwords, tx);
+ goto bail;
+ }
+
+ /* Allocate a buffer and copy the header and payload to it. */
+ tx->hdr_dwords = plen + 1;
+ phdr = kmalloc(tx->hdr_dwords << 2, GFP_ATOMIC);
+ if (!phdr)
+ goto err_tx;
+ phdr->pbc[0] = cpu_to_le32(plen);
+ phdr->pbc[1] = cpu_to_le32(control);
+ memcpy(&phdr->hdr, hdr, hdrwords << 2);
+ qib_copy_from_sge((u32 *) &phdr->hdr + hdrwords, ss, len);
+
+ tx->txreq.addr = dma_map_single(&dd->pcidev->dev, phdr,
+ tx->hdr_dwords << 2, DMA_TO_DEVICE);
+ if (dma_mapping_error(&dd->pcidev->dev, tx->txreq.addr))
+ goto map_err;
+ tx->align_buf = phdr;
+ tx->txreq.flags |= QIB_SDMA_TXREQ_F_FREEBUF;
+ tx->txreq.sg_count = 1;
+ ret = qib_sdma_verbs_send(ppd, NULL, 0, tx);
+ goto unaligned;
+
+map_err:
+ kfree(phdr);
+err_tx:
+ qib_put_txreq(tx);
+ ret = wait_kmem(dev, qp);
+unaligned:
+ ibp->n_unaligned++;
+bail:
+ return ret;
+}
+
+/*
+ * If we are now in the error state, return zero to flush the
+ * send work request.
+ */
+static int no_bufs_available(struct qib_qp *qp)
+{
+ struct qib_ibdev *dev = to_idev(qp->ibqp.device);
+ struct qib_devdata *dd;
+ unsigned long flags;
+ int ret = 0;
+
+ /*
+ * Note that as soon as want_buffer() is called and
+ * possibly before it returns, qib_ib_piobufavail()
+ * could be called. Therefore, put QP on the I/O wait list before
+ * enabling the PIO avail interrupt.
+ */
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (ib_qib_state_ops[qp->state] & QIB_PROCESS_RECV_OK) {
+ spin_lock(&dev->pending_lock);
+ if (list_empty(&qp->iowait)) {
+ dev->n_piowait++;
+ qp->s_flags |= QIB_S_WAIT_PIO;
+ list_add_tail(&qp->iowait, &dev->piowait);
+ dd = dd_from_dev(dev);
+ dd->f_wantpiobuf_intr(dd, 1);
+ }
+ spin_unlock(&dev->pending_lock);
+ qp->s_flags &= ~QIB_S_BUSY;
+ ret = -EBUSY;
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ return ret;
+}
+
+static int qib_verbs_send_pio(struct qib_qp *qp, struct qib_ib_header *ibhdr,
+ u32 hdrwords, struct qib_sge_state *ss, u32 len,
+ u32 plen, u32 dwords)
+{
+ struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+ struct qib_pportdata *ppd = dd->pport + qp->port_num - 1;
+ u32 *hdr = (u32 *) ibhdr;
+ u32 __iomem *piobuf_orig;
+ u32 __iomem *piobuf;
+ u64 pbc;
+ unsigned long flags;
+ unsigned flush_wc;
+ u32 control;
+ u32 pbufn;
+
+ control = dd->f_setpbc_control(ppd, plen, qp->s_srate,
+ be16_to_cpu(ibhdr->lrh[0]) >> 12);
+ pbc = ((u64) control << 32) | plen;
+ piobuf = dd->f_getsendbuf(ppd, pbc, &pbufn);
+ if (unlikely(piobuf == NULL))
+ return no_bufs_available(qp);
+
+ /*
+ * Write the pbc.
+ * We have to flush after the PBC for correctness on some cpus
+ * or WC buffer can be written out of order.
+ */
+ writeq(pbc, piobuf);
+ piobuf_orig = piobuf;
+ piobuf += 2;
+
+ flush_wc = dd->flags & QIB_PIO_FLUSH_WC;
+ if (len == 0) {
+ /*
+ * If there is just the header portion, must flush before
+ * writing last word of header for correctness, and after
+ * the last header word (trigger word).
+ */
+ if (flush_wc) {
+ qib_flush_wc();
+ qib_pio_copy(piobuf, hdr, hdrwords - 1);
+ qib_flush_wc();
+ __raw_writel(hdr[hdrwords - 1], piobuf + hdrwords - 1);
+ qib_flush_wc();
+ } else
+ qib_pio_copy(piobuf, hdr, hdrwords);
+ goto done;
+ }
+
+ if (flush_wc)
+ qib_flush_wc();
+ qib_pio_copy(piobuf, hdr, hdrwords);
+ piobuf += hdrwords;
+
+ /* The common case is aligned and contained in one segment. */
+ if (likely(ss->num_sge == 1 && len <= ss->sge.length &&
+ !((unsigned long)ss->sge.vaddr & (sizeof(u32) - 1)))) {
+ u32 *addr = (u32 *) ss->sge.vaddr;
+
+ /* Update address before sending packet. */
+ update_sge(ss, len);
+ if (flush_wc) {
+ qib_pio_copy(piobuf, addr, dwords - 1);
+ /* must flush early everything before trigger word */
+ qib_flush_wc();
+ __raw_writel(addr[dwords - 1], piobuf + dwords - 1);
+ /* be sure trigger word is written */
+ qib_flush_wc();
+ } else
+ qib_pio_copy(piobuf, addr, dwords);
+ goto done;
+ }
+ copy_io(piobuf, ss, len, flush_wc);
+done:
+ if (dd->flags & QIB_USE_SPCL_TRIG) {
+ u32 spcl_off = (pbufn >= dd->piobcnt2k) ? 2047 : 1023;
+ qib_flush_wc();
+ __raw_writel(0xaebecede, piobuf_orig + spcl_off);
+ }
+ qib_sendbuf_done(dd, pbufn);
+ if (qp->s_rdma_mr) {
+ atomic_dec(&qp->s_rdma_mr->refcount);
+ qp->s_rdma_mr = NULL;
+ }
+ if (qp->s_wqe) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ qib_send_complete(qp, qp->s_wqe, IB_WC_SUCCESS);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ } else if (qp->ibqp.qp_type == IB_QPT_RC) {
+ spin_lock_irqsave(&qp->s_lock, flags);
+ qib_rc_send_complete(qp, ibhdr);
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+ }
+ return 0;
+}
+
+/**
+ * qib_verbs_send - send a packet
+ * @qp: the QP to send on
+ * @hdr: the packet header
+ * @hdrwords: the number of 32-bit words in the header
+ * @ss: the SGE to send
+ * @len: the length of the packet in bytes
+ *
+ * Return zero if packet is sent or queued OK.
+ * Return non-zero and clear qp->s_flags QIB_S_BUSY otherwise.
+ */
+int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
+ u32 hdrwords, struct qib_sge_state *ss, u32 len)
+{
+ struct qib_devdata *dd = dd_from_ibdev(qp->ibqp.device);
+ u32 plen;
+ int ret;
+ u32 dwords = (len + 3) >> 2;
+
+ /*
+ * Calculate the send buffer trigger address.
+ * The +1 counts for the pbc control dword following the pbc length.
+ */
+ plen = hdrwords + dwords + 1;
+
+ /*
+ * VL15 packets (IB_QPT_SMI) will always use PIO, so we
+ * can defer SDMA restart until link goes ACTIVE without
+ * worrying about just how we got there.
+ */
+ if (qp->ibqp.qp_type == IB_QPT_SMI ||
+ !(dd->flags & QIB_HAS_SEND_DMA))
+ ret = qib_verbs_send_pio(qp, hdr, hdrwords, ss, len,
+ plen, dwords);
+ else
+ ret = qib_verbs_send_dma(qp, hdr, hdrwords, ss, len,
+ plen, dwords);
+
+ return ret;
+}
+
+int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
+ u64 *rwords, u64 *spkts, u64 *rpkts,
+ u64 *xmit_wait)
+{
+ int ret;
+ struct qib_devdata *dd = ppd->dd;
+
+ if (!(dd->flags & QIB_PRESENT)) {
+ /* no hardware, freeze, etc. */
+ ret = -EINVAL;
+ goto bail;
+ }
+ *swords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDSEND);
+ *rwords = dd->f_portcntr(ppd, QIBPORTCNTR_WORDRCV);
+ *spkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTSEND);
+ *rpkts = dd->f_portcntr(ppd, QIBPORTCNTR_PKTRCV);
+ *xmit_wait = dd->f_portcntr(ppd, QIBPORTCNTR_SENDSTALL);
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_get_counters - get various chip counters
+ * @dd: the qlogic_ib device
+ * @cntrs: counters are placed here
+ *
+ * Return the counters needed by recv_pma_get_portcounters().
+ */
+int qib_get_counters(struct qib_pportdata *ppd,
+ struct qib_verbs_counters *cntrs)
+{
+ int ret;
+
+ if (!(ppd->dd->flags & QIB_PRESENT)) {
+ /* no hardware, freeze, etc. */
+ ret = -EINVAL;
+ goto bail;
+ }
+ cntrs->symbol_error_counter =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBSYMBOLERR);
+ cntrs->link_error_recovery_counter =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKERRRECOV);
+ /*
+ * The link downed counter counts when the other side downs the
+ * connection. We add in the number of times we downed the link
+ * due to local link integrity errors to compensate.
+ */
+ cntrs->link_downed_counter =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_IBLINKDOWN);
+ cntrs->port_rcv_errors =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXDROPPKT) +
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVOVFL) +
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERR_RLEN) +
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_INVALIDRLEN) +
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLINK) +
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRICRC) +
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRVCRC) +
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_ERRLPCRC) +
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_BADFORMAT);
+ cntrs->port_rcv_errors +=
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXLOCALPHYERR);
+ cntrs->port_rcv_errors +=
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RXVLERR);
+ cntrs->port_rcv_remphys_errors =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_RCVEBP);
+ cntrs->port_xmit_discards =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_UNSUPVL);
+ cntrs->port_xmit_data = ppd->dd->f_portcntr(ppd,
+ QIBPORTCNTR_WORDSEND);
+ cntrs->port_rcv_data = ppd->dd->f_portcntr(ppd,
+ QIBPORTCNTR_WORDRCV);
+ cntrs->port_xmit_packets = ppd->dd->f_portcntr(ppd,
+ QIBPORTCNTR_PKTSEND);
+ cntrs->port_rcv_packets = ppd->dd->f_portcntr(ppd,
+ QIBPORTCNTR_PKTRCV);
+ cntrs->local_link_integrity_errors =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_LLI);
+ cntrs->excessive_buffer_overrun_errors =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_EXCESSBUFOVFL);
+ cntrs->vl15_dropped =
+ ppd->dd->f_portcntr(ppd, QIBPORTCNTR_VL15PKTDROP);
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_ib_piobufavail - callback when a PIO buffer is available
+ * @dd: the device pointer
+ *
+ * This is called from qib_intr() at interrupt level when a PIO buffer is
+ * available after qib_verbs_send() returned an error that no buffers were
+ * available. Disable the interrupt if there are no more QPs waiting.
+ */
+void qib_ib_piobufavail(struct qib_devdata *dd)
+{
+ struct qib_ibdev *dev = &dd->verbs_dev;
+ struct list_head *list;
+ struct qib_qp *qps[5];
+ struct qib_qp *qp;
+ unsigned long flags;
+ unsigned i, n;
+
+ list = &dev->piowait;
+ n = 0;
+
+ /*
+ * Note: checking that the piowait list is empty and clearing
+ * the buffer available interrupt needs to be atomic or we
+ * could end up with QPs on the wait list with the interrupt
+ * disabled.
+ */
+ spin_lock_irqsave(&dev->pending_lock, flags);
+ while (!list_empty(list)) {
+ if (n == ARRAY_SIZE(qps))
+ goto full;
+ qp = list_entry(list->next, struct qib_qp, iowait);
+ list_del_init(&qp->iowait);
+ atomic_inc(&qp->refcount);
+ qps[n++] = qp;
+ }
+ dd->f_wantpiobuf_intr(dd, 0);
+full:
+ spin_unlock_irqrestore(&dev->pending_lock, flags);
+
+ for (i = 0; i < n; i++) {
+ qp = qps[i];
+
+ spin_lock_irqsave(&qp->s_lock, flags);
+ if (qp->s_flags & QIB_S_WAIT_PIO) {
+ qp->s_flags &= ~QIB_S_WAIT_PIO;
+ qib_schedule_send(qp);
+ }
+ spin_unlock_irqrestore(&qp->s_lock, flags);
+
+ /* Notify qib_destroy_qp() if it is waiting. */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+ }
+}
+
+static int qib_query_device(struct ib_device *ibdev,
+ struct ib_device_attr *props)
+{
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+ struct qib_ibdev *dev = to_idev(ibdev);
+
+ memset(props, 0, sizeof(*props));
+
+ props->device_cap_flags = IB_DEVICE_BAD_PKEY_CNTR |
+ IB_DEVICE_BAD_QKEY_CNTR | IB_DEVICE_SHUTDOWN_PORT |
+ IB_DEVICE_SYS_IMAGE_GUID | IB_DEVICE_RC_RNR_NAK_GEN |
+ IB_DEVICE_PORT_ACTIVE_EVENT | IB_DEVICE_SRQ_RESIZE;
+ props->page_size_cap = PAGE_SIZE;
+ props->vendor_id =
+ QIB_SRC_OUI_1 << 16 | QIB_SRC_OUI_2 << 8 | QIB_SRC_OUI_3;
+ props->vendor_part_id = dd->deviceid;
+ props->hw_ver = dd->minrev;
+ props->sys_image_guid = ib_qib_sys_image_guid;
+ props->max_mr_size = ~0ULL;
+ props->max_qp = ib_qib_max_qps;
+ props->max_qp_wr = ib_qib_max_qp_wrs;
+ props->max_sge = ib_qib_max_sges;
+ props->max_cq = ib_qib_max_cqs;
+ props->max_ah = ib_qib_max_ahs;
+ props->max_cqe = ib_qib_max_cqes;
+ props->max_mr = dev->lk_table.max;
+ props->max_fmr = dev->lk_table.max;
+ props->max_map_per_fmr = 32767;
+ props->max_pd = ib_qib_max_pds;
+ props->max_qp_rd_atom = QIB_MAX_RDMA_ATOMIC;
+ props->max_qp_init_rd_atom = 255;
+ /* props->max_res_rd_atom */
+ props->max_srq = ib_qib_max_srqs;
+ props->max_srq_wr = ib_qib_max_srq_wrs;
+ props->max_srq_sge = ib_qib_max_srq_sges;
+ /* props->local_ca_ack_delay */
+ props->atomic_cap = IB_ATOMIC_GLOB;
+ props->max_pkeys = qib_get_npkeys(dd);
+ props->max_mcast_grp = ib_qib_max_mcast_grps;
+ props->max_mcast_qp_attach = ib_qib_max_mcast_qp_attached;
+ props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
+ props->max_mcast_grp;
+
+ return 0;
+}
+
+static int qib_query_port(struct ib_device *ibdev, u8 port,
+ struct ib_port_attr *props)
+{
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ enum ib_mtu mtu;
+ u16 lid = ppd->lid;
+
+ memset(props, 0, sizeof(*props));
+ props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
+ props->lmc = ppd->lmc;
+ props->sm_lid = ibp->sm_lid;
+ props->sm_sl = ibp->sm_sl;
+ props->state = dd->f_iblink_state(ppd->lastibcstat);
+ props->phys_state = dd->f_ibphys_portstate(ppd->lastibcstat);
+ props->port_cap_flags = ibp->port_cap_flags;
+ props->gid_tbl_len = QIB_GUIDS_PER_PORT;
+ props->max_msg_sz = 0x80000000;
+ props->pkey_tbl_len = qib_get_npkeys(dd);
+ props->bad_pkey_cntr = ibp->pkey_violations;
+ props->qkey_viol_cntr = ibp->qkey_violations;
+ props->active_width = ppd->link_width_active;
+ /* See rate_show() */
+ props->active_speed = ppd->link_speed_active;
+ props->max_vl_num = qib_num_vls(ppd->vls_supported);
+ props->init_type_reply = 0;
+
+ props->max_mtu = qib_ibmtu ? qib_ibmtu : IB_MTU_4096;
+ switch (ppd->ibmtu) {
+ case 4096:
+ mtu = IB_MTU_4096;
+ break;
+ case 2048:
+ mtu = IB_MTU_2048;
+ break;
+ case 1024:
+ mtu = IB_MTU_1024;
+ break;
+ case 512:
+ mtu = IB_MTU_512;
+ break;
+ case 256:
+ mtu = IB_MTU_256;
+ break;
+ default:
+ mtu = IB_MTU_2048;
+ }
+ props->active_mtu = mtu;
+ props->subnet_timeout = ibp->subnet_timeout;
+
+ return 0;
+}
+
+static int qib_modify_device(struct ib_device *device,
+ int device_modify_mask,
+ struct ib_device_modify *device_modify)
+{
+ struct qib_devdata *dd = dd_from_ibdev(device);
+ unsigned i;
+ int ret;
+
+ if (device_modify_mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID |
+ IB_DEVICE_MODIFY_NODE_DESC)) {
+ ret = -EOPNOTSUPP;
+ goto bail;
+ }
+
+ if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) {
+ memcpy(device->node_desc, device_modify->node_desc, 64);
+ for (i = 0; i < dd->num_pports; i++) {
+ struct qib_ibport *ibp = &dd->pport[i].ibport_data;
+
+ qib_node_desc_chg(ibp);
+ }
+ }
+
+ if (device_modify_mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) {
+ ib_qib_sys_image_guid =
+ cpu_to_be64(device_modify->sys_image_guid);
+ for (i = 0; i < dd->num_pports; i++) {
+ struct qib_ibport *ibp = &dd->pport[i].ibport_data;
+
+ qib_sys_guid_chg(ibp);
+ }
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+static int qib_modify_port(struct ib_device *ibdev, u8 port,
+ int port_modify_mask, struct ib_port_modify *props)
+{
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+ ibp->port_cap_flags |= props->set_port_cap_mask;
+ ibp->port_cap_flags &= ~props->clr_port_cap_mask;
+ if (props->set_port_cap_mask || props->clr_port_cap_mask)
+ qib_cap_mask_chg(ibp);
+ if (port_modify_mask & IB_PORT_SHUTDOWN)
+ qib_set_linkstate(ppd, QIB_IB_LINKDOWN);
+ if (port_modify_mask & IB_PORT_RESET_QKEY_CNTR)
+ ibp->qkey_violations = 0;
+ return 0;
+}
+
+static int qib_query_gid(struct ib_device *ibdev, u8 port,
+ int index, union ib_gid *gid)
+{
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+ int ret = 0;
+
+ if (!port || port > dd->num_pports)
+ ret = -EINVAL;
+ else {
+ struct qib_ibport *ibp = to_iport(ibdev, port);
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+
+ gid->global.subnet_prefix = ibp->gid_prefix;
+ if (index == 0)
+ gid->global.interface_id = ppd->guid;
+ else if (index < QIB_GUIDS_PER_PORT)
+ gid->global.interface_id = ibp->guids[index - 1];
+ else
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static struct ib_pd *qib_alloc_pd(struct ib_device *ibdev,
+ struct ib_ucontext *context,
+ struct ib_udata *udata)
+{
+ struct qib_ibdev *dev = to_idev(ibdev);
+ struct qib_pd *pd;
+ struct ib_pd *ret;
+
+ /*
+ * This is actually totally arbitrary. Some correctness tests
+ * assume there's a maximum number of PDs that can be allocated.
+ * We don't actually have this limit, but we fail the test if
+ * we allow allocations of more than we report for this value.
+ */
+
+ pd = kmalloc(sizeof *pd, GFP_KERNEL);
+ if (!pd) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ spin_lock(&dev->n_pds_lock);
+ if (dev->n_pds_allocated == ib_qib_max_pds) {
+ spin_unlock(&dev->n_pds_lock);
+ kfree(pd);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ dev->n_pds_allocated++;
+ spin_unlock(&dev->n_pds_lock);
+
+ /* ib_alloc_pd() will initialize pd->ibpd. */
+ pd->user = udata != NULL;
+
+ ret = &pd->ibpd;
+
+bail:
+ return ret;
+}
+
+static int qib_dealloc_pd(struct ib_pd *ibpd)
+{
+ struct qib_pd *pd = to_ipd(ibpd);
+ struct qib_ibdev *dev = to_idev(ibpd->device);
+
+ spin_lock(&dev->n_pds_lock);
+ dev->n_pds_allocated--;
+ spin_unlock(&dev->n_pds_lock);
+
+ kfree(pd);
+
+ return 0;
+}
+
+int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr)
+{
+ /* A multicast address requires a GRH (see ch. 8.4.1). */
+ if (ah_attr->dlid >= QIB_MULTICAST_LID_BASE &&
+ ah_attr->dlid != QIB_PERMISSIVE_LID &&
+ !(ah_attr->ah_flags & IB_AH_GRH))
+ goto bail;
+ if ((ah_attr->ah_flags & IB_AH_GRH) &&
+ ah_attr->grh.sgid_index >= QIB_GUIDS_PER_PORT)
+ goto bail;
+ if (ah_attr->dlid == 0)
+ goto bail;
+ if (ah_attr->port_num < 1 ||
+ ah_attr->port_num > ibdev->phys_port_cnt)
+ goto bail;
+ if (ah_attr->static_rate != IB_RATE_PORT_CURRENT &&
+ ib_rate_to_mult(ah_attr->static_rate) < 0)
+ goto bail;
+ if (ah_attr->sl > 15)
+ goto bail;
+ return 0;
+bail:
+ return -EINVAL;
+}
+
+/**
+ * qib_create_ah - create an address handle
+ * @pd: the protection domain
+ * @ah_attr: the attributes of the AH
+ *
+ * This may be called from interrupt context.
+ */
+static struct ib_ah *qib_create_ah(struct ib_pd *pd,
+ struct ib_ah_attr *ah_attr)
+{
+ struct qib_ah *ah;
+ struct ib_ah *ret;
+ struct qib_ibdev *dev = to_idev(pd->device);
+ unsigned long flags;
+
+ if (qib_check_ah(pd->device, ah_attr)) {
+ ret = ERR_PTR(-EINVAL);
+ goto bail;
+ }
+
+ ah = kmalloc(sizeof *ah, GFP_ATOMIC);
+ if (!ah) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ spin_lock_irqsave(&dev->n_ahs_lock, flags);
+ if (dev->n_ahs_allocated == ib_qib_max_ahs) {
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+ kfree(ah);
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ dev->n_ahs_allocated++;
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
+ /* ib_create_ah() will initialize ah->ibah. */
+ ah->attr = *ah_attr;
+ atomic_set(&ah->refcount, 0);
+
+ ret = &ah->ibah;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_destroy_ah - destroy an address handle
+ * @ibah: the AH to destroy
+ *
+ * This may be called from interrupt context.
+ */
+static int qib_destroy_ah(struct ib_ah *ibah)
+{
+ struct qib_ibdev *dev = to_idev(ibah->device);
+ struct qib_ah *ah = to_iah(ibah);
+ unsigned long flags;
+
+ if (atomic_read(&ah->refcount) != 0)
+ return -EBUSY;
+
+ spin_lock_irqsave(&dev->n_ahs_lock, flags);
+ dev->n_ahs_allocated--;
+ spin_unlock_irqrestore(&dev->n_ahs_lock, flags);
+
+ kfree(ah);
+
+ return 0;
+}
+
+static int qib_modify_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+ struct qib_ah *ah = to_iah(ibah);
+
+ if (qib_check_ah(ibah->device, ah_attr))
+ return -EINVAL;
+
+ ah->attr = *ah_attr;
+
+ return 0;
+}
+
+static int qib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr)
+{
+ struct qib_ah *ah = to_iah(ibah);
+
+ *ah_attr = ah->attr;
+
+ return 0;
+}
+
+/**
+ * qib_get_npkeys - return the size of the PKEY table for context 0
+ * @dd: the qlogic_ib device
+ */
+unsigned qib_get_npkeys(struct qib_devdata *dd)
+{
+ return ARRAY_SIZE(dd->rcd[0]->pkeys);
+}
+
+/*
+ * Return the indexed PKEY from the port PKEY table.
+ * No need to validate rcd[ctxt]; the port is setup if we are here.
+ */
+unsigned qib_get_pkey(struct qib_ibport *ibp, unsigned index)
+{
+ struct qib_pportdata *ppd = ppd_from_ibp(ibp);
+ struct qib_devdata *dd = ppd->dd;
+ unsigned ctxt = ppd->hw_pidx;
+ unsigned ret;
+
+ /* dd->rcd null if mini_init or some init failures */
+ if (!dd->rcd || index >= ARRAY_SIZE(dd->rcd[ctxt]->pkeys))
+ ret = 0;
+ else
+ ret = dd->rcd[ctxt]->pkeys[index];
+
+ return ret;
+}
+
+static int qib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
+ u16 *pkey)
+{
+ struct qib_devdata *dd = dd_from_ibdev(ibdev);
+ int ret;
+
+ if (index >= qib_get_npkeys(dd)) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ *pkey = qib_get_pkey(to_iport(ibdev, port), index);
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+/**
+ * qib_alloc_ucontext - allocate a ucontest
+ * @ibdev: the infiniband device
+ * @udata: not used by the QLogic_IB driver
+ */
+
+static struct ib_ucontext *qib_alloc_ucontext(struct ib_device *ibdev,
+ struct ib_udata *udata)
+{
+ struct qib_ucontext *context;
+ struct ib_ucontext *ret;
+
+ context = kmalloc(sizeof *context, GFP_KERNEL);
+ if (!context) {
+ ret = ERR_PTR(-ENOMEM);
+ goto bail;
+ }
+
+ ret = &context->ibucontext;
+
+bail:
+ return ret;
+}
+
+static int qib_dealloc_ucontext(struct ib_ucontext *context)
+{
+ kfree(to_iucontext(context));
+ return 0;
+}
+
+static void init_ibport(struct qib_pportdata *ppd)
+{
+ struct qib_verbs_counters cntrs;
+ struct qib_ibport *ibp = &ppd->ibport_data;
+
+ spin_lock_init(&ibp->lock);
+ /* Set the prefix to the default value (see ch. 4.1.1) */
+ ibp->gid_prefix = IB_DEFAULT_GID_PREFIX;
+ ibp->sm_lid = be16_to_cpu(IB_LID_PERMISSIVE);
+ ibp->port_cap_flags = IB_PORT_SYS_IMAGE_GUID_SUP |
+ IB_PORT_CLIENT_REG_SUP | IB_PORT_SL_MAP_SUP |
+ IB_PORT_TRAP_SUP | IB_PORT_AUTO_MIGR_SUP |
+ IB_PORT_DR_NOTICE_SUP | IB_PORT_CAP_MASK_NOTICE_SUP |
+ IB_PORT_OTHER_LOCAL_CHANGES_SUP;
+ if (ppd->dd->flags & QIB_HAS_LINK_LATENCY)
+ ibp->port_cap_flags |= IB_PORT_LINK_LATENCY_SUP;
+ ibp->pma_counter_select[0] = IB_PMA_PORT_XMIT_DATA;
+ ibp->pma_counter_select[1] = IB_PMA_PORT_RCV_DATA;
+ ibp->pma_counter_select[2] = IB_PMA_PORT_XMIT_PKTS;
+ ibp->pma_counter_select[3] = IB_PMA_PORT_RCV_PKTS;
+ ibp->pma_counter_select[4] = IB_PMA_PORT_XMIT_WAIT;
+
+ /* Snapshot current HW counters to "clear" them. */
+ qib_get_counters(ppd, &cntrs);
+ ibp->z_symbol_error_counter = cntrs.symbol_error_counter;
+ ibp->z_link_error_recovery_counter =
+ cntrs.link_error_recovery_counter;
+ ibp->z_link_downed_counter = cntrs.link_downed_counter;
+ ibp->z_port_rcv_errors = cntrs.port_rcv_errors;
+ ibp->z_port_rcv_remphys_errors = cntrs.port_rcv_remphys_errors;
+ ibp->z_port_xmit_discards = cntrs.port_xmit_discards;
+ ibp->z_port_xmit_data = cntrs.port_xmit_data;
+ ibp->z_port_rcv_data = cntrs.port_rcv_data;
+ ibp->z_port_xmit_packets = cntrs.port_xmit_packets;
+ ibp->z_port_rcv_packets = cntrs.port_rcv_packets;
+ ibp->z_local_link_integrity_errors =
+ cntrs.local_link_integrity_errors;
+ ibp->z_excessive_buffer_overrun_errors =
+ cntrs.excessive_buffer_overrun_errors;
+ ibp->z_vl15_dropped = cntrs.vl15_dropped;
+}
+
+/**
+ * qib_register_ib_device - register our device with the infiniband core
+ * @dd: the device data structure
+ * Return the allocated qib_ibdev pointer or NULL on error.
+ */
+int qib_register_ib_device(struct qib_devdata *dd)
+{
+ struct qib_ibdev *dev = &dd->verbs_dev;
+ struct ib_device *ibdev = &dev->ibdev;
+ struct qib_pportdata *ppd = dd->pport;
+ unsigned i, lk_tab_size;
+ int ret;
+
+ dev->qp_table_size = ib_qib_qp_table_size;
+ dev->qp_table = kzalloc(dev->qp_table_size * sizeof *dev->qp_table,
+ GFP_KERNEL);
+ if (!dev->qp_table) {
+ ret = -ENOMEM;
+ goto err_qpt;
+ }
+
+ for (i = 0; i < dd->num_pports; i++)
+ init_ibport(ppd + i);
+
+ /* Only need to initialize non-zero fields. */
+ spin_lock_init(&dev->qpt_lock);
+ spin_lock_init(&dev->n_pds_lock);
+ spin_lock_init(&dev->n_ahs_lock);
+ spin_lock_init(&dev->n_cqs_lock);
+ spin_lock_init(&dev->n_qps_lock);
+ spin_lock_init(&dev->n_srqs_lock);
+ spin_lock_init(&dev->n_mcast_grps_lock);
+ init_timer(&dev->mem_timer);
+ dev->mem_timer.function = mem_timer;
+ dev->mem_timer.data = (unsigned long) dev;
+
+ qib_init_qpn_table(dd, &dev->qpn_table);
+
+ /*
+ * The top ib_qib_lkey_table_size bits are used to index the
+ * table. The lower 8 bits can be owned by the user (copied from
+ * the LKEY). The remaining bits act as a generation number or tag.
+ */
+ spin_lock_init(&dev->lk_table.lock);
+ dev->lk_table.max = 1 << ib_qib_lkey_table_size;
+ lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
+ dev->lk_table.table = (struct qib_mregion **)
+ __get_free_pages(GFP_KERNEL, get_order(lk_tab_size));
+ if (dev->lk_table.table == NULL) {
+ ret = -ENOMEM;
+ goto err_lk;
+ }
+ memset(dev->lk_table.table, 0, lk_tab_size);
+ INIT_LIST_HEAD(&dev->pending_mmaps);
+ spin_lock_init(&dev->pending_lock);
+ dev->mmap_offset = PAGE_SIZE;
+ spin_lock_init(&dev->mmap_offset_lock);
+ INIT_LIST_HEAD(&dev->piowait);
+ INIT_LIST_HEAD(&dev->dmawait);
+ INIT_LIST_HEAD(&dev->txwait);
+ INIT_LIST_HEAD(&dev->memwait);
+ INIT_LIST_HEAD(&dev->txreq_free);
+
+ if (ppd->sdma_descq_cnt) {
+ dev->pio_hdrs = dma_alloc_coherent(&dd->pcidev->dev,
+ ppd->sdma_descq_cnt *
+ sizeof(struct qib_pio_header),
+ &dev->pio_hdrs_phys,
+ GFP_KERNEL);
+ if (!dev->pio_hdrs) {
+ ret = -ENOMEM;
+ goto err_hdrs;
+ }
+ }
+
+ for (i = 0; i < ppd->sdma_descq_cnt; i++) {
+ struct qib_verbs_txreq *tx;
+
+ tx = kzalloc(sizeof *tx, GFP_KERNEL);
+ if (!tx) {
+ ret = -ENOMEM;
+ goto err_tx;
+ }
+ tx->hdr_inx = i;
+ list_add(&tx->txreq.list, &dev->txreq_free);
+ }
+
+ /*
+ * The system image GUID is supposed to be the same for all
+ * IB HCAs in a single system but since there can be other
+ * device types in the system, we can't be sure this is unique.
+ */
+ if (!ib_qib_sys_image_guid)
+ ib_qib_sys_image_guid = ppd->guid;
+
+ strlcpy(ibdev->name, "qib%d", IB_DEVICE_NAME_MAX);
+ ibdev->owner = THIS_MODULE;
+ ibdev->node_guid = ppd->guid;
+ ibdev->uverbs_abi_ver = QIB_UVERBS_ABI_VERSION;
+ ibdev->uverbs_cmd_mask =
+ (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
+ (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_AH) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_AH) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
+ (1ull << IB_USER_VERBS_CMD_REG_MR) |
+ (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_RESIZE_CQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
+ (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
+ (1ull << IB_USER_VERBS_CMD_POST_SEND) |
+ (1ull << IB_USER_VERBS_CMD_POST_RECV) |
+ (1ull << IB_USER_VERBS_CMD_ATTACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_DETACH_MCAST) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_SRQ) |
+ (1ull << IB_USER_VERBS_CMD_POST_SRQ_RECV);
+ ibdev->node_type = RDMA_NODE_IB_CA;
+ ibdev->phys_port_cnt = dd->num_pports;
+ ibdev->num_comp_vectors = 1;
+ ibdev->dma_device = &dd->pcidev->dev;
+ ibdev->query_device = qib_query_device;
+ ibdev->modify_device = qib_modify_device;
+ ibdev->query_port = qib_query_port;
+ ibdev->modify_port = qib_modify_port;
+ ibdev->query_pkey = qib_query_pkey;
+ ibdev->query_gid = qib_query_gid;
+ ibdev->alloc_ucontext = qib_alloc_ucontext;
+ ibdev->dealloc_ucontext = qib_dealloc_ucontext;
+ ibdev->alloc_pd = qib_alloc_pd;
+ ibdev->dealloc_pd = qib_dealloc_pd;
+ ibdev->create_ah = qib_create_ah;
+ ibdev->destroy_ah = qib_destroy_ah;
+ ibdev->modify_ah = qib_modify_ah;
+ ibdev->query_ah = qib_query_ah;
+ ibdev->create_srq = qib_create_srq;
+ ibdev->modify_srq = qib_modify_srq;
+ ibdev->query_srq = qib_query_srq;
+ ibdev->destroy_srq = qib_destroy_srq;
+ ibdev->create_qp = qib_create_qp;
+ ibdev->modify_qp = qib_modify_qp;
+ ibdev->query_qp = qib_query_qp;
+ ibdev->destroy_qp = qib_destroy_qp;
+ ibdev->post_send = qib_post_send;
+ ibdev->post_recv = qib_post_receive;
+ ibdev->post_srq_recv = qib_post_srq_receive;
+ ibdev->create_cq = qib_create_cq;
+ ibdev->destroy_cq = qib_destroy_cq;
+ ibdev->resize_cq = qib_resize_cq;
+ ibdev->poll_cq = qib_poll_cq;
+ ibdev->req_notify_cq = qib_req_notify_cq;
+ ibdev->get_dma_mr = qib_get_dma_mr;
+ ibdev->reg_phys_mr = qib_reg_phys_mr;
+ ibdev->reg_user_mr = qib_reg_user_mr;
+ ibdev->dereg_mr = qib_dereg_mr;
+ ibdev->alloc_fast_reg_mr = qib_alloc_fast_reg_mr;
+ ibdev->alloc_fast_reg_page_list = qib_alloc_fast_reg_page_list;
+ ibdev->free_fast_reg_page_list = qib_free_fast_reg_page_list;
+ ibdev->alloc_fmr = qib_alloc_fmr;
+ ibdev->map_phys_fmr = qib_map_phys_fmr;
+ ibdev->unmap_fmr = qib_unmap_fmr;
+ ibdev->dealloc_fmr = qib_dealloc_fmr;
+ ibdev->attach_mcast = qib_multicast_attach;
+ ibdev->detach_mcast = qib_multicast_detach;
+ ibdev->process_mad = qib_process_mad;
+ ibdev->mmap = qib_mmap;
+ ibdev->dma_ops = &qib_dma_mapping_ops;
+
+ snprintf(ibdev->node_desc, sizeof(ibdev->node_desc),
+ QIB_IDSTR " %s", init_utsname()->nodename);
+
+ ret = ib_register_device(ibdev, qib_create_port_files);
+ if (ret)
+ goto err_reg;
+
+ ret = qib_create_agents(dev);
+ if (ret)
+ goto err_agents;
+
+ if (qib_verbs_register_sysfs(dd))
+ goto err_class;
+
+ goto bail;
+
+err_class:
+ qib_free_agents(dev);
+err_agents:
+ ib_unregister_device(ibdev);
+err_reg:
+err_tx:
+ while (!list_empty(&dev->txreq_free)) {
+ struct list_head *l = dev->txreq_free.next;
+ struct qib_verbs_txreq *tx;
+
+ list_del(l);
+ tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
+ kfree(tx);
+ }
+ if (ppd->sdma_descq_cnt)
+ dma_free_coherent(&dd->pcidev->dev,
+ ppd->sdma_descq_cnt *
+ sizeof(struct qib_pio_header),
+ dev->pio_hdrs, dev->pio_hdrs_phys);
+err_hdrs:
+ free_pages((unsigned long) dev->lk_table.table, get_order(lk_tab_size));
+err_lk:
+ kfree(dev->qp_table);
+err_qpt:
+ qib_dev_err(dd, "cannot register verbs: %d!\n", -ret);
+bail:
+ return ret;
+}
+
+void qib_unregister_ib_device(struct qib_devdata *dd)
+{
+ struct qib_ibdev *dev = &dd->verbs_dev;
+ struct ib_device *ibdev = &dev->ibdev;
+ u32 qps_inuse;
+ unsigned lk_tab_size;
+
+ qib_verbs_unregister_sysfs(dd);
+
+ qib_free_agents(dev);
+
+ ib_unregister_device(ibdev);
+
+ if (!list_empty(&dev->piowait))
+ qib_dev_err(dd, "piowait list not empty!\n");
+ if (!list_empty(&dev->dmawait))
+ qib_dev_err(dd, "dmawait list not empty!\n");
+ if (!list_empty(&dev->txwait))
+ qib_dev_err(dd, "txwait list not empty!\n");
+ if (!list_empty(&dev->memwait))
+ qib_dev_err(dd, "memwait list not empty!\n");
+ if (dev->dma_mr)
+ qib_dev_err(dd, "DMA MR not NULL!\n");
+
+ qps_inuse = qib_free_all_qps(dd);
+ if (qps_inuse)
+ qib_dev_err(dd, "QP memory leak! %u still in use\n",
+ qps_inuse);
+
+ del_timer_sync(&dev->mem_timer);
+ qib_free_qpn_table(&dev->qpn_table);
+ while (!list_empty(&dev->txreq_free)) {
+ struct list_head *l = dev->txreq_free.next;
+ struct qib_verbs_txreq *tx;
+
+ list_del(l);
+ tx = list_entry(l, struct qib_verbs_txreq, txreq.list);
+ kfree(tx);
+ }
+ if (dd->pport->sdma_descq_cnt)
+ dma_free_coherent(&dd->pcidev->dev,
+ dd->pport->sdma_descq_cnt *
+ sizeof(struct qib_pio_header),
+ dev->pio_hdrs, dev->pio_hdrs_phys);
+ lk_tab_size = dev->lk_table.max * sizeof(*dev->lk_table.table);
+ free_pages((unsigned long) dev->lk_table.table,
+ get_order(lk_tab_size));
+ kfree(dev->qp_table);
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 46/52] IB/qib: Add qib_verbs.h
From: Ralph Campbell @ 2010-05-07 0:02 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_verbs.h file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_verbs.h | 1099 +++++++++++++++++++++++++++++++++
1 files changed, 1099 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_verbs.h
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
new file mode 100644
index 0000000..fd972e0
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -0,0 +1,1099 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
+ * All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef QIB_VERBS_H
+#define QIB_VERBS_H
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/kref.h>
+#include <linux/workqueue.h>
+#include <rdma/ib_pack.h>
+#include <rdma/ib_user_verbs.h>
+
+struct qib_ctxtdata;
+struct qib_pportdata;
+struct qib_devdata;
+struct qib_verbs_txreq;
+
+#define QIB_MAX_RDMA_ATOMIC 16
+#define QIB_GUIDS_PER_PORT 5
+
+#define QPN_MAX (1 << 24)
+#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define QIB_UVERBS_ABI_VERSION 2
+
+/*
+ * Define an ib_cq_notify value that is not valid so we know when CQ
+ * notifications are armed.
+ */
+#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1)
+
+#define IB_SEQ_NAK (3 << 29)
+
+/* AETH NAK opcode values */
+#define IB_RNR_NAK 0x20
+#define IB_NAK_PSN_ERROR 0x60
+#define IB_NAK_INVALID_REQUEST 0x61
+#define IB_NAK_REMOTE_ACCESS_ERROR 0x62
+#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
+#define IB_NAK_INVALID_RD_REQUEST 0x64
+
+/* Flags for checking QP state (see ib_qib_state_ops[]) */
+#define QIB_POST_SEND_OK 0x01
+#define QIB_POST_RECV_OK 0x02
+#define QIB_PROCESS_RECV_OK 0x04
+#define QIB_PROCESS_SEND_OK 0x08
+#define QIB_PROCESS_NEXT_SEND_OK 0x10
+#define QIB_FLUSH_SEND 0x20
+#define QIB_FLUSH_RECV 0x40
+#define QIB_PROCESS_OR_FLUSH_SEND \
+ (QIB_PROCESS_SEND_OK | QIB_FLUSH_SEND)
+
+/* IB Performance Manager status values */
+#define IB_PMA_SAMPLE_STATUS_DONE 0x00
+#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
+#define IB_PMA_SAMPLE_STATUS_RUNNING 0x02
+
+/* Mandatory IB performance counter select values. */
+#define IB_PMA_PORT_XMIT_DATA cpu_to_be16(0x0001)
+#define IB_PMA_PORT_RCV_DATA cpu_to_be16(0x0002)
+#define IB_PMA_PORT_XMIT_PKTS cpu_to_be16(0x0003)
+#define IB_PMA_PORT_RCV_PKTS cpu_to_be16(0x0004)
+#define IB_PMA_PORT_XMIT_WAIT cpu_to_be16(0x0005)
+
+#define QIB_VENDOR_IPG cpu_to_be16(0xFFA0)
+
+#define IB_BTH_REQ_ACK (1 << 31)
+#define IB_BTH_SOLICITED (1 << 23)
+#define IB_BTH_MIG_REQ (1 << 22)
+
+/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */
+#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26)
+
+#define IB_GRH_VERSION 6
+#define IB_GRH_VERSION_MASK 0xF
+#define IB_GRH_VERSION_SHIFT 28
+#define IB_GRH_TCLASS_MASK 0xFF
+#define IB_GRH_TCLASS_SHIFT 20
+#define IB_GRH_FLOW_MASK 0xFFFFF
+#define IB_GRH_FLOW_SHIFT 0
+#define IB_GRH_NEXT_HDR 0x1B
+
+#define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL)
+
+/* Values for set/get portinfo VLCap OperationalVLs */
+#define IB_VL_VL0 1
+#define IB_VL_VL0_1 2
+#define IB_VL_VL0_3 3
+#define IB_VL_VL0_7 4
+#define IB_VL_VL0_14 5
+
+static inline int qib_num_vls(int vls)
+{
+ switch (vls) {
+ default:
+ case IB_VL_VL0:
+ return 1;
+ case IB_VL_VL0_1:
+ return 2;
+ case IB_VL_VL0_3:
+ return 4;
+ case IB_VL_VL0_7:
+ return 8;
+ case IB_VL_VL0_14:
+ return 15;
+ }
+}
+
+struct ib_reth {
+ __be64 vaddr;
+ __be32 rkey;
+ __be32 length;
+} __attribute__ ((packed));
+
+struct ib_atomic_eth {
+ __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
+ __be32 rkey;
+ __be64 swap_data;
+ __be64 compare_data;
+} __attribute__ ((packed));
+
+struct qib_other_headers {
+ __be32 bth[3];
+ union {
+ struct {
+ __be32 deth[2];
+ __be32 imm_data;
+ } ud;
+ struct {
+ struct ib_reth reth;
+ __be32 imm_data;
+ } rc;
+ struct {
+ __be32 aeth;
+ __be32 atomic_ack_eth[2];
+ } at;
+ __be32 imm_data;
+ __be32 aeth;
+ struct ib_atomic_eth atomic_eth;
+ } u;
+} __attribute__ ((packed));
+
+/*
+ * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
+ * long (72 w/ imm_data). Only the first 56 bytes of the IB header
+ * will be in the eager header buffer. The remaining 12 or 16 bytes
+ * are in the data buffer.
+ */
+struct qib_ib_header {
+ __be16 lrh[4];
+ union {
+ struct {
+ struct ib_grh grh;
+ struct qib_other_headers oth;
+ } l;
+ struct qib_other_headers oth;
+ } u;
+} __attribute__ ((packed));
+
+struct qib_pio_header {
+ __le32 pbc[2];
+ struct qib_ib_header hdr;
+} __attribute__ ((packed));
+
+/*
+ * There is one struct qib_mcast for each multicast GID.
+ * All attached QPs are then stored as a list of
+ * struct qib_mcast_qp.
+ */
+struct qib_mcast_qp {
+ struct list_head list;
+ struct qib_qp *qp;
+};
+
+struct qib_mcast {
+ struct rb_node rb_node;
+ union ib_gid mgid;
+ struct list_head qp_list;
+ wait_queue_head_t wait;
+ atomic_t refcount;
+ int n_attached;
+};
+
+/* Protection domain */
+struct qib_pd {
+ struct ib_pd ibpd;
+ int user; /* non-zero if created from user space */
+};
+
+/* Address Handle */
+struct qib_ah {
+ struct ib_ah ibah;
+ struct ib_ah_attr attr;
+ atomic_t refcount;
+};
+
+/*
+ * This structure is used by qib_mmap() to validate an offset
+ * when an mmap() request is made. The vm_area_struct then uses
+ * this as its vm_private_data.
+ */
+struct qib_mmap_info {
+ struct list_head pending_mmaps;
+ struct ib_ucontext *context;
+ void *obj;
+ __u64 offset;
+ struct kref ref;
+ unsigned size;
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ */
+struct qib_cq_wc {
+ u32 head; /* index of next entry to fill */
+ u32 tail; /* index of next ib_poll_cq() entry */
+ union {
+ /* these are actually size ibcq.cqe + 1 */
+ struct ib_uverbs_wc uqueue[0];
+ struct ib_wc kqueue[0];
+ };
+};
+
+/*
+ * The completion queue structure.
+ */
+struct qib_cq {
+ struct ib_cq ibcq;
+ struct work_struct comptask;
+ spinlock_t lock; /* protect changes in this struct */
+ u8 notify;
+ u8 triggered;
+ struct qib_cq_wc *queue;
+ struct qib_mmap_info *ip;
+};
+
+/*
+ * A segment is a linear region of low physical memory.
+ * XXX Maybe we should use phys addr here and kmap()/kunmap().
+ * Used by the verbs layer.
+ */
+struct qib_seg {
+ void *vaddr;
+ size_t length;
+};
+
+/* The number of qib_segs that fit in a page. */
+#define QIB_SEGSZ (PAGE_SIZE / sizeof(struct qib_seg))
+
+struct qib_segarray {
+ struct qib_seg segs[QIB_SEGSZ];
+};
+
+struct qib_mregion {
+ struct ib_pd *pd; /* shares refcnt of ibmr.pd */
+ u64 user_base; /* User's address for this region */
+ u64 iova; /* IB start address of this region */
+ size_t length;
+ u32 lkey;
+ u32 offset; /* offset (bytes) to start of region */
+ int access_flags;
+ u32 max_segs; /* number of qib_segs in all the arrays */
+ u32 mapsz; /* size of the map array */
+ atomic_t refcount;
+ struct qib_segarray *map[0]; /* the segments */
+};
+
+/*
+ * These keep track of the copy progress within a memory region.
+ * Used by the verbs layer.
+ */
+struct qib_sge {
+ struct qib_mregion *mr;
+ void *vaddr; /* kernel virtual address of segment */
+ u32 sge_length; /* length of the SGE */
+ u32 length; /* remaining length of the segment */
+ u16 m; /* current index: mr->map[m] */
+ u16 n; /* current index: mr->map[m]->segs[n] */
+};
+
+/* Memory region */
+struct qib_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct qib_mregion mr; /* must be last */
+};
+
+/*
+ * Send work request queue entry.
+ * The size of the sg_list is determined when the QP is created and stored
+ * in qp->s_max_sge.
+ */
+struct qib_swqe {
+ struct ib_send_wr wr; /* don't use wr.sg_list */
+ u32 psn; /* first packet sequence number */
+ u32 lpsn; /* last packet sequence number */
+ u32 ssn; /* send sequence number */
+ u32 length; /* total length of data in sg_list */
+ struct qib_sge sg_list[0];
+};
+
+/*
+ * Receive work request queue entry.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
+ */
+struct qib_rwqe {
+ u64 wr_id;
+ u8 num_sge;
+ struct ib_sge sg_list[0];
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() instead.
+ */
+struct qib_rwq {
+ u32 head; /* new work requests posted to the head */
+ u32 tail; /* receives pull requests from here. */
+ struct qib_rwqe wq[0];
+};
+
+struct qib_rq {
+ struct qib_rwq *wq;
+ spinlock_t lock; /* protect changes in this struct */
+ u32 size; /* size of RWQE array */
+ u8 max_sge;
+};
+
+struct qib_srq {
+ struct ib_srq ibsrq;
+ struct qib_rq rq;
+ struct qib_mmap_info *ip;
+ /* send signal when number of RWQEs < limit */
+ u32 limit;
+};
+
+struct qib_sge_state {
+ struct qib_sge *sg_list; /* next SGE to be used if any */
+ struct qib_sge sge; /* progress state for the current SGE */
+ u32 total_len;
+ u8 num_sge;
+};
+
+/*
+ * This structure holds the information that the send tasklet needs
+ * to send a RDMA read response or atomic operation.
+ */
+struct qib_ack_entry {
+ u8 opcode;
+ u8 sent;
+ u32 psn;
+ u32 lpsn;
+ union {
+ struct qib_sge rdma_sge;
+ u64 atomic_data;
+ };
+};
+
+/*
+ * Variables prefixed with s_ are for the requester (sender).
+ * Variables prefixed with r_ are for the responder (receiver).
+ * Variables prefixed with ack_ are for responder replies.
+ *
+ * Common variables are protected by both r_rq.lock and s_lock in that order
+ * which only happens in modify_qp() or changing the QP 'state'.
+ */
+struct qib_qp {
+ struct ib_qp ibqp;
+ struct qib_qp *next; /* link list for QPN hash table */
+ struct qib_qp *timer_next; /* link list for qib_ib_timer() */
+ struct list_head iowait; /* link for wait PIO buf */
+ struct list_head rspwait; /* link for waititing to respond */
+ struct ib_ah_attr remote_ah_attr;
+ struct ib_ah_attr alt_ah_attr;
+ struct qib_ib_header s_hdr; /* next packet header to send */
+ atomic_t refcount;
+ wait_queue_head_t wait;
+ wait_queue_head_t wait_dma;
+ struct timer_list s_timer;
+ struct work_struct s_work;
+ struct qib_mmap_info *ip;
+ struct qib_sge_state *s_cur_sge;
+ struct qib_verbs_txreq *s_tx;
+ struct qib_mregion *s_rdma_mr;
+ struct qib_sge_state s_sge; /* current send request data */
+ struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1];
+ struct qib_sge_state s_ack_rdma_sge;
+ struct qib_sge_state s_rdma_read_sge;
+ struct qib_sge_state r_sge; /* current receive data */
+ spinlock_t r_lock; /* used for APM */
+ spinlock_t s_lock;
+ atomic_t s_dma_busy;
+ unsigned processor_id; /* Processor ID QP is bound to */
+ u32 s_flags;
+ u32 s_cur_size; /* size of send packet in bytes */
+ u32 s_len; /* total length of s_sge */
+ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
+ u32 s_next_psn; /* PSN for next request */
+ u32 s_last_psn; /* last response PSN processed */
+ u32 s_sending_psn; /* lowest PSN that is being sent */
+ u32 s_sending_hpsn; /* highest PSN that is being sent */
+ u32 s_psn; /* current packet sequence number */
+ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
+ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
+ u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
+ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
+ u64 r_wr_id; /* ID for current receive WQE */
+ unsigned long r_aflags;
+ u32 r_len; /* total length of r_sge */
+ u32 r_rcv_len; /* receive data len processed */
+ u32 r_psn; /* expected rcv packet sequence number */
+ u32 r_msn; /* message sequence number */
+ u16 s_hdrwords; /* size of s_hdr in 32 bit words */
+ u16 s_rdma_ack_cnt;
+ u8 state; /* QP state */
+ u8 s_state; /* opcode of last packet sent */
+ u8 s_ack_state; /* opcode of packet to ACK */
+ u8 s_nak_state; /* non-zero if NAK is pending */
+ u8 r_state; /* opcode of last packet received */
+ u8 r_nak_state; /* non-zero if NAK is pending */
+ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
+ u8 r_flags;
+ u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
+ u8 r_head_ack_queue; /* index into s_ack_queue[] */
+ u8 qp_access_flags;
+ u8 s_max_sge; /* size of s_wq->sg_list */
+ u8 s_retry_cnt; /* number of times to retry */
+ u8 s_rnr_retry_cnt;
+ u8 s_retry; /* requester retry counter */
+ u8 s_rnr_retry; /* requester RNR retry counter */
+ u8 s_pkey_index; /* PKEY index to use */
+ u8 s_alt_pkey_index; /* Alternate path PKEY index to use */
+ u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
+ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
+ u8 s_tail_ack_queue; /* index into s_ack_queue[] */
+ u8 s_srate;
+ u8 s_draining;
+ u8 s_mig_state;
+ u8 timeout; /* Timeout for this QP */
+ u8 alt_timeout; /* Alternate path timeout for this QP */
+ u8 port_num;
+ enum ib_mtu path_mtu;
+ u32 remote_qpn;
+ u32 qkey; /* QKEY for this QP (for UD or RD) */
+ u32 s_size; /* send work queue size */
+ u32 s_head; /* new entries added here */
+ u32 s_tail; /* next entry to process */
+ u32 s_cur; /* current work queue entry */
+ u32 s_acked; /* last un-ACK'ed entry */
+ u32 s_last; /* last completed entry */
+ u32 s_ssn; /* SSN of tail entry */
+ u32 s_lsn; /* limit sequence number (credit) */
+ struct qib_swqe *s_wq; /* send work queue */
+ struct qib_swqe *s_wqe;
+ struct qib_rq r_rq; /* receive work queue */
+ struct qib_sge r_sg_list[0]; /* verified SGEs */
+};
+
+/*
+ * Atomic bit definitions for r_aflags.
+ */
+#define QIB_R_WRID_VALID 0
+#define QIB_R_REWIND_SGE 1
+
+/*
+ * Bit definitions for r_flags.
+ */
+#define QIB_R_REUSE_SGE 0x01
+#define QIB_R_RDMAR_SEQ 0x02
+#define QIB_R_RSP_NAK 0x04
+#define QIB_R_RSP_SEND 0x08
+#define QIB_R_COMM_EST 0x10
+
+/*
+ * Bit definitions for s_flags.
+ *
+ * QIB_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
+ * QIB_S_BUSY - send tasklet is processing the QP
+ * QIB_S_TIMER - the RC retry timer is active
+ * QIB_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
+ * QIB_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
+ * before processing the next SWQE
+ * QIB_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
+ * before processing the next SWQE
+ * QIB_S_WAIT_RNR - waiting for RNR timeout
+ * QIB_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
+ * QIB_S_WAIT_DMA - waiting for send DMA queue to drain before generating
+ * next send completion entry not via send DMA
+ * QIB_S_WAIT_PIO - waiting for a send buffer to be available
+ * QIB_S_WAIT_TX - waiting for a struct qib_verbs_txreq to be available
+ * QIB_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
+ * QIB_S_WAIT_KMEM - waiting for kernel memory to be available
+ * QIB_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
+ * QIB_S_WAIT_ACK - waiting for an ACK packet before sending more requests
+ * QIB_S_SEND_ONE - send one packet, request ACK, then wait for ACK
+ */
+#define QIB_S_SIGNAL_REQ_WR 0x0001
+#define QIB_S_BUSY 0x0002
+#define QIB_S_TIMER 0x0004
+#define QIB_S_RESP_PENDING 0x0008
+#define QIB_S_ACK_PENDING 0x0010
+#define QIB_S_WAIT_FENCE 0x0020
+#define QIB_S_WAIT_RDMAR 0x0040
+#define QIB_S_WAIT_RNR 0x0080
+#define QIB_S_WAIT_SSN_CREDIT 0x0100
+#define QIB_S_WAIT_DMA 0x0200
+#define QIB_S_WAIT_PIO 0x0400
+#define QIB_S_WAIT_TX 0x0800
+#define QIB_S_WAIT_DMA_DESC 0x1000
+#define QIB_S_WAIT_KMEM 0x2000
+#define QIB_S_WAIT_PSN 0x4000
+#define QIB_S_WAIT_ACK 0x8000
+#define QIB_S_SEND_ONE 0x10000
+#define QIB_S_UNLIMITED_CREDIT 0x20000
+
+/*
+ * Wait flags that would prevent any packet type from being sent.
+ */
+#define QIB_S_ANY_WAIT_IO (QIB_S_WAIT_PIO | QIB_S_WAIT_TX | \
+ QIB_S_WAIT_DMA_DESC | QIB_S_WAIT_KMEM)
+
+/*
+ * Wait flags that would prevent send work requests from making progress.
+ */
+#define QIB_S_ANY_WAIT_SEND (QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR | \
+ QIB_S_WAIT_RNR | QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_DMA | \
+ QIB_S_WAIT_PSN | QIB_S_WAIT_ACK)
+
+#define QIB_S_ANY_WAIT (QIB_S_ANY_WAIT_IO | QIB_S_ANY_WAIT_SEND)
+
+#define QIB_PSN_CREDIT 16
+
+/*
+ * Since struct qib_swqe is not a fixed size, we can't simply index into
+ * struct qib_qp.s_wq. This function does the array index computation.
+ */
+static inline struct qib_swqe *get_swqe_ptr(struct qib_qp *qp,
+ unsigned n)
+{
+ return (struct qib_swqe *)((char *)qp->s_wq +
+ (sizeof(struct qib_swqe) +
+ qp->s_max_sge *
+ sizeof(struct qib_sge)) * n);
+}
+
+/*
+ * Since struct qib_rwqe is not a fixed size, we can't simply index into
+ * struct qib_rwq.wq. This function does the array index computation.
+ */
+static inline struct qib_rwqe *get_rwqe_ptr(struct qib_rq *rq, unsigned n)
+{
+ return (struct qib_rwqe *)
+ ((char *) rq->wq->wq +
+ (sizeof(struct qib_rwqe) +
+ rq->max_sge * sizeof(struct ib_sge)) * n);
+}
+
+/*
+ * QPN-map pages start out as NULL, they get allocated upon
+ * first use and are never deallocated. This way,
+ * large bitmaps are not allocated unless large numbers of QPs are used.
+ */
+struct qpn_map {
+ void *page;
+};
+
+struct qib_qpn_table {
+ spinlock_t lock; /* protect changes in this struct */
+ unsigned flags; /* flags for QP0/1 allocated for each port */
+ u32 last; /* last QP number allocated */
+ u32 nmaps; /* size of the map table */
+ u16 limit;
+ u16 mask;
+ /* bit map of free QP numbers other than 0/1 */
+ struct qpn_map map[QPNMAP_ENTRIES];
+};
+
+struct qib_lkey_table {
+ spinlock_t lock; /* protect changes in this struct */
+ u32 next; /* next unused index (speeds search) */
+ u32 gen; /* generation count */
+ u32 max; /* size of the table */
+ struct qib_mregion **table;
+};
+
+struct qib_opcode_stats {
+ u64 n_packets; /* number of packets */
+ u64 n_bytes; /* total number of bytes */
+};
+
+struct qib_ibport {
+ struct qib_qp *qp0;
+ struct qib_qp *qp1;
+ struct ib_mad_agent *send_agent; /* agent for SMI (traps) */
+ struct qib_ah *sm_ah;
+ struct qib_ah *smi_ah;
+ struct rb_root mcast_tree;
+ spinlock_t lock; /* protect changes in this struct */
+
+ /* non-zero when timer is set */
+ unsigned long mkey_lease_timeout;
+ unsigned long trap_timeout;
+ __be64 gid_prefix; /* in network order */
+ __be64 mkey;
+ __be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */
+ u64 tid; /* TID for traps */
+ u64 n_unicast_xmit; /* total unicast packets sent */
+ u64 n_unicast_rcv; /* total unicast packets received */
+ u64 n_multicast_xmit; /* total multicast packets sent */
+ u64 n_multicast_rcv; /* total multicast packets received */
+ u64 z_symbol_error_counter; /* starting count for PMA */
+ u64 z_link_error_recovery_counter; /* starting count for PMA */
+ u64 z_link_downed_counter; /* starting count for PMA */
+ u64 z_port_rcv_errors; /* starting count for PMA */
+ u64 z_port_rcv_remphys_errors; /* starting count for PMA */
+ u64 z_port_xmit_discards; /* starting count for PMA */
+ u64 z_port_xmit_data; /* starting count for PMA */
+ u64 z_port_rcv_data; /* starting count for PMA */
+ u64 z_port_xmit_packets; /* starting count for PMA */
+ u64 z_port_rcv_packets; /* starting count for PMA */
+ u32 z_local_link_integrity_errors; /* starting count for PMA */
+ u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */
+ u32 z_vl15_dropped; /* starting count for PMA */
+ u32 n_rc_resends;
+ u32 n_rc_acks;
+ u32 n_rc_qacks;
+ u32 n_rc_delayed_comp;
+ u32 n_seq_naks;
+ u32 n_rdma_seq;
+ u32 n_rnr_naks;
+ u32 n_other_naks;
+ u32 n_loop_pkts;
+ u32 n_pkt_drops;
+ u32 n_vl15_dropped;
+ u32 n_rc_timeouts;
+ u32 n_dmawait;
+ u32 n_unaligned;
+ u32 n_rc_dupreq;
+ u32 n_rc_seqnak;
+ u32 port_cap_flags;
+ u32 pma_sample_start;
+ u32 pma_sample_interval;
+ __be16 pma_counter_select[5];
+ u16 pma_tag;
+ u16 pkey_violations;
+ u16 qkey_violations;
+ u16 mkey_violations;
+ u16 mkey_lease_period;
+ u16 sm_lid;
+ u16 repress_traps;
+ u8 sm_sl;
+ u8 mkeyprot;
+ u8 subnet_timeout;
+ u8 vl_high_limit;
+ u8 sl_to_vl[16];
+
+ struct qib_opcode_stats opstats[128];
+};
+
+struct qib_ibdev {
+ struct ib_device ibdev;
+ struct list_head pending_mmaps;
+ spinlock_t mmap_offset_lock; /* protect mmap_offset */
+ u32 mmap_offset;
+ struct qib_mregion *dma_mr;
+
+ /* QP numbers are shared by all IB ports */
+ struct qib_qpn_table qpn_table;
+ struct qib_lkey_table lk_table;
+ struct list_head piowait; /* list for wait PIO buf */
+ struct list_head dmawait; /* list for wait DMA */
+ struct list_head txwait; /* list for wait qib_verbs_txreq */
+ struct list_head memwait; /* list for wait kernel memory */
+ struct list_head txreq_free;
+ struct timer_list mem_timer;
+ struct qib_qp **qp_table;
+ struct qib_pio_header *pio_hdrs;
+ dma_addr_t pio_hdrs_phys;
+ /* list of QPs waiting for RNR timer */
+ spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */
+ unsigned qp_table_size; /* size of the hash table */
+ spinlock_t qpt_lock;
+
+ u32 n_piowait;
+ u32 n_txwait;
+
+ u32 n_pds_allocated; /* number of PDs allocated for device */
+ spinlock_t n_pds_lock;
+ u32 n_ahs_allocated; /* number of AHs allocated for device */
+ spinlock_t n_ahs_lock;
+ u32 n_cqs_allocated; /* number of CQs allocated for device */
+ spinlock_t n_cqs_lock;
+ u32 n_qps_allocated; /* number of QPs allocated for device */
+ spinlock_t n_qps_lock;
+ u32 n_srqs_allocated; /* number of SRQs allocated for device */
+ spinlock_t n_srqs_lock;
+ u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
+ spinlock_t n_mcast_grps_lock;
+};
+
+struct qib_verbs_counters {
+ u64 symbol_error_counter;
+ u64 link_error_recovery_counter;
+ u64 link_downed_counter;
+ u64 port_rcv_errors;
+ u64 port_rcv_remphys_errors;
+ u64 port_xmit_discards;
+ u64 port_xmit_data;
+ u64 port_rcv_data;
+ u64 port_xmit_packets;
+ u64 port_rcv_packets;
+ u32 local_link_integrity_errors;
+ u32 excessive_buffer_overrun_errors;
+ u32 vl15_dropped;
+};
+
+static inline struct qib_mr *to_imr(struct ib_mr *ibmr)
+{
+ return container_of(ibmr, struct qib_mr, ibmr);
+}
+
+static inline struct qib_pd *to_ipd(struct ib_pd *ibpd)
+{
+ return container_of(ibpd, struct qib_pd, ibpd);
+}
+
+static inline struct qib_ah *to_iah(struct ib_ah *ibah)
+{
+ return container_of(ibah, struct qib_ah, ibah);
+}
+
+static inline struct qib_cq *to_icq(struct ib_cq *ibcq)
+{
+ return container_of(ibcq, struct qib_cq, ibcq);
+}
+
+static inline struct qib_srq *to_isrq(struct ib_srq *ibsrq)
+{
+ return container_of(ibsrq, struct qib_srq, ibsrq);
+}
+
+static inline struct qib_qp *to_iqp(struct ib_qp *ibqp)
+{
+ return container_of(ibqp, struct qib_qp, ibqp);
+}
+
+static inline struct qib_ibdev *to_idev(struct ib_device *ibdev)
+{
+ return container_of(ibdev, struct qib_ibdev, ibdev);
+}
+
+/*
+ * Send if not busy or waiting for I/O and either
+ * a RC response is pending or we can process send work requests.
+ */
+static inline int qib_send_ok(struct qib_qp *qp)
+{
+ return !(qp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT_IO)) &&
+ (qp->s_hdrwords || (qp->s_flags & QIB_S_RESP_PENDING) ||
+ !(qp->s_flags & QIB_S_ANY_WAIT_SEND));
+}
+
+extern struct workqueue_struct *qib_wq;
+
+/*
+ * This must be called with s_lock held.
+ */
+static inline void qib_schedule_send(struct qib_qp *qp)
+{
+ if (qib_send_ok(qp)) {
+ if (qp->processor_id == smp_processor_id())
+ queue_work(qib_wq, &qp->s_work);
+ else
+ queue_work_on(qp->processor_id,
+ qib_wq, &qp->s_work);
+ }
+}
+
+static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
+{
+ u16 p1 = pkey1 & 0x7FFF;
+ u16 p2 = pkey2 & 0x7FFF;
+
+ /*
+ * Low 15 bits must be non-zero and match, and
+ * one of the two must be a full member.
+ */
+ return p1 && p1 == p2 && ((__s16)pkey1 < 0 || (__s16)pkey2 < 0);
+}
+
+void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
+ u32 qp1, u32 qp2, __be16 lid1, __be16 lid2);
+void qib_cap_mask_chg(struct qib_ibport *ibp);
+void qib_sys_guid_chg(struct qib_ibport *ibp);
+void qib_node_desc_chg(struct qib_ibport *ibp);
+int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad);
+int qib_create_agents(struct qib_ibdev *dev);
+void qib_free_agents(struct qib_ibdev *dev);
+
+/*
+ * Compare the lower 24 bits of the two values.
+ * Returns an integer <, ==, or > than zero.
+ */
+static inline int qib_cmp24(u32 a, u32 b)
+{
+ return (((int) a) - ((int) b)) << 8;
+}
+
+struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid);
+
+int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
+ u64 *rwords, u64 *spkts, u64 *rpkts,
+ u64 *xmit_wait);
+
+int qib_get_counters(struct qib_pportdata *ppd,
+ struct qib_verbs_counters *cntrs);
+
+int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+
+int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+
+int qib_mcast_tree_empty(struct qib_ibport *ibp);
+
+__be32 qib_compute_aeth(struct qib_qp *qp);
+
+struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn);
+
+struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+
+int qib_destroy_qp(struct ib_qp *ibqp);
+
+int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err);
+
+int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+
+int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_qp_init_attr *init_attr);
+
+unsigned qib_free_all_qps(struct qib_devdata *dd);
+
+void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt);
+
+void qib_free_qpn_table(struct qib_qpn_table *qpt);
+
+void qib_get_credit(struct qib_qp *qp, u32 aeth);
+
+unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult);
+
+void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail);
+
+void qib_put_txreq(struct qib_verbs_txreq *tx);
+
+int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
+ u32 hdrwords, struct qib_sge_state *ss, u32 len);
+
+void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length,
+ int release);
+
+void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release);
+
+void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+
+void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+
+int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+
+void qib_rc_rnr_retry(unsigned long arg);
+
+void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr);
+
+void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err);
+
+int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr);
+
+void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+
+int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr);
+
+int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr);
+
+int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
+ struct qib_sge *isge, struct ib_sge *sge, int acc);
+
+int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
+ u32 len, u64 vaddr, u32 rkey, int acc);
+
+int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+
+struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
+
+int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask,
+ struct ib_udata *udata);
+
+int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
+
+int qib_destroy_srq(struct ib_srq *ibsrq);
+
+void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig);
+
+int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
+
+struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries,
+ int comp_vector, struct ib_ucontext *context,
+ struct ib_udata *udata);
+
+int qib_destroy_cq(struct ib_cq *ibcq);
+
+int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
+
+int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
+
+struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc);
+
+struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf, int acc, u64 *iova_start);
+
+struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata);
+
+int qib_dereg_mr(struct ib_mr *ibmr);
+
+struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
+
+struct ib_fast_reg_page_list *qib_alloc_fast_reg_page_list(
+ struct ib_device *ibdev, int page_list_len);
+
+void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl);
+
+int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr);
+
+struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr);
+
+int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int list_len, u64 iova);
+
+int qib_unmap_fmr(struct list_head *fmr_list);
+
+int qib_dealloc_fmr(struct ib_fmr *ibfmr);
+
+void qib_release_mmap_info(struct kref *ref);
+
+struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size,
+ struct ib_ucontext *context,
+ void *obj);
+
+void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip,
+ u32 size, void *obj);
+
+int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+int qib_get_rwqe(struct qib_qp *qp, int wr_id_only);
+
+void qib_migrate_qp(struct qib_qp *qp);
+
+int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+ int has_grh, struct qib_qp *qp, u32 bth0);
+
+u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
+ struct ib_global_route *grh, u32 hwords, u32 nwords);
+
+void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
+ u32 bth0, u32 bth2);
+
+void qib_do_send(struct work_struct *work);
+
+void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
+ enum ib_wc_status status);
+
+void qib_send_rc_ack(struct qib_qp *qp);
+
+int qib_make_rc_req(struct qib_qp *qp);
+
+int qib_make_uc_req(struct qib_qp *qp);
+
+int qib_make_ud_req(struct qib_qp *qp);
+
+int qib_register_ib_device(struct qib_devdata *);
+
+void qib_unregister_ib_device(struct qib_devdata *);
+
+void qib_ib_rcv(struct qib_ctxtdata *, void *, void *, u32);
+
+void qib_ib_piobufavail(struct qib_devdata *);
+
+unsigned qib_get_npkeys(struct qib_devdata *);
+
+unsigned qib_get_pkey(struct qib_ibport *, unsigned);
+
+extern const enum ib_wc_opcode ib_qib_wc_opcode[];
+
+/*
+ * Below HCA-independent IB PhysPortState values, returned
+ * by the f_ibphys_portstate() routine.
+ */
+#define IB_PHYSPORTSTATE_SLEEP 1
+#define IB_PHYSPORTSTATE_POLL 2
+#define IB_PHYSPORTSTATE_DISABLED 3
+#define IB_PHYSPORTSTATE_CFG_TRAIN 4
+#define IB_PHYSPORTSTATE_LINKUP 5
+#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6
+#define IB_PHYSPORTSTATE_CFG_DEBOUNCE 8
+#define IB_PHYSPORTSTATE_CFG_IDLE 0xB
+#define IB_PHYSPORTSTATE_RECOVERY_RETRAIN 0xC
+#define IB_PHYSPORTSTATE_RECOVERY_WAITRMT 0xE
+#define IB_PHYSPORTSTATE_RECOVERY_IDLE 0xF
+#define IB_PHYSPORTSTATE_CFG_ENH 0x10
+#define IB_PHYSPORTSTATE_CFG_WAIT_ENH 0x13
+
+extern const int ib_qib_state_ops[];
+
+extern __be64 ib_qib_sys_image_guid; /* in network order */
+
+extern unsigned int ib_qib_lkey_table_size;
+
+extern unsigned int ib_qib_max_cqes;
+
+extern unsigned int ib_qib_max_cqs;
+
+extern unsigned int ib_qib_max_qp_wrs;
+
+extern unsigned int ib_qib_max_qps;
+
+extern unsigned int ib_qib_max_sges;
+
+extern unsigned int ib_qib_max_mcast_grps;
+
+extern unsigned int ib_qib_max_mcast_qp_attached;
+
+extern unsigned int ib_qib_max_srqs;
+
+extern unsigned int ib_qib_max_srq_sges;
+
+extern unsigned int ib_qib_max_srq_wrs;
+
+extern const u32 ib_qib_rnr_table[];
+
+extern struct ib_dma_mapping_ops qib_dma_mapping_ops;
+
+#endif /* QIB_VERBS_H */
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 47/52] IB/qib: Add qib_verbs_mcast.c
From: Ralph Campbell @ 2010-05-07 0:02 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_verbs_mcast.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_verbs_mcast.c | 368 +++++++++++++++++++++++++++
1 files changed, 368 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_verbs_mcast.c
diff --git a/drivers/infiniband/hw/qib/qib_verbs_mcast.c b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
new file mode 100644
index 0000000..dabb697
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_verbs_mcast.c
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/rculist.h>
+
+#include "qib.h"
+
+/**
+ * qib_mcast_qp_alloc - alloc a struct to link a QP to mcast GID struct
+ * @qp: the QP to link
+ */
+static struct qib_mcast_qp *qib_mcast_qp_alloc(struct qib_qp *qp)
+{
+ struct qib_mcast_qp *mqp;
+
+ mqp = kmalloc(sizeof *mqp, GFP_KERNEL);
+ if (!mqp)
+ goto bail;
+
+ mqp->qp = qp;
+ atomic_inc(&qp->refcount);
+
+bail:
+ return mqp;
+}
+
+static void qib_mcast_qp_free(struct qib_mcast_qp *mqp)
+{
+ struct qib_qp *qp = mqp->qp;
+
+ /* Notify qib_destroy_qp() if it is waiting. */
+ if (atomic_dec_and_test(&qp->refcount))
+ wake_up(&qp->wait);
+
+ kfree(mqp);
+}
+
+/**
+ * qib_mcast_alloc - allocate the multicast GID structure
+ * @mgid: the multicast GID
+ *
+ * A list of QPs will be attached to this structure.
+ */
+static struct qib_mcast *qib_mcast_alloc(union ib_gid *mgid)
+{
+ struct qib_mcast *mcast;
+
+ mcast = kmalloc(sizeof *mcast, GFP_KERNEL);
+ if (!mcast)
+ goto bail;
+
+ mcast->mgid = *mgid;
+ INIT_LIST_HEAD(&mcast->qp_list);
+ init_waitqueue_head(&mcast->wait);
+ atomic_set(&mcast->refcount, 0);
+ mcast->n_attached = 0;
+
+bail:
+ return mcast;
+}
+
+static void qib_mcast_free(struct qib_mcast *mcast)
+{
+ struct qib_mcast_qp *p, *tmp;
+
+ list_for_each_entry_safe(p, tmp, &mcast->qp_list, list)
+ qib_mcast_qp_free(p);
+
+ kfree(mcast);
+}
+
+/**
+ * qib_mcast_find - search the global table for the given multicast GID
+ * @ibp: the IB port structure
+ * @mgid: the multicast GID to search for
+ *
+ * Returns NULL if not found.
+ *
+ * The caller is responsible for decrementing the reference count if found.
+ */
+struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid)
+{
+ struct rb_node *n;
+ unsigned long flags;
+ struct qib_mcast *mcast;
+
+ spin_lock_irqsave(&ibp->lock, flags);
+ n = ibp->mcast_tree.rb_node;
+ while (n) {
+ int ret;
+
+ mcast = rb_entry(n, struct qib_mcast, rb_node);
+
+ ret = memcmp(mgid->raw, mcast->mgid.raw,
+ sizeof(union ib_gid));
+ if (ret < 0)
+ n = n->rb_left;
+ else if (ret > 0)
+ n = n->rb_right;
+ else {
+ atomic_inc(&mcast->refcount);
+ spin_unlock_irqrestore(&ibp->lock, flags);
+ goto bail;
+ }
+ }
+ spin_unlock_irqrestore(&ibp->lock, flags);
+
+ mcast = NULL;
+
+bail:
+ return mcast;
+}
+
+/**
+ * qib_mcast_add - insert mcast GID into table and attach QP struct
+ * @mcast: the mcast GID table
+ * @mqp: the QP to attach
+ *
+ * Return zero if both were added. Return EEXIST if the GID was already in
+ * the table but the QP was added. Return ESRCH if the QP was already
+ * attached and neither structure was added.
+ */
+static int qib_mcast_add(struct qib_ibdev *dev, struct qib_ibport *ibp,
+ struct qib_mcast *mcast, struct qib_mcast_qp *mqp)
+{
+ struct rb_node **n = &ibp->mcast_tree.rb_node;
+ struct rb_node *pn = NULL;
+ int ret;
+
+ spin_lock_irq(&ibp->lock);
+
+ while (*n) {
+ struct qib_mcast *tmcast;
+ struct qib_mcast_qp *p;
+
+ pn = *n;
+ tmcast = rb_entry(pn, struct qib_mcast, rb_node);
+
+ ret = memcmp(mcast->mgid.raw, tmcast->mgid.raw,
+ sizeof(union ib_gid));
+ if (ret < 0) {
+ n = &pn->rb_left;
+ continue;
+ }
+ if (ret > 0) {
+ n = &pn->rb_right;
+ continue;
+ }
+
+ /* Search the QP list to see if this is already there. */
+ list_for_each_entry_rcu(p, &tmcast->qp_list, list) {
+ if (p->qp == mqp->qp) {
+ ret = ESRCH;
+ goto bail;
+ }
+ }
+ if (tmcast->n_attached == ib_qib_max_mcast_qp_attached) {
+ ret = ENOMEM;
+ goto bail;
+ }
+
+ tmcast->n_attached++;
+
+ list_add_tail_rcu(&mqp->list, &tmcast->qp_list);
+ ret = EEXIST;
+ goto bail;
+ }
+
+ spin_lock(&dev->n_mcast_grps_lock);
+ if (dev->n_mcast_grps_allocated == ib_qib_max_mcast_grps) {
+ spin_unlock(&dev->n_mcast_grps_lock);
+ ret = ENOMEM;
+ goto bail;
+ }
+
+ dev->n_mcast_grps_allocated++;
+ spin_unlock(&dev->n_mcast_grps_lock);
+
+ mcast->n_attached++;
+
+ list_add_tail_rcu(&mqp->list, &mcast->qp_list);
+
+ atomic_inc(&mcast->refcount);
+ rb_link_node(&mcast->rb_node, pn, n);
+ rb_insert_color(&mcast->rb_node, &ibp->mcast_tree);
+
+ ret = 0;
+
+bail:
+ spin_unlock_irq(&ibp->lock);
+
+ return ret;
+}
+
+int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ struct qib_qp *qp = to_iqp(ibqp);
+ struct qib_ibdev *dev = to_idev(ibqp->device);
+ struct qib_ibport *ibp;
+ struct qib_mcast *mcast;
+ struct qib_mcast_qp *mqp;
+ int ret;
+
+ if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ /*
+ * Allocate data structures since its better to do this outside of
+ * spin locks and it will most likely be needed.
+ */
+ mcast = qib_mcast_alloc(gid);
+ if (mcast == NULL) {
+ ret = -ENOMEM;
+ goto bail;
+ }
+ mqp = qib_mcast_qp_alloc(qp);
+ if (mqp == NULL) {
+ qib_mcast_free(mcast);
+ ret = -ENOMEM;
+ goto bail;
+ }
+ ibp = to_iport(ibqp->device, qp->port_num);
+ switch (qib_mcast_add(dev, ibp, mcast, mqp)) {
+ case ESRCH:
+ /* Neither was used: OK to attach the same QP twice. */
+ qib_mcast_qp_free(mqp);
+ qib_mcast_free(mcast);
+ break;
+
+ case EEXIST: /* The mcast wasn't used */
+ qib_mcast_free(mcast);
+ break;
+
+ case ENOMEM:
+ /* Exceeded the maximum number of mcast groups. */
+ qib_mcast_qp_free(mqp);
+ qib_mcast_free(mcast);
+ ret = -ENOMEM;
+ goto bail;
+
+ default:
+ break;
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
+{
+ struct qib_qp *qp = to_iqp(ibqp);
+ struct qib_ibdev *dev = to_idev(ibqp->device);
+ struct qib_ibport *ibp = to_iport(ibqp->device, qp->port_num);
+ struct qib_mcast *mcast = NULL;
+ struct qib_mcast_qp *p, *tmp;
+ struct rb_node *n;
+ int last = 0;
+ int ret;
+
+ if (ibqp->qp_num <= 1 || qp->state == IB_QPS_RESET) {
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ spin_lock_irq(&ibp->lock);
+
+ /* Find the GID in the mcast table. */
+ n = ibp->mcast_tree.rb_node;
+ while (1) {
+ if (n == NULL) {
+ spin_unlock_irq(&ibp->lock);
+ ret = -EINVAL;
+ goto bail;
+ }
+
+ mcast = rb_entry(n, struct qib_mcast, rb_node);
+ ret = memcmp(gid->raw, mcast->mgid.raw,
+ sizeof(union ib_gid));
+ if (ret < 0)
+ n = n->rb_left;
+ else if (ret > 0)
+ n = n->rb_right;
+ else
+ break;
+ }
+
+ /* Search the QP list. */
+ list_for_each_entry_safe(p, tmp, &mcast->qp_list, list) {
+ if (p->qp != qp)
+ continue;
+ /*
+ * We found it, so remove it, but don't poison the forward
+ * link until we are sure there are no list walkers.
+ */
+ list_del_rcu(&p->list);
+ mcast->n_attached--;
+
+ /* If this was the last attached QP, remove the GID too. */
+ if (list_empty(&mcast->qp_list)) {
+ rb_erase(&mcast->rb_node, &ibp->mcast_tree);
+ last = 1;
+ }
+ break;
+ }
+
+ spin_unlock_irq(&ibp->lock);
+
+ if (p) {
+ /*
+ * Wait for any list walkers to finish before freeing the
+ * list element.
+ */
+ wait_event(mcast->wait, atomic_read(&mcast->refcount) <= 1);
+ qib_mcast_qp_free(p);
+ }
+ if (last) {
+ atomic_dec(&mcast->refcount);
+ wait_event(mcast->wait, !atomic_read(&mcast->refcount));
+ qib_mcast_free(mcast);
+ spin_lock_irq(&dev->n_mcast_grps_lock);
+ dev->n_mcast_grps_allocated--;
+ spin_unlock_irq(&dev->n_mcast_grps_lock);
+ }
+
+ ret = 0;
+
+bail:
+ return ret;
+}
+
+int qib_mcast_tree_empty(struct qib_ibport *ibp)
+{
+ return ibp->mcast_tree.rb_node == NULL;
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 48/52] IB/qib: Add qib_wc_ppc64.c
From: Ralph Campbell @ 2010-05-07 0:03 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_wc_ppc64.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_wc_ppc64.c | 62 ++++++++++++++++++++++++++++++
1 files changed, 62 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_wc_ppc64.c
diff --git a/drivers/infiniband/hw/qib/qib_wc_ppc64.c b/drivers/infiniband/hw/qib/qib_wc_ppc64.c
new file mode 100644
index 0000000..673cf4c
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_wc_ppc64.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2006, 2007, 2008 QLogic Corporation. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file is conditionally built on PowerPC only. Otherwise weak symbol
+ * versions of the functions exported from here are used.
+ */
+
+#include "qib.h"
+
+/**
+ * qib_enable_wc - enable write combining for MMIO writes to the device
+ * @dd: qlogic_ib device
+ *
+ * Nothing to do on PowerPC, so just return without error.
+ */
+int qib_enable_wc(struct qib_devdata *dd)
+{
+ return 0;
+}
+
+/**
+ * qib_unordered_wc - indicate whether write combining is unordered
+ *
+ * Because our performance depends on our ability to do write
+ * combining mmio writes in the most efficient way, we need to
+ * know if we are on a processor that may reorder stores when
+ * write combining.
+ */
+int qib_unordered_wc(void)
+{
+ return 1;
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 49/52] IB/qib: Add qib_wc_x86_64.c
From: Ralph Campbell @ 2010-05-07 0:03 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_wc_x86_64.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_wc_x86_64.c | 171 +++++++++++++++++++++++++++++
1 files changed, 171 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_wc_x86_64.c
diff --git a/drivers/infiniband/hw/qib/qib_wc_x86_64.c b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
new file mode 100644
index 0000000..561b8bc
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_wc_x86_64.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/*
+ * This file is conditionally built on x86_64 only. Otherwise weak symbol
+ * versions of the functions exported from here are used.
+ */
+
+#include <linux/pci.h>
+#include <asm/mtrr.h>
+#include <asm/processor.h>
+
+#include "qib.h"
+
+/**
+ * qib_enable_wc - enable write combining for MMIO writes to the device
+ * @dd: qlogic_ib device
+ *
+ * This routine is x86_64-specific; it twiddles the CPU's MTRRs to enable
+ * write combining.
+ */
+int qib_enable_wc(struct qib_devdata *dd)
+{
+ int ret = 0;
+ u64 pioaddr, piolen;
+ unsigned bits;
+ const unsigned long addr = pci_resource_start(dd->pcidev, 0);
+ const size_t len = pci_resource_len(dd->pcidev, 0);
+
+ /*
+ * Set the PIO buffers to be WCCOMB, so we get HT bursts to the
+ * chip. Linux (possibly the hardware) requires it to be on a power
+ * of 2 address matching the length (which has to be a power of 2).
+ * For rev1, that means the base address, for rev2, it will be just
+ * the PIO buffers themselves.
+ * For chips with two sets of buffers, the calculations are
+ * somewhat more complicated; we need to sum, and the piobufbase
+ * register has both offsets, 2K in low 32 bits, 4K in high 32 bits.
+ * The buffers are still packed, so a single range covers both.
+ */
+ if (dd->piobcnt2k && dd->piobcnt4k) {
+ /* 2 sizes for chip */
+ unsigned long pio2kbase, pio4kbase;
+ pio2kbase = dd->piobufbase & 0xffffffffUL;
+ pio4kbase = (dd->piobufbase >> 32) & 0xffffffffUL;
+ if (pio2kbase < pio4kbase) {
+ /* all current chips */
+ pioaddr = addr + pio2kbase;
+ piolen = pio4kbase - pio2kbase +
+ dd->piobcnt4k * dd->align4k;
+ } else {
+ pioaddr = addr + pio4kbase;
+ piolen = pio2kbase - pio4kbase +
+ dd->piobcnt2k * dd->palign;
+ }
+ } else { /* single buffer size (2K, currently) */
+ pioaddr = addr + dd->piobufbase;
+ piolen = dd->piobcnt2k * dd->palign +
+ dd->piobcnt4k * dd->align4k;
+ }
+
+ for (bits = 0; !(piolen & (1ULL << bits)); bits++)
+ /* do nothing */ ;
+
+ if (piolen != (1ULL << bits)) {
+ piolen >>= bits;
+ while (piolen >>= 1)
+ bits++;
+ piolen = 1ULL << (bits + 1);
+ }
+ if (pioaddr & (piolen - 1)) {
+ u64 atmp;
+ atmp = pioaddr & ~(piolen - 1);
+ if (atmp < addr || (atmp + piolen) > (addr + len)) {
+ qib_dev_err(dd, "No way to align address/size "
+ "(%llx/%llx), no WC mtrr\n",
+ (unsigned long long) atmp,
+ (unsigned long long) piolen << 1);
+ ret = -ENODEV;
+ } else {
+ pioaddr = atmp;
+ piolen <<= 1;
+ }
+ }
+
+ if (!ret) {
+ int cookie;
+
+ cookie = mtrr_add(pioaddr, piolen, MTRR_TYPE_WRCOMB, 0);
+ if (cookie < 0) {
+ {
+ qib_devinfo(dd->pcidev,
+ "mtrr_add() WC for PIO bufs "
+ "failed (%d)\n",
+ cookie);
+ ret = -EINVAL;
+ }
+ } else {
+ dd->wc_cookie = cookie;
+ dd->wc_base = (unsigned long) pioaddr;
+ dd->wc_len = (unsigned long) piolen;
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * qib_disable_wc - disable write combining for MMIO writes to the device
+ * @dd: qlogic_ib device
+ */
+void qib_disable_wc(struct qib_devdata *dd)
+{
+ if (dd->wc_cookie) {
+ int r;
+
+ r = mtrr_del(dd->wc_cookie, dd->wc_base,
+ dd->wc_len);
+ if (r < 0)
+ qib_devinfo(dd->pcidev,
+ "mtrr_del(%lx, %lx, %lx) failed: %d\n",
+ dd->wc_cookie, dd->wc_base,
+ dd->wc_len, r);
+ dd->wc_cookie = 0; /* even on failure */
+ }
+}
+
+/**
+ * qib_unordered_wc - indicate whether write combining is ordered
+ *
+ * Because our performance depends on our ability to do write combining mmio
+ * writes in the most efficient way, we need to know if we are on an Intel
+ * or AMD x86_64 processor. AMD x86_64 processors flush WC buffers out in
+ * the order completed, and so no special flushing is required to get
+ * correct ordering. Intel processors, however, will flush write buffers
+ * out in "random" orders, and so explicit ordering is needed at times.
+ */
+int qib_unordered_wc(void)
+{
+ return boot_cpu_data.x86_vendor != X86_VENDOR_AMD;
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 50/52] IB/qib: Hooks for adding the QIB driver into the framework
From: Ralph Campbell @ 2010-05-07 0:03 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/Kconfig | 1 +
drivers/infiniband/Makefile | 1 +
2 files changed, 2 insertions(+), 0 deletions(-)
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 975adce..3a04822 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -43,6 +43,7 @@ config INFINIBAND_ADDR_TRANS
source "drivers/infiniband/hw/mthca/Kconfig"
source "drivers/infiniband/hw/ipath/Kconfig"
+source "drivers/infiniband/hw/qib/Kconfig"
source "drivers/infiniband/hw/ehca/Kconfig"
source "drivers/infiniband/hw/amso1100/Kconfig"
source "drivers/infiniband/hw/cxgb3/Kconfig"
diff --git a/drivers/infiniband/Makefile b/drivers/infiniband/Makefile
index ed35e44..27b66b1 100644
--- a/drivers/infiniband/Makefile
+++ b/drivers/infiniband/Makefile
@@ -1,6 +1,7 @@
obj-$(CONFIG_INFINIBAND) += core/
obj-$(CONFIG_INFINIBAND_MTHCA) += hw/mthca/
obj-$(CONFIG_INFINIBAND_IPATH) += hw/ipath/
+obj-$(CONFIG_INFINIBAND_QIB) += hw/qib/
obj-$(CONFIG_INFINIBAND_EHCA) += hw/ehca/
obj-$(CONFIG_INFINIBAND_AMSO1100) += hw/amso1100/
obj-$(CONFIG_INFINIBAND_CXGB3) += hw/cxgb3/
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v3 52/52] IB/core: allow HCAs to create IB port sysfs files
From: Ralph Campbell @ 2010-05-07 0:03 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100506235849.3441.85930.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
This patch adds a new parameter to ib_register_device() so that
HCAs can create files in /sys/class/infiniband/<hca>/ports/<N>/.
There is no need for an unregister function since the kobject
reference will go to zero when ib_unregister_device() is called.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/core/core_priv.h | 2 +-
drivers/infiniband/core/device.c | 4 ++--
drivers/infiniband/core/sysfs.c | 24 +++++++++++++++++++++++-
drivers/infiniband/hw/amso1100/c2_provider.c | 2 +-
drivers/infiniband/hw/cxgb3/iwch_provider.c | 2 +-
drivers/infiniband/hw/ehca/ehca_main.c | 2 +-
drivers/infiniband/hw/ipath/ipath_verbs.c | 2 +-
drivers/infiniband/hw/mlx4/main.c | 2 +-
drivers/infiniband/hw/mthca/mthca_provider.c | 2 +-
drivers/infiniband/hw/nes/nes_verbs.c | 2 +-
include/rdma/ib_verbs.h | 5 ++++-
11 files changed, 37 insertions(+), 12 deletions(-)
diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h
index 05ac36e..e70c809 100644
--- a/drivers/infiniband/core/core_priv.h
+++ b/drivers/infiniband/core/core_priv.h
@@ -38,7 +38,7 @@
#include <rdma/ib_verbs.h>
-int ib_device_register_sysfs(struct ib_device *device);
+int ib_device_register_sysfs(struct ib_device *device, sysfs_cb cb);
void ib_device_unregister_sysfs(struct ib_device *device);
int ib_sysfs_setup(void);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index d1fba41..9ef8093 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -267,7 +267,7 @@ out:
* callback for each device that is added. @device must be allocated
* with ib_alloc_device().
*/
-int ib_register_device(struct ib_device *device)
+int ib_register_device(struct ib_device *device, sysfs_cb cb)
{
int ret;
@@ -296,7 +296,7 @@ int ib_register_device(struct ib_device *device)
goto out;
}
- ret = ib_device_register_sysfs(device);
+ ret = ib_device_register_sysfs(device, cb);
if (ret) {
printk(KERN_WARNING "Couldn't register device %s with driver model\n",
device->name);
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index f901957..a47bac8 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -754,7 +754,23 @@ static struct attribute_group iw_stats_group = {
.attrs = iw_proto_stats_attrs,
};
-int ib_device_register_sysfs(struct ib_device *device)
+static int sysfs_create_port_files(struct ib_device *device, sysfs_cb cb)
+{
+ struct kobject *p;
+ struct ib_port *port;
+ int ret = 0;
+
+ list_for_each_entry(p, &device->port_list, entry) {
+ port = container_of(p, struct ib_port, kobj);
+ ret = cb(device, port->port_num, &port->kobj);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+int ib_device_register_sysfs(struct ib_device *device, sysfs_cb cb)
{
struct device *class_dev = &device->dev;
int ret;
@@ -802,6 +818,12 @@ int ib_device_register_sysfs(struct ib_device *device)
goto err_put;
}
+ if (cb) {
+ ret = sysfs_create_port_files(device, cb);
+ if (ret)
+ goto err_put;
+ }
+
return 0;
err_put:
diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index ad723bd..d128386 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c
@@ -864,7 +864,7 @@ int c2_register_device(struct c2_dev *dev)
dev->ibdev.iwcm->create_listen = c2_service_create;
dev->ibdev.iwcm->destroy_listen = c2_service_destroy;
- ret = ib_register_device(&dev->ibdev);
+ ret = ib_register_device(&dev->ibdev, NULL);
if (ret)
goto out_free_iwcm;
diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
index 47b35c6..26f22ec 100644
--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
+++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
@@ -1427,7 +1427,7 @@ int iwch_register_device(struct iwch_dev *dev)
dev->ibdev.iwcm->rem_ref = iwch_qp_rem_ref;
dev->ibdev.iwcm->get_qp = iwch_get_qp;
- ret = ib_register_device(&dev->ibdev);
+ ret = ib_register_device(&dev->ibdev, NULL);
if (ret)
goto bail1;
diff --git a/drivers/infiniband/hw/ehca/ehca_main.c b/drivers/infiniband/hw/ehca/ehca_main.c
index 129a6be..d1a9278 100644
--- a/drivers/infiniband/hw/ehca/ehca_main.c
+++ b/drivers/infiniband/hw/ehca/ehca_main.c
@@ -798,7 +798,7 @@ static int __devinit ehca_probe(struct of_device *dev,
goto probe5;
}
- ret = ib_register_device(&shca->ib_device);
+ ret = ib_register_device(&shca->ib_device, NULL);
if (ret) {
ehca_err(&shca->ib_device,
"ib_register_device() failed ret=%i", ret);
diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
index 9289ab4..055e7fd 100644
--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
+++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
@@ -2181,7 +2181,7 @@ int ipath_register_ib_device(struct ipath_devdata *dd)
snprintf(dev->node_desc, sizeof(dev->node_desc),
IPATH_IDSTR " %s", init_utsname()->nodename);
- ret = ib_register_device(dev);
+ ret = ib_register_device(dev, NULL);
if (ret)
goto err_reg;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index e596537..76dcd81 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -660,7 +660,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
spin_lock_init(&ibdev->sm_lock);
mutex_init(&ibdev->cap_mask_mutex);
- if (ib_register_device(&ibdev->ib_dev))
+ if (ib_register_device(&ibdev->ib_dev, NULL))
goto err_map;
if (mlx4_ib_mad_init(ibdev))
diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c
index bcf7a40..e53a773 100644
--- a/drivers/infiniband/hw/mthca/mthca_provider.c
+++ b/drivers/infiniband/hw/mthca/mthca_provider.c
@@ -1402,7 +1402,7 @@ int mthca_register_device(struct mthca_dev *dev)
mutex_init(&dev->cap_mask_mutex);
- ret = ib_register_device(&dev->ib_dev);
+ ret = ib_register_device(&dev->ib_dev, NULL);
if (ret)
return ret;
diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index 36348bf..a45c749 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c
@@ -3961,7 +3961,7 @@ int nes_register_ofa_device(struct nes_ib_device *nesibdev)
struct nes_adapter *nesadapter = nesdev->nesadapter;
int i, ret;
- ret = ib_register_device(&nesvnic->nesibdev->ibdev);
+ ret = ib_register_device(&nesvnic->nesibdev->ibdev, NULL);
if (ret) {
return ret;
}
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 6ecd85c..c36a0bd 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -1189,7 +1189,10 @@ struct ib_client {
struct ib_device *ib_alloc_device(size_t size);
void ib_dealloc_device(struct ib_device *device);
-int ib_register_device (struct ib_device *device);
+typedef int (*sysfs_cb)(struct ib_device *dev, u8 port_num,
+ struct kobject *kobj);
+
+int ib_register_device(struct ib_device *device, sysfs_cb cb);
void ib_unregister_device(struct ib_device *device);
int ib_register_client (struct ib_client *client);
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* Re: [RFC] libibverbs: ibv_fork_init() and libhugetlbfs
From: Alexander Schmidt @ 2010-05-07 10:19 UTC (permalink / raw)
To: Roland Dreier
Cc: of-ewg, Linux RDMA, Hoang-Nam Nguyen, Stefan Roscher,
Joachim Fenkes, Christoph Raisch, Alex Vainman
In-Reply-To: <adavdb092d8.fsf-BjVyx320WGW9gfZ95n9DRSW4+XlvGpQz@public.gmane.org>
On Thu, 06 May 2010 13:55:31 -0700
Roland Dreier <rdreier-FYB4Gu1CFyUAvxtiuMwx3w@public.gmane.org> wrote:
> I think that we cannot assume huge pages only come from libhugetlbfs --
> we should support an application directly enabling huge pages (possibly
> via another library too, so we can't assume that an application knows
> the page size for a memory range it is about to register).
>
> And also the 16 MB page size constant is of course not feasible -- with
> all due respect, the x86 page size of 2 MB is much more likely in
> practice :) (Although perhaps the much slower PowerPC TLB refill makes
> users more likely to try and use hugetlb pages ;)
>
> Alex suggested parsing files in the same way as libhugetlbfs does to get
> the page size, and that seems to be the best solution, since I don't
> think the libhugetlbfs license is compatible with the BSD license for
> libibverbs.
>
> But your trick of using /proc/*/maps looks nice. Does that only work
> for libhugetlbfs or can we recognize direct mmap of hugetlb pages?
Hi Roland, thanks for your comments!
I've reworked my patch:
* added get_huge_page_size() to read the huge page size from
/proc/meminfo. This is done at ibv_fork_init() time.
* I noticed that some applications like ibv_rc_pingpong already
get memory from libhugetlbfs when running ibv_fork_init(). So
I changed the code for testing madvise() to allocate a huge page
if the huge page size is set in the system.
I have not tested this code with different libraries providing huge
pages / mmaped pages yet, but I hope this can be added later on when
we have agreed on an approach to handle huge pages.
Signed-off-by: Alexander Schmidt <alexs-23VcF4HTsmIX0ybBhKVfKdBPR1lH4CV8@public.gmane.org>
---
src/memory.c | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 95 insertions(+), 8 deletions(-)
--- libibverbs-1.1.2.orig/src/memory.c
+++ libibverbs-1.1.2/src/memory.c
@@ -40,6 +40,8 @@
#include <unistd.h>
#include <stdlib.h>
#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
#include "ibverbs.h"
@@ -68,12 +70,45 @@ struct ibv_mem_node {
static struct ibv_mem_node *mm_root;
static pthread_mutex_t mm_mutex = PTHREAD_MUTEX_INITIALIZER;
static int page_size;
+static int huge_page_size;
static int too_late;
+static int get_huge_page_size(void)
+{
+ int ret = -1;
+ FILE *file;
+ char *path = "/proc/meminfo";
+ char buf[1024], type[128];
+
+ file = fopen(path, "r");
+ if (!file)
+ goto out;
+
+ while (fgets(buf, sizeof(buf), file) != NULL) {
+ int n;
+ unsigned long size;
+
+ n = sscanf(buf, "%127s %lu %*s", &type, &size);
+
+ if (n < 2)
+ continue;
+
+ if (!strcmp(type, "Hugepagesize:")) {
+ /* huge page size is printed in Kb */
+ ret = size * 1024;
+ break;
+ }
+ }
+ fclose(file);
+
+out:
+ return ret;
+}
+
int ibv_fork_init(void)
{
void *tmp;
- int ret;
+ int ret, size;
if (mm_root)
return 0;
@@ -85,11 +120,18 @@ int ibv_fork_init(void)
if (page_size < 0)
return errno;
- if (posix_memalign(&tmp, page_size, page_size))
+ huge_page_size = get_huge_page_size();
+
+ if (huge_page_size > page_size)
+ size = huge_page_size;
+ else
+ size = page_size;
+
+ if (posix_memalign(&tmp, size, size))
return ENOMEM;
- ret = madvise(tmp, page_size, MADV_DONTFORK) ||
- madvise(tmp, page_size, MADV_DOFORK);
+ ret = madvise(tmp, size, MADV_DONTFORK) ||
+ madvise(tmp, size, MADV_DOFORK);
free(tmp);
@@ -446,11 +488,51 @@ static struct ibv_mem_node *__mm_find_st
return node;
}
+static int is_huge_page(void *base)
+{
+ int ret = 0;
+ pid_t pid;
+ FILE *file;
+ char buf[1024], lib[128];
+
+ pid = getpid();
+ snprintf(buf, sizeof(buf), "/proc/%d/maps", pid);
+
+ file = fopen(buf, "r");
+ if (!file)
+ goto out;
+
+ while (fgets(buf, sizeof(buf), file) != NULL) {
+ int n;
+ char *substr;
+ uintptr_t range_start, range_end;
+
+ n = sscanf(buf, "%lx-%lx %*s %*x %*s %*u %127s",
+ &range_start, &range_end, &lib);
+
+ if (n < 3)
+ continue;
+
+ substr = strstr(lib, "libhugetlbfs");
+ if (substr) {
+ if ((uintptr_t) base >= range_start &&
+ (uintptr_t) base < range_end) {
+ ret = 1;
+ break;
+ }
+ }
+ }
+ fclose(file);
+
+out:
+ return ret;
+}
+
static int ibv_madvise_range(void *base, size_t size, int advice)
{
uintptr_t start, end;
struct ibv_mem_node *node, *tmp;
- int inc;
+ int inc, range_page_size;
int ret = 0;
if (!size)
@@ -458,9 +540,14 @@ static int ibv_madvise_range(void *base,
inc = advice == MADV_DONTFORK ? 1 : -1;
- start = (uintptr_t) base & ~(page_size - 1);
- end = ((uintptr_t) (base + size + page_size - 1) &
- ~(page_size - 1)) - 1;
+ if (huge_page_size > page_size && is_huge_page(base))
+ range_page_size = huge_page_size;
+ else
+ range_page_size = page_size;
+
+ start = (uintptr_t) base & ~(range_page_size - 1);
+ end = ((uintptr_t) (base + size + range_page_size - 1) &
+ ~(range_page_size - 1)) - 1;
pthread_mutex_lock(&mm_mutex);
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH] ummunotify: Userspace support for MMU notifications V2
From: Jeff Squyres @ 2010-05-07 12:01 UTC (permalink / raw)
To: Eric B Munson
Cc: akpm, linux-kernel, linux-rdma, linux-mm, rolandd, peterz, mingo,
pavel, randy.dunlap
In-Reply-To: <1271943493-12120-1-git-send-email-ebmunson@us.ibm.com>
On Apr 22, 2010, at 9:38 AM, Eric B Munson wrote:
> From: Roland Dreier <rolandd@cisco.com>
>
> As discussed in <http://article.gmane.org/gmane.linux.drivers.openib/61925>
> and follow-up messages, libraries using RDMA would like to track
> precisely when application code changes memory mapping via free(),
> munmap(), etc. Current pure-userspace solutions using malloc hooks
> and other tricks are not robust, and the feeling among experts is that
> the issue is unfixable without kernel help.
Sorry for not replying earlier -- just to throw in my $0.02 here: the MPI community is *very interested* in having this stuff in upstream kernels. It solves a fairly major problem for us.
Open MPI (www.open-mpi.org) is ready to pretty much immediately take advantage of these capabilities. The code to use ummunotify is in a Mercurial branch; we're only waiting for ummunotify to go upstream before committing our support for it to our main SVN development trunk.
> We solve this not by implementing the full API proposed in the email
> linked above but rather with a simpler and more generic interface,
> which may be useful in other contexts. Specifically, we implement a
> new character device driver, ummunotify, that creates a /dev/ummunotify
> node. A userspace process can open this node read-only and use the fd
> as follows:
>
> 1. ioctl() to register/unregister an address range to watch in the
> kernel (cf struct ummunotify_register_ioctl in <linux/ummunotify.h>).
>
> 2. read() to retrieve events generated when a mapping in a watched
> address range is invalidated (cf struct ummunotify_event in
> <linux/ummunotify.h>). select()/poll()/epoll() and SIGIO are
> handled for this IO.
>
> 3. mmap() one page at offset 0 to map a kernel page that contains a
> generation counter that is incremented each time an event is
> generated. This allows userspace to have a fast path that checks
> that no events have occurred without a system call.
>
> Thanks to Jason Gunthorpe <jgunthorpe <at> obsidianresearch.com> for
> suggestions on the interface design. Also thanks to Jeff Squyres
> <jsquyres <at> cisco.com> for prototyping support for this in Open MPI, which
> helped find several bugs during development.
>
> Signed-off-by: Roland Dreier <rolandd@cisco.com>
> Signed-off-by: Eric B Munson <ebmunson@us.ibm.com>
Acked-by: Jeff Squyers <jsquyres@cisco.com>
> ---
>
> Changes from V1:
> - Update Kbuild to handle test program build properly
> - Update documentation to cover questions not addressed in previous
> thread
> ---
> Documentation/Makefile | 3 +-
> Documentation/ummunotify/Makefile | 7 +
> Documentation/ummunotify/ummunotify.txt | 162 +++++++++
> Documentation/ummunotify/umn-test.c | 200 +++++++++++
> drivers/char/Kconfig | 12 +
> drivers/char/Makefile | 1 +
> drivers/char/ummunotify.c | 567 +++++++++++++++++++++++++++++++
> include/linux/Kbuild | 1 +
> include/linux/ummunotify.h | 121 +++++++
> 9 files changed, 1073 insertions(+), 1 deletions(-)
> create mode 100644 Documentation/ummunotify/Makefile
> create mode 100644 Documentation/ummunotify/ummunotify.txt
> create mode 100644 Documentation/ummunotify/umn-test.c
> create mode 100644 drivers/char/ummunotify.c
> create mode 100644 include/linux/ummunotify.h
>
> diff --git a/Documentation/Makefile b/Documentation/Makefile
> index 6fc7ea1..27ba76a 100644
> --- a/Documentation/Makefile
> +++ b/Documentation/Makefile
> @@ -1,3 +1,4 @@
> obj-m := DocBook/ accounting/ auxdisplay/ connector/ \
> filesystems/ filesystems/configfs/ ia64/ laptops/ networking/ \
> - pcmcia/ spi/ timers/ video4linux/ vm/ watchdog/src/
> + pcmcia/ spi/ timers/ video4linux/ vm/ ummunotify/ \
> + watchdog/src/
> diff --git a/Documentation/ummunotify/Makefile b/Documentation/ummunotify/Makefile
> new file mode 100644
> index 0000000..89f31a0
> --- /dev/null
> +++ b/Documentation/ummunotify/Makefile
> @@ -0,0 +1,7 @@
> +# List of programs to build
> +hostprogs-y := umn-test
> +
> +# Tell kbuild to always build the programs
> +always := $(hostprogs-y)
> +
> +HOSTCFLAGS_umn-test.o += -I$(objtree)/usr/include
> diff --git a/Documentation/ummunotify/ummunotify.txt b/Documentation/ummunotify/ummunotify.txt
> new file mode 100644
> index 0000000..d6c2ccc
> --- /dev/null
> +++ b/Documentation/ummunotify/ummunotify.txt
> @@ -0,0 +1,162 @@
> +UMMUNOTIFY
> +
> + Ummunotify relays MMU notifier events to userspace. This is useful
> + for libraries that need to track the memory mapping of applications;
> + for example, MPI implementations using RDMA want to cache memory
> + registrations for performance, but tracking all possible crazy cases
> + such as when, say, the FORTRAN runtime frees memory is impossible
> + without kernel help.
> +
> +Basic Model
> +
> + A userspace process uses it by opening /dev/ummunotify, which
> + returns a file descriptor. Interest in address ranges is registered
> + using ioctl() and MMU notifier events are retrieved using read(), as
> + described in more detail below. Userspace can register multiple
> + address ranges to watch, and can unregister individual ranges.
> +
> + Userspace can also mmap() a single read-only page at offset 0 on
> + this file descriptor. This page contains (at offest 0) a single
> + 64-bit generation counter that the kernel increments each time an
> + MMU notifier event occurs. Userspace can use this to very quickly
> + check if there are any events to retrieve without needing to do a
> + system call.
> +
> +Control
> +
> + To start using ummunotify, a process opens /dev/ummunotify in
> + read-only mode. This will attach to current->mm because the current
> + consumers of this functionality do all monitoring in the process
> + being monitored. It is currently not possible to use this device to
> + monitor other processes. Control from userspace is done via ioctl().
> + An ioctl was chosen because the number of files required to register
> + a new address range in sysfs would be unwieldy and new procfs entries
> + are discouraged. The defined ioctls are:
> +
> + UMMUNOTIFY_EXCHANGE_FEATURES: This ioctl takes a single 32-bit
> + word of feature flags as input, and the kernel updates the
> + features flags word to contain only features requested by
> + userspace and also supported by the kernel.
> +
> + This ioctl is only included for forward compatibility; no
> + feature flags are currently defined, and the kernel will simply
> + update any requested feature mask to 0. The kernel will always
> + default to a feature mask of 0 if this ioctl is not used, so
> + current userspace does not need to perform this ioctl.
> +
> + UMMUNOTIFY_REGISTER_REGION: Userspace uses this ioctl to tell the
> + kernel to start delivering events for an address range. The
> + range is described using struct ummunotify_register_ioctl:
> +
> + struct ummunotify_register_ioctl {
> + __u64 start;
> + __u64 end;
> + __u64 user_cookie;
> + __u32 flags;
> + __u32 reserved;
> + };
> +
> + start and end give the range of userspace virtual addresses;
> + start is included in the range and end is not, so an example of
> + a 4 KB range would be start=0x1000, end=0x2000.
> +
> + user_cookie is an opaque 64-bit quantity that is returned by the
> + kernel in events involving the range, and used by userspace to
> + stop watching the range. Each registered address range must
> + have a distinct user_cookie.
> +
> + It is fine with the kernel if userspace registers multiple
> + overlapping or even duplicate address ranges, as long as a
> + different cookie is used for each registration.
> +
> + flags and reserved are included for forward compatibility;
> + userspace should simply set them to 0 for the current interface.
> +
> + UMMUNOTIFY_UNREGISTER_REGION: Userspace passes in the 64-bit
> + user_cookie used to register a range to tell the kernel to stop
> + watching an address range. Once this ioctl completes, the
> + kernel will not deliver any further events for the range that is
> + unregistered.
> +
> +Events
> +
> + When an event occurs that invalidates some of a process's memory
> + mapping in an address range being watched, ummunotify queues an
> + event report for that address range. If more than one event
> + invalidates parts of the same address range before userspace
> + retrieves the queued report, then further reports for the same range
> + will not be queued -- when userspace does read the queue, only a
> + single report for a given range will be returned.
> +
> + If multiple ranges being watched are invalidated by a single event
> + (which is especially likely if userspace registers overlapping
> + ranges), then an event report structure will be queued for each
> + address range registration.
> +
> + It is possible, if a large enough number of overlapping ranges are
> + registered and the list of invalidated events is busy enough and
> + ignored long enough, to cause the kernel to run out of memory.
> + Because this situation is unlikely to occur, the event queue size
> + is not bounded in order to avoid dropping events if the queue grows
> + beyond set bounds.
> +
> + Userspace retrieves queued events via read() on the ummunotify file
> + descriptor; a buffer that is at least as big as struct
> + ummunotify_event should be used to retrieve event reports, and if a
> + larger buffer is passed to read(), multiple reports will be returned
> + (if available).
> +
> + If the ummunotify file descriptor is in blocking mode, a read() call
> + will wait for an event report to be available. Userspace may also
> + set the ummunotify file descriptor to non-blocking mode and use all
> + standard ways of waiting for data to be available on the ummunotify
> + file descriptor, including epoll/poll()/select() and SIGIO.
> +
> + The format of event reports is:
> +
> + struct ummunotify_event {
> + __u32 type;
> + __u32 flags;
> + __u64 hint_start;
> + __u64 hint_end;
> + __u64 user_cookie_counter;
> + };
> +
> + where the type field is either UMMUNOTIFY_EVENT_TYPE_INVAL or
> + UMMUNOTIFY_EVENT_TYPE_LAST. Events of type INVAL describe
> + invalidation events as follows: user_cookie_counter contains the
> + cookie passed in when userspace registered the range that the event
> + is for. hint_start and hint_end contain the start address and end
> + address that were invalidated.
> +
> + The flags word contains bit flags, with only UMMUNOTIFY_EVENT_FLAG_HINT
> + defined at the moment. If HINT is set, then the invalidation event
> + invalidated less than the full address range and the kernel returns
> + the exact range invalidated; if HINT is not sent then hint_start and
> + hint_end are set to the original range registered by userspace.
> + (HINT will not be set if, for example, multiple events invalidated
> + disjoint parts of the range and so a single start/end pair cannot
> + represent the parts of the range that were invalidated)
> +
> + If the event type is LAST, then the read operation has emptied the
> + list of invalidated regions, and the flags, hint_start and hint_end
> + fields are not used. user_cookie_counter holds the value of the
> + kernel's generation counter (see below of more details) when the
> + empty list occurred.
> +
> +Generation Count
> +
> + Userspace may mmap() a page on a ummunotify file descriptor via
> +
> + mmap(NULL, sizeof (__u64), PROT_READ, MAP_SHARED, ummunotify_fd, 0);
> +
> + to get a read-only mapping of the kernel's 64-bit generation
> + counter. The kernel will increment this generation counter each
> + time an event report is queued.
> +
> + Userspace can use the generation counter as a quick check to avoid
> + system calls; if the value read from the mapped kernel counter is
> + still equal to the value returned in user_cookie_counter for the
> + most recent LAST event retrieved, then no further events have been
> + queued and there is no need to try a read() on the ummunotify file
> + descriptor.
> diff --git a/Documentation/ummunotify/umn-test.c b/Documentation/ummunotify/umn-test.c
> new file mode 100644
> index 0000000..143db2c
> --- /dev/null
> +++ b/Documentation/ummunotify/umn-test.c
> @@ -0,0 +1,200 @@
> +/*
> + * Copyright (c) 2009 Cisco Systems. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */
> +
> +#include <stdint.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <unistd.h>
> +
> +#include <linux/ummunotify.h>
> +
> +#include <sys/mman.h>
> +#include <sys/stat.h>
> +#include <sys/types.h>
> +#include <sys/ioctl.h>
> +
> +#define UMN_TEST_COOKIE 123
> +
> +static int umn_fd;
> +static volatile __u64 *umn_counter;
> +
> +static int umn_init(void)
> +{
> + __u32 flags;
> +
> + umn_fd = open("/dev/ummunotify", O_RDONLY);
> + if (umn_fd < 0) {
> + perror("open");
> + return 1;
> + }
> +
> + if (ioctl(umn_fd, UMMUNOTIFY_EXCHANGE_FEATURES, &flags)) {
> + perror("exchange ioctl");
> + return 1;
> + }
> +
> + printf("kernel feature flags: 0x%08x\n", flags);
> +
> + umn_counter = mmap(NULL, sizeof *umn_counter, PROT_READ,
> + MAP_SHARED, umn_fd, 0);
> + if (umn_counter == MAP_FAILED) {
> + perror("mmap");
> + return 1;
> + }
> +
> + return 0;
> +}
> +
> +static int umn_register(void *buf, size_t size, __u64 cookie)
> +{
> + struct ummunotify_register_ioctl r = {
> + .start = (unsigned long) buf,
> + .end = (unsigned long) buf + size,
> + .user_cookie = cookie,
> + };
> +
> + if (ioctl(umn_fd, UMMUNOTIFY_REGISTER_REGION, &r)) {
> + perror("register ioctl");
> + return 1;
> + }
> +
> + return 0;
> +}
> +
> +static int umn_unregister(__u64 cookie)
> +{
> + if (ioctl(umn_fd, UMMUNOTIFY_UNREGISTER_REGION, &cookie)) {
> + perror("unregister ioctl");
> + return 1;
> + }
> +
> + return 0;
> +}
> +
> +int main(int argc, char *argv[])
> +{
> + int page_size;
> + __u64 old_counter;
> + void *t;
> + int got_it;
> +
> + if (umn_init())
> + return 1;
> +
> + printf("\n");
> +
> + old_counter = *umn_counter;
> + if (old_counter != 0) {
> + fprintf(stderr, "counter = %lld (expected 0)\n", old_counter);
> + return 1;
> + }
> +
> + page_size = sysconf(_SC_PAGESIZE);
> + t = mmap(NULL, 3 * page_size, PROT_READ,
> + MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
> +
> + if (umn_register(t, 3 * page_size, UMN_TEST_COOKIE))
> + return 1;
> +
> + munmap(t + page_size, page_size);
> +
> + old_counter = *umn_counter;
> + if (old_counter != 1) {
> + fprintf(stderr, "counter = %lld (expected 1)\n", old_counter);
> + return 1;
> + }
> +
> + got_it = 0;
> + while (1) {
> + struct ummunotify_event ev;
> + int len;
> +
> + len = read(umn_fd, &ev, sizeof ev);
> + if (len < 0) {
> + perror("read event");
> + return 1;
> + }
> + if (len != sizeof ev) {
> + fprintf(stderr, "Read gave %d bytes (!= event size %zd)\n",
> + len, sizeof ev);
> + return 1;
> + }
> +
> + switch (ev.type) {
> + case UMMUNOTIFY_EVENT_TYPE_INVAL:
> + if (got_it) {
> + fprintf(stderr, "Extra invalidate event\n");
> + return 1;
> + }
> + if (ev.user_cookie_counter != UMN_TEST_COOKIE) {
> + fprintf(stderr, "Invalidate event for cookie %lld (expected %d)\n",
> + ev.user_cookie_counter,
> + UMN_TEST_COOKIE);
> + return 1;
> + }
> +
> + printf("Invalidate event:\tcookie %lld\n",
> + ev.user_cookie_counter);
> +
> + if (!(ev.flags & UMMUNOTIFY_EVENT_FLAG_HINT)) {
> + fprintf(stderr, "Hint flag not set\n");
> + return 1;
> + }
> +
> + if (ev.hint_start != (uintptr_t) t + page_size ||
> + ev.hint_end != (uintptr_t) t + page_size * 2) {
> + fprintf(stderr, "Got hint %llx..%llx, expected %p..%p\n",
> + ev.hint_start, ev.hint_end,
> + t + page_size, t + page_size * 2);
> + return 1;
> + }
> +
> + printf("\t\t\thint %llx...%llx\n",
> + ev.hint_start, ev.hint_end);
> +
> + got_it = 1;
> + break;
> +
> + case UMMUNOTIFY_EVENT_TYPE_LAST:
> + if (!got_it) {
> + fprintf(stderr, "Last event without invalidate event\n");
> + return 1;
> + }
> +
> + printf("Empty event:\t\tcounter %lld\n",
> + ev.user_cookie_counter);
> + goto done;
> +
> + default:
> + fprintf(stderr, "unknown event type %d\n",
> + ev.type);
> + return 1;
> + }
> + }
> +
> +done:
> + umn_unregister(123);
> + munmap(t, page_size);
> +
> + old_counter = *umn_counter;
> + if (old_counter != 1) {
> + fprintf(stderr, "counter = %lld (expected 1)\n", old_counter);
> + return 1;
> + }
> +
> + return 0;
> +}
> diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
> index 3141dd3..cf26019 100644
> --- a/drivers/char/Kconfig
> +++ b/drivers/char/Kconfig
> @@ -1111,6 +1111,18 @@ config DEVPORT
> depends on ISA || PCI
> default y
>
> +config UMMUNOTIFY
> + tristate "Userspace MMU notifications"
> + select MMU_NOTIFIER
> + help
> + The ummunotify (userspace MMU notification) driver creates a
> + character device that can be used by userspace libraries to
> + get notifications when an application's memory mapping
> + changed. This is used, for example, by RDMA libraries to
> + improve the reliability of memory registration caching, since
> + the kernel's MMU notifications can be used to know precisely
> + when to shoot down a cached registration.
> +
> source "drivers/s390/char/Kconfig"
>
> endmenu
> diff --git a/drivers/char/Makefile b/drivers/char/Makefile
> index f957edf..521e5de 100644
> --- a/drivers/char/Makefile
> +++ b/drivers/char/Makefile
> @@ -97,6 +97,7 @@ obj-$(CONFIG_NSC_GPIO) += nsc_gpio.o
> obj-$(CONFIG_CS5535_GPIO) += cs5535_gpio.o
> obj-$(CONFIG_GPIO_TB0219) += tb0219.o
> obj-$(CONFIG_TELCLOCK) += tlclk.o
> +obj-$(CONFIG_UMMUNOTIFY) += ummunotify.o
>
> obj-$(CONFIG_MWAVE) += mwave/
> obj-$(CONFIG_AGP) += agp/
> diff --git a/drivers/char/ummunotify.c b/drivers/char/ummunotify.c
> new file mode 100644
> index 0000000..c14df3f
> --- /dev/null
> +++ b/drivers/char/ummunotify.c
> @@ -0,0 +1,567 @@
> +/*
> + * Copyright (c) 2009 Cisco Systems. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */
> +
> +#include <linux/fs.h>
> +#include <linux/init.h>
> +#include <linux/list.h>
> +#include <linux/miscdevice.h>
> +#include <linux/mm.h>
> +#include <linux/mmu_notifier.h>
> +#include <linux/module.h>
> +#include <linux/poll.h>
> +#include <linux/rbtree.h>
> +#include <linux/sched.h>
> +#include <linux/slab.h>
> +#include <linux/spinlock.h>
> +#include <linux/uaccess.h>
> +#include <linux/ummunotify.h>
> +
> +#include <asm/cacheflush.h>
> +
> +MODULE_AUTHOR("Roland Dreier");
> +MODULE_DESCRIPTION("Userspace MMU notifiers");
> +MODULE_LICENSE("GPL v2");
> +
> +/*
> + * Information about an address range userspace has asked us to watch.
> + *
> + * user_cookie: Opaque cookie given to us when userspace registers the
> + * address range.
> + *
> + * start, end: Address range; start is inclusive, end is exclusive.
> + *
> + * hint_start, hint_end: If a single MMU notification event
> + * invalidates the address range, we hold the actual range of
> + * addresses that were invalidated (and set UMMUNOTIFY_FLAG_HINT).
> + * If another event hits this range before userspace reads the
> + * event, we give up and don't try to keep track of which subsets
> + * got invalidated.
> + *
> + * flags: Holds the INVALID flag for ranges that are on the invalid
> + * list and/or the HINT flag for ranges where the hint range holds
> + * good information.
> + *
> + * node: Used to put the range into an rbtree we use to be able to
> + * scan address ranges in order.
> + *
> + * list: Used to put the range on the invalid list when an MMU
> + * notification event hits the range.
> + */
> +enum {
> + UMMUNOTIFY_FLAG_INVALID = 1,
> + UMMUNOTIFY_FLAG_HINT = 2,
> +};
> +
> +struct ummunotify_reg {
> + u64 user_cookie;
> + unsigned long start;
> + unsigned long end;
> + unsigned long hint_start;
> + unsigned long hint_end;
> + unsigned long flags;
> + struct rb_node node;
> + struct list_head list;
> +};
> +
> +/*
> + * Context attached to each file that userspace opens.
> + *
> + * mmu_notifier: MMU notifier registered for this context.
> + *
> + * mm: mm_struct for process that created the context; we use this to
> + * hold a reference to the mm to make sure it doesn't go away until
> + * we're done with it.
> + *
> + * reg_tree: RB tree of address ranges being watched, sorted by start
> + * address.
> + *
> + * invalid_list: List of address ranges that have been invalidated by
> + * MMU notification events; as userspace reads events, the address
> + * range corresponding to the event is removed from the list.
> + *
> + * counter: Page that can be mapped read-only by userspace, which
> + * holds a generation count that is incremented each time an event
> + * occurs.
> + *
> + * lock: Spinlock used to protect all context.
> + *
> + * read_wait: Wait queue used to wait for data to become available in
> + * blocking read()s.
> + *
> + * async_queue: Used to implement fasync().
> + *
> + * need_empty: Set when userspace reads an invalidation event, so that
> + * read() knows it must generate an "empty" event when userspace
> + * drains the invalid_list.
> + *
> + * used: Set after userspace does anything with the file, so that the
> + * "exchange flags" ioctl() knows it's too late to change anything.
> + */
> +struct ummunotify_file {
> + struct mmu_notifier mmu_notifier;
> + struct mm_struct *mm;
> + struct rb_root reg_tree;
> + struct list_head invalid_list;
> + u64 *counter;
> + spinlock_t lock;
> + wait_queue_head_t read_wait;
> + struct fasync_struct *async_queue;
> + int need_empty;
> + int used;
> +};
> +
> +static void ummunotify_handle_notify(struct mmu_notifier *mn,
> + unsigned long start, unsigned long end)
> +{
> + struct ummunotify_file *priv =
> + container_of(mn, struct ummunotify_file, mmu_notifier);
> + struct rb_node *n;
> + struct ummunotify_reg *reg;
> + unsigned long flags;
> + int hit = 0;
> +
> + spin_lock_irqsave(&priv->lock, flags);
> +
> + for (n = rb_first(&priv->reg_tree); n; n = rb_next(n)) {
> + reg = rb_entry(n, struct ummunotify_reg, node);
> +
> + /*
> + * Ranges overlap if they're not disjoint; and they're
> + * disjoint if the end of one is before the start of
> + * the other one. So if both disjointness comparisons
> + * fail then the ranges overlap.
> + *
> + * Since we keep the tree of regions we're watching
> + * sorted by start address, we can end this loop as
> + * soon as we hit a region that starts past the end of
> + * the range for the event we're handling.
> + */
> + if (reg->start >= end)
> + break;
> +
> + /*
> + * Just go to the next region if the start of the
> + * range is after the end of the region -- there
> + * might still be more overlapping ranges that have a
> + * greater start.
> + */
> + if (start >= reg->end)
> + continue;
> +
> + hit = 1;
> +
> + if (test_and_set_bit(UMMUNOTIFY_FLAG_INVALID, ®->flags)) {
> + /* Already on invalid list */
> + clear_bit(UMMUNOTIFY_FLAG_HINT, ®->flags);
> + } else {
> + list_add_tail(®->list, &priv->invalid_list);
> + set_bit(UMMUNOTIFY_FLAG_HINT, ®->flags);
> + reg->hint_start = start;
> + reg->hint_end = end;
> + }
> + }
> +
> + if (hit) {
> + ++(*priv->counter);
> + flush_dcache_page(virt_to_page(priv->counter));
> + wake_up_interruptible(&priv->read_wait);
> + kill_fasync(&priv->async_queue, SIGIO, POLL_IN);
> + }
> +
> + spin_unlock_irqrestore(&priv->lock, flags);
> +}
> +
> +static void ummunotify_invalidate_page(struct mmu_notifier *mn,
> + struct mm_struct *mm,
> + unsigned long addr)
> +{
> + ummunotify_handle_notify(mn, addr, addr + PAGE_SIZE);
> +}
> +
> +static void ummunotify_invalidate_range_start(struct mmu_notifier *mn,
> + struct mm_struct *mm,
> + unsigned long start,
> + unsigned long end)
> +{
> + ummunotify_handle_notify(mn, start, end);
> +}
> +
> +static const struct mmu_notifier_ops ummunotify_mmu_notifier_ops = {
> + .invalidate_page = ummunotify_invalidate_page,
> + .invalidate_range_start = ummunotify_invalidate_range_start,
> +};
> +
> +static int ummunotify_open(struct inode *inode, struct file *filp)
> +{
> + struct ummunotify_file *priv;
> + int ret;
> +
> + if (filp->f_mode & FMODE_WRITE)
> + return -EINVAL;
> +
> + priv = kmalloc(sizeof *priv, GFP_KERNEL);
> + if (!priv)
> + return -ENOMEM;
> +
> + priv->counter = (void *) get_zeroed_page(GFP_KERNEL);
> + if (!priv->counter) {
> + ret = -ENOMEM;
> + goto err;
> + }
> +
> + priv->reg_tree = RB_ROOT;
> + INIT_LIST_HEAD(&priv->invalid_list);
> + spin_lock_init(&priv->lock);
> + init_waitqueue_head(&priv->read_wait);
> + priv->async_queue = NULL;
> + priv->need_empty = 0;
> + priv->used = 0;
> +
> + priv->mmu_notifier.ops = &ummunotify_mmu_notifier_ops;
> + /*
> + * Register notifier last, since notifications can occur as
> + * soon as we register....
> + */
> + ret = mmu_notifier_register(&priv->mmu_notifier, current->mm);
> + if (ret)
> + goto err_page;
> +
> + priv->mm = current->mm;
> + atomic_inc(&priv->mm->mm_count);
> +
> + filp->private_data = priv;
> +
> + return 0;
> +
> +err_page:
> + free_page((unsigned long) priv->counter);
> +
> +err:
> + kfree(priv);
> + return ret;
> +}
> +
> +static int ummunotify_close(struct inode *inode, struct file *filp)
> +{
> + struct ummunotify_file *priv = filp->private_data;
> + struct rb_node *n;
> + struct ummunotify_reg *reg;
> +
> + mmu_notifier_unregister(&priv->mmu_notifier, priv->mm);
> + mmdrop(priv->mm);
> + free_page((unsigned long) priv->counter);
> +
> + for (n = rb_first(&priv->reg_tree); n; n = rb_next(n)) {
> + reg = rb_entry(n, struct ummunotify_reg, node);
> + kfree(reg);
> + }
> +
> + kfree(priv);
> +
> + return 0;
> +}
> +
> +static bool ummunotify_readable(struct ummunotify_file *priv)
> +{
> + return priv->need_empty || !list_empty(&priv->invalid_list);
> +}
> +
> +static ssize_t ummunotify_read(struct file *filp, char __user *buf,
> + size_t count, loff_t *pos)
> +{
> + struct ummunotify_file *priv = filp->private_data;
> + struct ummunotify_reg *reg;
> + ssize_t ret;
> + struct ummunotify_event *events;
> + int max;
> + int n;
> +
> + priv->used = 1;
> +
> + events = (void *) get_zeroed_page(GFP_KERNEL);
> + if (!events) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + spin_lock_irq(&priv->lock);
> +
> + while (!ummunotify_readable(priv)) {
> + spin_unlock_irq(&priv->lock);
> +
> + if (filp->f_flags & O_NONBLOCK) {
> + ret = -EAGAIN;
> + goto out;
> + }
> +
> + if (wait_event_interruptible(priv->read_wait,
> + ummunotify_readable(priv))) {
> + ret = -ERESTARTSYS;
> + goto out;
> + }
> +
> + spin_lock_irq(&priv->lock);
> + }
> +
> + max = min_t(size_t, PAGE_SIZE, count) / sizeof *events;
> +
> + for (n = 0; n < max; ++n) {
> + if (list_empty(&priv->invalid_list)) {
> + events[n].type = UMMUNOTIFY_EVENT_TYPE_LAST;
> + events[n].user_cookie_counter = *priv->counter;
> + ++n;
> + priv->need_empty = 0;
> + break;
> + }
> +
> + reg = list_first_entry(&priv->invalid_list,
> + struct ummunotify_reg, list);
> +
> + events[n].type = UMMUNOTIFY_EVENT_TYPE_INVAL;
> + if (test_bit(UMMUNOTIFY_FLAG_HINT, ®->flags)) {
> + events[n].flags = UMMUNOTIFY_EVENT_FLAG_HINT;
> + events[n].hint_start = max(reg->start, reg->hint_start);
> + events[n].hint_end = min(reg->end, reg->hint_end);
> + } else {
> + events[n].hint_start = reg->start;
> + events[n].hint_end = reg->end;
> + }
> + events[n].user_cookie_counter = reg->user_cookie;
> +
> + list_del(®->list);
> + reg->flags = 0;
> + priv->need_empty = 1;
> + }
> +
> + spin_unlock_irq(&priv->lock);
> +
> + if (copy_to_user(buf, events, n * sizeof *events))
> + ret = -EFAULT;
> + else
> + ret = n * sizeof *events;
> +
> +out:
> + free_page((unsigned long) events);
> + return ret;
> +}
> +
> +static unsigned int ummunotify_poll(struct file *filp,
> + struct poll_table_struct *wait)
> +{
> + struct ummunotify_file *priv = filp->private_data;
> +
> + poll_wait(filp, &priv->read_wait, wait);
> +
> + return ummunotify_readable(priv) ? (POLLIN | POLLRDNORM) : 0;
> +}
> +
> +static long ummunotify_exchange_features(struct ummunotify_file *priv,
> + __u32 __user *arg)
> +{
> + u32 feature_mask;
> +
> + if (priv->used)
> + return -EINVAL;
> +
> + priv->used = 1;
> +
> + if (copy_from_user(&feature_mask, arg, sizeof(feature_mask)))
> + return -EFAULT;
> +
> + /* No extensions defined at present. */
> + feature_mask = 0;
> +
> + if (copy_to_user(arg, &feature_mask, sizeof(feature_mask)))
> + return -EFAULT;
> +
> + return 0;
> +}
> +
> +static long ummunotify_register_region(struct ummunotify_file *priv,
> + void __user *arg)
> +{
> + struct ummunotify_register_ioctl parm;
> + struct ummunotify_reg *reg, *treg;
> + struct rb_node **n = &priv->reg_tree.rb_node;
> + struct rb_node *pn;
> + int ret = 0;
> +
> + if (copy_from_user(&parm, arg, sizeof parm))
> + return -EFAULT;
> +
> + priv->used = 1;
> +
> + reg = kmalloc(sizeof *reg, GFP_KERNEL);
> + if (!reg)
> + return -ENOMEM;
> +
> + reg->user_cookie = parm.user_cookie;
> + reg->start = parm.start;
> + reg->end = parm.end;
> + reg->flags = 0;
> +
> + spin_lock_irq(&priv->lock);
> +
> + for (pn = rb_first(&priv->reg_tree); pn; pn = rb_next(pn)) {
> + treg = rb_entry(pn, struct ummunotify_reg, node);
> +
> + if (treg->user_cookie == parm.user_cookie) {
> + kfree(reg);
> + ret = -EINVAL;
> + goto out;
> + }
> + }
> +
> + pn = NULL;
> + while (*n) {
> + pn = *n;
> + treg = rb_entry(pn, struct ummunotify_reg, node);
> +
> + if (reg->start <= treg->start)
> + n = &pn->rb_left;
> + else
> + n = &pn->rb_right;
> + }
> +
> + rb_link_node(®->node, pn, n);
> + rb_insert_color(®->node, &priv->reg_tree);
> +
> +out:
> + spin_unlock_irq(&priv->lock);
> +
> + return ret;
> +}
> +
> +static long ummunotify_unregister_region(struct ummunotify_file *priv,
> + __u64 __user *arg)
> +{
> + u64 user_cookie;
> + struct rb_node *n;
> + struct ummunotify_reg *reg;
> + int ret = -EINVAL;
> +
> + if (copy_from_user(&user_cookie, arg, sizeof(user_cookie)))
> + return -EFAULT;
> +
> + spin_lock_irq(&priv->lock);
> +
> + for (n = rb_first(&priv->reg_tree); n; n = rb_next(n)) {
> + reg = rb_entry(n, struct ummunotify_reg, node);
> +
> + if (reg->user_cookie == user_cookie) {
> + rb_erase(n, &priv->reg_tree);
> + if (test_bit(UMMUNOTIFY_FLAG_INVALID, ®->flags))
> + list_del(®->list);
> + kfree(reg);
> + ret = 0;
> + break;
> + }
> + }
> +
> + spin_unlock_irq(&priv->lock);
> +
> + return ret;
> +}
> +
> +static long ummunotify_ioctl(struct file *filp, unsigned int cmd,
> + unsigned long arg)
> +{
> + struct ummunotify_file *priv = filp->private_data;
> + void __user *argp = (void __user *) arg;
> +
> + switch (cmd) {
> + case UMMUNOTIFY_EXCHANGE_FEATURES:
> + return ummunotify_exchange_features(priv, argp);
> + case UMMUNOTIFY_REGISTER_REGION:
> + return ummunotify_register_region(priv, argp);
> + case UMMUNOTIFY_UNREGISTER_REGION:
> + return ummunotify_unregister_region(priv, argp);
> + default:
> + return -ENOIOCTLCMD;
> + }
> +}
> +
> +static int ummunotify_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> + struct ummunotify_file *priv = vma->vm_private_data;
> +
> + if (vmf->pgoff != 0)
> + return VM_FAULT_SIGBUS;
> +
> + vmf->page = virt_to_page(priv->counter);
> + get_page(vmf->page);
> +
> + return 0;
> +
> +}
> +
> +static struct vm_operations_struct ummunotify_vm_ops = {
> + .fault = ummunotify_fault,
> +};
> +
> +static int ummunotify_mmap(struct file *filp, struct vm_area_struct *vma)
> +{
> + struct ummunotify_file *priv = filp->private_data;
> +
> + if (vma->vm_end - vma->vm_start != PAGE_SIZE || vma->vm_pgoff != 0)
> + return -EINVAL;
> +
> + vma->vm_ops = &ummunotify_vm_ops;
> + vma->vm_private_data = priv;
> +
> + return 0;
> +}
> +
> +static int ummunotify_fasync(int fd, struct file *filp, int on)
> +{
> + struct ummunotify_file *priv = filp->private_data;
> +
> + return fasync_helper(fd, filp, on, &priv->async_queue);
> +}
> +
> +static const struct file_operations ummunotify_fops = {
> + .owner = THIS_MODULE,
> + .open = ummunotify_open,
> + .release = ummunotify_close,
> + .read = ummunotify_read,
> + .poll = ummunotify_poll,
> + .unlocked_ioctl = ummunotify_ioctl,
> +#ifdef CONFIG_COMPAT
> + .compat_ioctl = ummunotify_ioctl,
> +#endif
> + .mmap = ummunotify_mmap,
> + .fasync = ummunotify_fasync,
> +};
> +
> +static struct miscdevice ummunotify_misc = {
> + .minor = MISC_DYNAMIC_MINOR,
> + .name = "ummunotify",
> + .fops = &ummunotify_fops,
> +};
> +
> +static int __init ummunotify_init(void)
> +{
> + return misc_register(&ummunotify_misc);
> +}
> +
> +static void __exit ummunotify_cleanup(void)
> +{
> + misc_deregister(&ummunotify_misc);
> +}
> +
> +module_init(ummunotify_init);
> +module_exit(ummunotify_cleanup);
> diff --git a/include/linux/Kbuild b/include/linux/Kbuild
> index e2ea0b2..e086b39 100644
> --- a/include/linux/Kbuild
> +++ b/include/linux/Kbuild
> @@ -163,6 +163,7 @@ header-y += tipc_config.h
> header-y += toshiba.h
> header-y += udf_fs_i.h
> header-y += ultrasound.h
> +header-y += ummunotify.h
> header-y += un.h
> header-y += utime.h
> header-y += veth.h
> diff --git a/include/linux/ummunotify.h b/include/linux/ummunotify.h
> new file mode 100644
> index 0000000..21b0d03
> --- /dev/null
> +++ b/include/linux/ummunotify.h
> @@ -0,0 +1,121 @@
> +/*
> + * Copyright (c) 2009 Cisco Systems. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
> + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
> + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
> + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
> + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
> + * SOFTWARE.
> + */
> +
> +#ifndef _LINUX_UMMUNOTIFY_H
> +#define _LINUX_UMMUNOTIFY_H
> +
> +#include <linux/types.h>
> +#include <linux/ioctl.h>
> +
> +/*
> + * Ummunotify relays MMU notifier events to userspace. A userspace
> + * process uses it by opening /dev/ummunotify, which returns a file
> + * descriptor. Interest in address ranges is registered using ioctl()
> + * and MMU notifier events are retrieved using read(), as described in
> + * more detail below.
> + *
> + * Userspace can also mmap() a single read-only page at offset 0 on
> + * this file descriptor. This page contains (at offest 0) a single
> + * 64-bit generation counter that the kernel increments each time an
> + * MMU notifier event occurs. Userspace can use this to very quickly
> + * check if there are any events to retrieve without needing to do a
> + * system call.
> + */
> +
> +/*
> + * struct ummunotify_register_ioctl describes an address range from
> + * start to end (including start but not including end) to be
> + * monitored. user_cookie is an opaque handle that userspace assigns,
> + * and which is used to unregister. flags and reserved are currently
> + * unused and should be set to 0 for forward compatibility.
> + */
> +struct ummunotify_register_ioctl {
> + __u64 start;
> + __u64 end;
> + __u64 user_cookie;
> + __u32 flags;
> + __u32 reserved;
> +};
> +
> +#define UMMUNOTIFY_MAGIC 'U'
> +
> +/*
> + * Forward compatibility: Userspace passes in a 32-bit feature mask
> + * with feature flags set indicating which extensions it wishes to
> + * use. The kernel will return a feature mask with the bits of
> + * userspace's mask that the kernel implements; from that point on
> + * both userspace and the kernel should behave as described by the
> + * kernel's feature mask.
> + *
> + * If userspace does not perform a UMMUNOTIFY_EXCHANGE_FEATURES ioctl,
> + * then the kernel will use a feature mask of 0.
> + *
> + * No feature flags are currently defined, so the kernel will always
> + * return a feature mask of 0 at present.
> + */
> +#define UMMUNOTIFY_EXCHANGE_FEATURES _IOWR(UMMUNOTIFY_MAGIC, 1, __u32)
> +
> +/*
> + * Register interest in an address range; userspace should pass in a
> + * struct ummunotify_register_ioctl describing the region.
> + */
> +#define UMMUNOTIFY_REGISTER_REGION _IOW(UMMUNOTIFY_MAGIC, 2, \
> + struct ummunotify_register_ioctl)
> +/*
> + * Unregister interest in an address range; userspace should pass in
> + * the user_cookie value that was used to register the address range.
> + * No events for the address range will be reported once it is
> + * unregistered.
> + */
> +#define UMMUNOTIFY_UNREGISTER_REGION _IOW(UMMUNOTIFY_MAGIC, 3, __u64)
> +
> +/*
> + * Invalidation events are returned whenever the kernel changes the
> + * mapping for a monitored address. These events are retrieved by
> + * read() on the ummunotify file descriptor, which will fill the
> + * read() buffer with struct ummunotify_event.
> + *
> + * If type field is INVAL, then user_cookie_counter holds the
> + * user_cookie for the region being reported; if the HINT flag is set
> + * then hint_start/hint_end hold the start and end of the mapping that
> + * was invalidated. (If HINT is not set, then multiple events
> + * invalidated parts of the registered range and hint_start/hint_end
> + * and set to the start/end of the whole registered range)
> + *
> + * If type is LAST, then the read operation has emptied the list of
> + * invalidated regions, and user_cookie_counter holds the value of the
> + * kernel's generation counter when the empty list occurred. The
> + * other fields are not filled in for this event.
> + */
> +enum {
> + UMMUNOTIFY_EVENT_TYPE_INVAL = 0,
> + UMMUNOTIFY_EVENT_TYPE_LAST = 1,
> +};
> +
> +enum {
> + UMMUNOTIFY_EVENT_FLAG_HINT = 1 << 0,
> +};
> +
> +struct ummunotify_event {
> + __u32 type;
> + __u32 flags;
> + __u64 hint_start;
> + __u64 hint_end;
> + __u64 user_cookie_counter;
> +};
> +
> +#endif /* _LINUX_UMMUNOTIFY_H */
> --
> 1.6.3.3
>
>
--
Jeff Squyres
jsquyres@cisco.com
For corporate legal information go to:
http://www.cisco.com/web/about/doing_business/legal/cri/
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox