Netdev List
 help / color / mirror / Atom feed
* [PATCH for bnxt_re V3 20/21] bnxt_re: Add QP event handling
From: Selvin Xavier @ 2016-12-20  9:13 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	michael.chan-dY08KVG/lbpWk0Htik3J/w, Selvin Xavier, Eddie Wai,
	Devesh Sharma, Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482225211-22423-1-git-send-email-selvin.xavier-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>

Implements callback handler for processing Async events related to a QP.
This patch also implements the control path command completion handling.

v3: Removes unwanted braces

Signed-off-by: Eddie Wai <eddie.wai-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
Signed-off-by: Devesh Sharma <devesh.sharma-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
Signed-off-by: Somnath Kotur <somnath.kotur-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
Signed-off-by: Selvin Xavier <selvin.xavier-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
---
 drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.c | 47 ++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.c
index 488c26f..761d32a 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.c
@@ -257,6 +257,44 @@ static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
 	return 0;
 }
 
+static int bnxt_qplib_process_qp_event(struct bnxt_qplib_rcfw *rcfw,
+				       struct creq_qp_event *qp_event)
+{
+	struct bnxt_qplib_crsq *crsq = &rcfw->crsq;
+	struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
+	struct bnxt_qplib_crsqe *crsqe;
+	u16 cbit, cookie, blocked = 0;
+	unsigned long flags;
+	u32 sw_cons;
+
+	switch (qp_event->event) {
+	case CREQ_QP_EVENT_EVENT_QP_ERROR_NOTIFICATION:
+		break;
+	default:
+		/* Command Response */
+		spin_lock_irqsave(&cmdq->lock, flags);
+		sw_cons = HWQ_CMP(crsq->cons, crsq);
+		crsqe = &crsq->crsq[sw_cons];
+		crsq->cons++;
+		memcpy(&crsqe->qp_event, qp_event, sizeof(crsqe->qp_event));
+
+		cookie = le16_to_cpu(crsqe->qp_event.cookie);
+		blocked = cookie & RCFW_CMD_IS_BLOCKING;
+		cookie &= RCFW_MAX_COOKIE_VALUE;
+		cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+		if (!test_and_clear_bit(cbit, rcfw->cmdq_bitmap))
+			dev_warn(&rcfw->pdev->dev,
+				 "QPLIB: CMD bit %d was not requested", cbit);
+
+		cmdq->cons += crsqe->req_size;
+		spin_unlock_irqrestore(&cmdq->lock, flags);
+		if (!blocked)
+			wake_up(&rcfw->waitq);
+		break;
+	}
+	return 0;
+}
+
 /* SP - CREQ Completion handlers */
 static void bnxt_qplib_service_creq(unsigned long data)
 {
@@ -280,6 +318,15 @@ static void bnxt_qplib_service_creq(unsigned long data)
 		type = creqe->type & CREQ_BASE_TYPE_MASK;
 		switch (type) {
 		case CREQ_BASE_TYPE_QP_EVENT:
+			if (!bnxt_qplib_process_qp_event
+			    (rcfw, (struct creq_qp_event *)creqe))
+				rcfw->creq_qp_event_processed++;
+			else {
+				dev_warn(&rcfw->pdev->dev, "QPLIB: crsqe with");
+				dev_warn(&rcfw->pdev->dev,
+					 "QPLIB: type = 0x%x not handled",
+					 type);
+			}
 			break;
 		case CREQ_BASE_TYPE_FUNC_EVENT:
 			if (!bnxt_qplib_process_func_event
-- 
2.5.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH for bnxt_re V3 21/21] bnxt_re: Add bnxt_re driver build support
From: Selvin Xavier @ 2016-12-20  9:13 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	michael.chan-dY08KVG/lbpWk0Htik3J/w, Selvin Xavier, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482225211-22423-1-git-send-email-selvin.xavier-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>

Makefile and Kconfig changes for enabling bnxt_re compilation

v3: Adds list of MAINTAINERS of bnxt_re driver. Removes bnxt_re_debugfs.c from
    Makefile as this file is no longer present

Signed-off-by: Devesh Sharma <devesh.sharma-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
Signed-off-by: Somnath Kotur <somnath.kotur-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
Signed-off-by: Selvin Xavier <selvin.xavier-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
---
 MAINTAINERS                           | 11 +++++++++++
 drivers/infiniband/Kconfig            |  2 ++
 drivers/infiniband/hw/Makefile        |  1 +
 drivers/infiniband/hw/bnxtre/Kconfig  |  9 +++++++++
 drivers/infiniband/hw/bnxtre/Makefile |  6 ++++++
 5 files changed, 29 insertions(+)
 create mode 100644 drivers/infiniband/hw/bnxtre/Kconfig
 create mode 100644 drivers/infiniband/hw/bnxtre/Makefile

diff --git a/MAINTAINERS b/MAINTAINERS
index 331f6af..9545eb65 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2776,6 +2776,17 @@ L:	linux-arm-kernel-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r@public.gmane.org (moderated for non-subscribers)
 S:	Maintained
 F:	arch/arm64/boot/dts/broadcom/vulcan*
 
+BROADCOM NETXTREME-E ROCE DRIVER
+M:	Selvin Xavier <selvin.xavier-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
+M:	Devesh Sharma <devesh.sharma-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
+M:	Somnath Kotur <somnath.kotur-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
+M:	Sriharsha Basavapatna <sriharsha.basavapatna-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
+L:	linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
+W:	http://www.broadcom.com
+S:	Supported
+F:	drivers/infiniband/hw/bnxtre/
+F:	include/uapi/rdma/bnxtre-abi.h
+
 BROCADE BFA FC SCSI DRIVER
 M:	Anil Gurumurthy <anil.gurumurthy-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
 M:	Sudarsana Kalluru <sudarsana.kalluru-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index fb3fb89..a4fab22 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -91,4 +91,6 @@ source "drivers/infiniband/hw/hfi1/Kconfig"
 
 source "drivers/infiniband/hw/qedr/Kconfig"
 
+source "drivers/infiniband/hw/bnxtre/Kconfig"
+
 endif # INFINIBAND
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index e7a5ed9..7227b36 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -11,3 +11,4 @@ obj-$(CONFIG_INFINIBAND_USNIC)		+= usnic/
 obj-$(CONFIG_INFINIBAND_HFI1)		+= hfi1/
 obj-$(CONFIG_INFINIBAND_HNS)		+= hns/
 obj-$(CONFIG_INFINIBAND_QEDR)		+= qedr/
+obj-$(CONFIG_INFINIBAND_BNXTRE)		+= bnxtre/
diff --git a/drivers/infiniband/hw/bnxtre/Kconfig b/drivers/infiniband/hw/bnxtre/Kconfig
new file mode 100644
index 0000000..b1f153f
--- /dev/null
+++ b/drivers/infiniband/hw/bnxtre/Kconfig
@@ -0,0 +1,9 @@
+config INFINIBAND_BNXTRE
+    tristate "Broadcom Netxtreme HCA support"
+    depends on ETHERNET && NETDEVICES && PCI && INET
+    select NET_VENDOR_BROADCOM
+    select BNXT
+    ---help---
+	  This driver supports Broadcom NetXtreme-E 10/25/40/50 gigabit
+	  RoCE HCAs.  To compile this driver as a module, choose M here:
+	  the module will be called bnxt_re.
diff --git a/drivers/infiniband/hw/bnxtre/Makefile b/drivers/infiniband/hw/bnxtre/Makefile
new file mode 100644
index 0000000..71aa5a1
--- /dev/null
+++ b/drivers/infiniband/hw/bnxtre/Makefile
@@ -0,0 +1,6 @@
+
+ccflags-y := -Idrivers/net/ethernet/broadcom/bnxt
+obj-$(CONFIG_INFINIBAND_BNXTRE) += bnxt_re.o
+bnxt_re-y := bnxt_re_main.o bnxt_re_ib_verbs.o \
+	     bnxt_qplib_res.o bnxt_qplib_rcfw.o	\
+	     bnxt_qplib_sp.o bnxt_qplib_fp.o
-- 
2.5.5

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH for bnxt_re V3 01/21] bnxt_re: Add bnxt_re RoCE driver files
From: Selvin Xavier @ 2016-12-20  9:13 UTC (permalink / raw)
  To: dledford, linux-rdma
  Cc: netdev, michael.chan, Selvin Xavier, Eddie Wai, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482225211-22423-1-git-send-email-selvin.xavier@broadcom.com>

This patch adds the required skeletal files for Broadcom NetXtreme-E RoCE
driver. Also, added the load/unload routines for bnxt_re driver.

v2: Modified the license text to include Dual License

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c    |  37 ++++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h    |  42 +++++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.c  |  37 ++++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.h  |  42 +++++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c   |  37 ++++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h   |  42 +++++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c    |  37 ++++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h    |  43 +++++++++
 drivers/infiniband/hw/bnxtre/bnxt_re.h          |  46 ++++++++++
 drivers/infiniband/hw/bnxtre/bnxt_re_hsi.h      |  42 +++++++++
 drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c |  37 ++++++++
 drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h |  42 +++++++++
 drivers/infiniband/hw/bnxtre/bnxt_re_main.c     | 116 ++++++++++++++++++++++++
 13 files changed, 600 insertions(+)
 create mode 100644 drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c
 create mode 100644 drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h
 create mode 100644 drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.c
 create mode 100644 drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.h
 create mode 100644 drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c
 create mode 100644 drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
 create mode 100644 drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c
 create mode 100644 drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
 create mode 100644 drivers/infiniband/hw/bnxtre/bnxt_re.h
 create mode 100644 drivers/infiniband/hw/bnxtre/bnxt_re_hsi.h
 create mode 100644 drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
 create mode 100644 drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
 create mode 100644 drivers/infiniband/hw/bnxtre/bnxt_re_main.c

diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c
new file mode 100644
index 0000000..36c4b81
--- /dev/null
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c
@@ -0,0 +1,37 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016, Broadcom. All rights reserved.  The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Fast Path Operators
+ */
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h
new file mode 100644
index 0000000..0983465
--- /dev/null
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h
@@ -0,0 +1,42 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016, Broadcom. All rights reserved.  The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Fast Path Operators (header)
+ */
+
+#ifndef __BNXT_QPLIB_FP_H__
+#define __BNXT_QPLIB_FP_H__
+
+#endif /* __BNXT_QPLIB_FP_H__ */
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.c
new file mode 100644
index 0000000..4029935
--- /dev/null
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.c
@@ -0,0 +1,37 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016, Broadcom. All rights reserved.  The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: RDMA Controller HW interface
+ */
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.h
new file mode 100644
index 0000000..1f63956
--- /dev/null
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.h
@@ -0,0 +1,42 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016, Broadcom. All rights reserved.  The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: RDMA Controller HW interface (header)
+ */
+
+#ifndef __BNXT_QPLIB_RCFW_H__
+#define __BNXT_QPLIB_RCFW_H__
+
+#endif /* __BNXT_QPLIB_RCFW_H__ */
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c
new file mode 100644
index 0000000..178eebd
--- /dev/null
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c
@@ -0,0 +1,37 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016, Broadcom. All rights reserved.  The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: QPLib resource manager
+ */
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
new file mode 100644
index 0000000..5fc4107
--- /dev/null
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
@@ -0,0 +1,42 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016, Broadcom. All rights reserved.  The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: QPLib resource manager (header)
+ */
+
+#ifndef __BNXT_QPLIB_RES_H__
+#define __BNXT_QPLIB_RES_H__
+
+#endif /* __BNXT_QPLIB_RES_H__ */
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c
new file mode 100644
index 0000000..667c8e1
--- /dev/null
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c
@@ -0,0 +1,37 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016, Broadcom. All rights reserved.  The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Slow Path Operators
+ */
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
new file mode 100644
index 0000000..90901c1
--- /dev/null
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
@@ -0,0 +1,43 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016, Broadcom. All rights reserved.  The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Slow Path Operators (header)
+ *
+ */
+
+#ifndef __BNXT_QPLIB_SP_H__
+#define __BNXT_QPLIB_SP_H__
+
+#endif /* __BNXT_QPLIB_SP_H__*/
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re.h b/drivers/infiniband/hw/bnxtre/bnxt_re.h
new file mode 100644
index 0000000..f9b8542
--- /dev/null
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re.h
@@ -0,0 +1,46 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016, Broadcom. All rights reserved.  The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Slow Path Operators (header)
+ *
+ */
+
+#ifndef __BNXT_RE_H__
+#define __BNXT_RE_H__
+#define ROCE_DRV_MODULE_NAME		"bnxt_re"
+#define ROCE_DRV_MODULE_VERSION		"1.0.0"
+
+#define BNXT_RE_DESC	"Broadcom NetXtreme-C/E RoCE Driver"
+#endif
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_hsi.h b/drivers/infiniband/hw/bnxtre/bnxt_re_hsi.h
new file mode 100644
index 0000000..b7d35aa
--- /dev/null
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_hsi.h
@@ -0,0 +1,42 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016, Broadcom. All rights reserved.  The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: RoCE HSI File - Autogenerated
+ */
+
+#ifndef __BNXT_RE_HSI_H__
+#define __BNXT_RE_HSI_H__
+
+#endif /* __BNXT_RE_HSI_H__ */
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
new file mode 100644
index 0000000..c01fa40
--- /dev/null
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
@@ -0,0 +1,37 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016, Broadcom. All rights reserved.  The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: IB Verbs interpreter
+ */
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
new file mode 100644
index 0000000..9162774
--- /dev/null
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
@@ -0,0 +1,42 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016, Broadcom. All rights reserved.  The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: IB Verbs interpreter (header)
+ */
+
+#ifndef __BNXT_RE_IB_VERBS_H__
+#define __BNXT_RE_IB_VERBS_H__
+
+#endif /* __BNXT_RE_IB_VERBS_H__ */
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
new file mode 100644
index 0000000..6c22d51
--- /dev/null
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
@@ -0,0 +1,116 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016, Broadcom. All rights reserved.  The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Main component of the bnxt_re driver
+ */
+
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include "bnxt_re.h"
+static char version[] =
+		BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n";
+
+MODULE_AUTHOR("Eddie Wai <eddie.wai@broadcom.com>");
+MODULE_DESCRIPTION(BNXT_RE_DESC " Driver");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION(ROCE_DRV_MODULE_VERSION);
+
+/* globals */
+static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list);
+/* Mutex to protect the list of bnxt_re devices added */
+static DEFINE_MUTEX(bnxt_re_dev_lock);
+static struct workqueue_struct *bnxt_re_wq;
+/*
+ * "Notifier chain callback can be invoked for the same chain from
+ * different CPUs at the same time".
+ *
+ * For cases when the netdev is already present, our call to the
+ * register_netdevice_notifier() will actually get the rtnl_lock()
+ * before sending NETDEV_REGISTER and (if up) NETDEV_UP
+ * events.
+ *
+ * But for cases when the netdev is not already present, the notifier
+ * chain is subjected to be invoked from different CPUs simultaneously.
+ *
+ * This is protected by the netdev_mutex.
+ */
+static int bnxt_re_netdev_event(struct notifier_block *notifier,
+				unsigned long event, void *ptr)
+{
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block bnxt_re_netdev_notifier = {
+	.notifier_call = bnxt_re_netdev_event
+};
+
+static int __init bnxt_re_mod_init(void)
+{
+	int rc = 0;
+
+	pr_info("%s: %s", ROCE_DRV_MODULE_NAME, version);
+
+	bnxt_re_wq = create_singlethread_workqueue("bnxt_re");
+	if (!bnxt_re_wq)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&bnxt_re_dev_list);
+
+	rc = register_netdevice_notifier(&bnxt_re_netdev_notifier);
+	if (rc) {
+		pr_err("%s: Cannot register to netdevice_notifier",
+		       ROCE_DRV_MODULE_NAME);
+		goto err_netdev;
+	}
+	return 0;
+
+err_netdev:
+	destroy_workqueue(bnxt_re_wq);
+
+	return rc;
+}
+
+static void __exit bnxt_re_mod_exit(void)
+{
+	unregister_netdevice_notifier(&bnxt_re_netdev_notifier);
+	if (bnxt_re_wq)
+		destroy_workqueue(bnxt_re_wq);
+}
+
+module_init(bnxt_re_mod_init);
+module_exit(bnxt_re_mod_exit);
-- 
2.5.5

^ permalink raw reply related

* [PATCH for bnxt_re V3 03/21] bnxt_re: register with the NIC driver
From: Selvin Xavier @ 2016-12-20  9:13 UTC (permalink / raw)
  To: dledford, linux-rdma
  Cc: netdev, michael.chan, Selvin Xavier, Eddie Wai, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482225211-22423-1-git-send-email-selvin.xavier@broadcom.com>

This patch handles the registration with the bnxt_en driver.
The bnxt_re driver first registers with netdev notifier chain and upon
receiving the NETDEV_REGISTER event, it registers with bnxt_en driver.

	1. bnxt_en's ulp_probe function returns a structure that contains
	   information 	about the device and additional entry points.
	2. bnxt_en driver returns 'struct bnxt_eth_dev' that contains set
	   of operation  vectors that bnxt_re driver invokes later.
	3. bnxt_request_msix() allows the bnxt_re driver to specify the
	   number of MSI-X vectors that are needed.
	4. bnxt_send_fw_msg () is used to send messages to the FW
	5. bnxt_register_async_events() is used to register for async
	   event callbacks.

v2: Remove some sparse warning. Also, remove some unused code from unreg
    path.
v3: Removed condition checks for rdev reported during static code analysis.
    Check the return value of try_module_get while getting bnxt_en
    reference.

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxtre/bnxt_re.h      |  51 ++++
 drivers/infiniband/hw/bnxtre/bnxt_re_main.c | 448 ++++++++++++++++++++++++++++
 2 files changed, 499 insertions(+)

diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re.h b/drivers/infiniband/hw/bnxtre/bnxt_re.h
index f9b8542..8b73e3d 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re.h
@@ -42,5 +42,56 @@
 #define ROCE_DRV_MODULE_NAME		"bnxt_re"
 #define ROCE_DRV_MODULE_VERSION		"1.0.0"
 
+#define BNXT_RE_REF_WAIT_COUNT		10
 #define BNXT_RE_DESC	"Broadcom NetXtreme-C/E RoCE Driver"
+
+struct bnxt_re_work {
+	struct work_struct	work;
+	unsigned long		event;
+	struct bnxt_re_dev      *rdev;
+	struct net_device	*vlan_dev;
+};
+
+#define BNXT_RE_MIN_MSIX		2
+#define BNXT_RE_MAX_MSIX		16
+struct bnxt_re_dev {
+	struct ib_device		ibdev;
+	struct list_head		list;
+	atomic_t			ref_count;
+	unsigned long			flags;
+#define BNXT_RE_FLAG_NETDEV_REGISTERED	0
+#define BNXT_RE_FLAG_IBDEV_REGISTERED	1
+#define BNXT_RE_FLAG_GOT_MSIX		2
+#define BNXT_RE_FLAG_RCFW_CHANNEL_EN	8
+#define BNXT_RE_FLAG_QOS_WORK_REG	16
+	struct net_device		*netdev;
+	unsigned int			version, major, minor;
+	struct bnxt_en_dev		*en_dev;
+	struct bnxt_msix_entry		msix_entries[BNXT_RE_MAX_MSIX];
+	int				num_msix;
+
+	int				id;
+
+	atomic_t			qp_count;
+	struct mutex			qp_lock;	/* protect qp list */
+	struct list_head		qp_list;
+
+	atomic_t			cq_count;
+	atomic_t			srq_count;
+	atomic_t			mr_count;
+	atomic_t			mw_count;
+	/* Max of 2 lossless traffic class supported per port */
+	u16				cosq[2];
+};
+
+#define to_bnxt_re_dev(ptr, member)	\
+	container_of((ptr), struct bnxt_re_dev, member)
+
+static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
+{
+	if (rdev)
+		return  &rdev->ibdev.dev;
+	return NULL;
+}
+
 #endif
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
index 6c22d51..fbd2ad3 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
@@ -38,10 +38,24 @@
 
 #include <linux/module.h>
 #include <linux/netdevice.h>
+#include <linux/ethtool.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/rculist.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <net/ipv6.h>
+#include <net/addrconf.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_addr.h>
+
+#include "bnxt_ulp.h"
+#include "bnxt_re_hsi.h"
 #include "bnxt_re.h"
+#include "bnxt.h"
 static char version[] =
 		BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n";
 
@@ -55,6 +69,384 @@ static struct list_head bnxt_re_dev_list = LIST_HEAD_INIT(bnxt_re_dev_list);
 /* Mutex to protect the list of bnxt_re devices added */
 static DEFINE_MUTEX(bnxt_re_dev_lock);
 static struct workqueue_struct *bnxt_re_wq;
+
+/* for handling bnxt_en callbacks later */
+static void bnxt_re_stop(void *p)
+{
+}
+
+static void bnxt_re_start(void *p)
+{
+}
+
+static void bnxt_re_sriov_config(void *p, int num_vfs)
+{
+}
+
+static struct bnxt_ulp_ops bnxt_re_ulp_ops = {
+	.ulp_async_notifier = NULL,
+	.ulp_stop = bnxt_re_stop,
+	.ulp_start = bnxt_re_start,
+	.ulp_sriov_config = bnxt_re_sriov_config
+};
+
+/* The rdev ref_count is to protect immature removal of the device */
+static inline void bnxt_re_hold(struct bnxt_re_dev *rdev)
+{
+	atomic_inc(&rdev->ref_count);
+}
+
+static inline void bnxt_re_put(struct bnxt_re_dev *rdev)
+{
+	atomic_dec(&rdev->ref_count);
+}
+
+/* RoCE -> Net driver */
+
+/* Driver registration routines used to let the networking driver (bnxt_en)
+ * to know that the RoCE driver is now installed
+ */
+static int bnxt_re_unregister_netdev(struct bnxt_re_dev *rdev, bool lock_wait)
+{
+	struct bnxt_en_dev *en_dev;
+	int rc;
+
+	if (!rdev)
+		return -EINVAL;
+
+	en_dev = rdev->en_dev;
+	/* Acquire rtnl lock if it is not invokded from netdev event */
+	if (lock_wait)
+		rtnl_lock();
+
+	rc = en_dev->en_ops->bnxt_unregister_device(rdev->en_dev,
+						    BNXT_ROCE_ULP);
+	if (lock_wait)
+		rtnl_unlock();
+	return rc;
+}
+
+static int bnxt_re_register_netdev(struct bnxt_re_dev *rdev)
+{
+	struct bnxt_en_dev *en_dev;
+	int rc = 0;
+
+	if (!rdev)
+		return -EINVAL;
+
+	en_dev = rdev->en_dev;
+
+	rtnl_lock();
+	rc = en_dev->en_ops->bnxt_register_device(en_dev, BNXT_ROCE_ULP,
+						  &bnxt_re_ulp_ops, rdev);
+	rtnl_unlock();
+	return rc;
+}
+
+static int bnxt_re_free_msix(struct bnxt_re_dev *rdev, bool lock_wait)
+{
+	struct bnxt_en_dev *en_dev;
+	int rc;
+
+	if (!rdev)
+		return -EINVAL;
+
+	en_dev = rdev->en_dev;
+
+	if (lock_wait)
+		rtnl_lock();
+
+	rc = en_dev->en_ops->bnxt_free_msix(rdev->en_dev, BNXT_ROCE_ULP);
+
+	if (lock_wait)
+		rtnl_unlock();
+	return rc;
+}
+
+static int bnxt_re_request_msix(struct bnxt_re_dev *rdev)
+{
+	int rc = 0, num_msix_want = BNXT_RE_MIN_MSIX, num_msix_got;
+	struct bnxt_en_dev *en_dev;
+
+	if (!rdev)
+		return -EINVAL;
+
+	en_dev = rdev->en_dev;
+
+	rtnl_lock();
+	num_msix_got = en_dev->en_ops->bnxt_request_msix(en_dev, BNXT_ROCE_ULP,
+							 rdev->msix_entries,
+							 num_msix_want);
+	if (num_msix_got < BNXT_RE_MIN_MSIX) {
+		rc = -EINVAL;
+		goto done;
+	}
+	if (num_msix_got != num_msix_want) {
+		dev_warn(rdev_to_dev(rdev),
+			 "Requested %d MSI-X vectors, got %d\n",
+			 num_msix_want, num_msix_got);
+	}
+	rdev->num_msix = num_msix_got;
+done:
+	rtnl_unlock();
+	return rc;
+}
+
+/* Device */
+
+static bool is_bnxt_re_dev(struct net_device *netdev)
+{
+	struct ethtool_drvinfo drvinfo;
+
+	if (netdev->ethtool_ops && netdev->ethtool_ops->get_drvinfo) {
+		memset(&drvinfo, 0, sizeof(drvinfo));
+		netdev->ethtool_ops->get_drvinfo(netdev, &drvinfo);
+
+		if (strcmp(drvinfo.driver, "bnxt_en"))
+			return false;
+		return true;
+	}
+	return false;
+}
+
+static struct bnxt_re_dev *bnxt_re_from_netdev(struct net_device *netdev)
+{
+	struct bnxt_re_dev *rdev;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(rdev, &bnxt_re_dev_list, list) {
+		if (rdev->netdev == netdev) {
+			rcu_read_unlock();
+			return rdev;
+		}
+	}
+	rcu_read_unlock();
+	return NULL;
+}
+
+static void bnxt_re_dev_unprobe(struct net_device *netdev,
+				struct bnxt_en_dev *en_dev)
+{
+	dev_put(netdev);
+	module_put(en_dev->pdev->driver->driver.owner);
+}
+
+static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
+{
+	struct bnxt *bp = netdev_priv(netdev);
+	struct bnxt_en_dev *en_dev;
+	struct pci_dev *pdev;
+
+	/* Call bnxt_en's RoCE probe via indirect API */
+	if (!bp->ulp_probe)
+		return ERR_PTR(-EINVAL);
+
+	en_dev = bp->ulp_probe(netdev);
+	if (IS_ERR(en_dev))
+		return en_dev;
+
+	pdev = en_dev->pdev;
+	if (!pdev)
+		return ERR_PTR(-EINVAL);
+
+	/* Bump net device reference count */
+	if (!try_module_get(pdev->driver->driver.owner))
+		return ERR_PTR(-ENODEV);
+
+	dev_hold(netdev);
+
+	return en_dev;
+}
+
+static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
+{
+	int i = BNXT_RE_REF_WAIT_COUNT;
+
+	/* Wait for rdev refcount to come down */
+	while ((atomic_read(&rdev->ref_count) > 1) && i--)
+		msleep(100);
+
+	if (atomic_read(&rdev->ref_count) > 1)
+		dev_err(rdev_to_dev(rdev),
+			"Failed waiting for ref count to deplete %d",
+			atomic_read(&rdev->ref_count));
+
+	atomic_set(&rdev->ref_count, 0);
+	dev_put(rdev->netdev);
+	rdev->netdev = NULL;
+
+	mutex_lock(&bnxt_re_dev_lock);
+	list_del_rcu(&rdev->list);
+	mutex_unlock(&bnxt_re_dev_lock);
+
+	synchronize_rcu();
+	flush_workqueue(bnxt_re_wq);
+
+	ib_dealloc_device(&rdev->ibdev);
+	/* rdev is gone */
+}
+
+static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
+					   struct bnxt_en_dev *en_dev)
+{
+	struct bnxt_re_dev *rdev;
+
+	/* Allocate bnxt_re_dev instance here */
+	rdev = (struct bnxt_re_dev *)ib_alloc_device(sizeof(*rdev));
+	if (!rdev) {
+		dev_err(NULL, "%s: bnxt_re_dev allocation failure!",
+			ROCE_DRV_MODULE_NAME);
+		return NULL;
+	}
+	/* Default values */
+	atomic_set(&rdev->ref_count, 0);
+	rdev->netdev = netdev;
+	dev_hold(rdev->netdev);
+	rdev->en_dev = en_dev;
+	rdev->id = rdev->en_dev->pdev->devfn;
+	INIT_LIST_HEAD(&rdev->qp_list);
+	mutex_init(&rdev->qp_lock);
+	atomic_set(&rdev->qp_count, 0);
+	atomic_set(&rdev->cq_count, 0);
+	atomic_set(&rdev->srq_count, 0);
+	atomic_set(&rdev->mr_count, 0);
+	atomic_set(&rdev->mw_count, 0);
+	rdev->cosq[0] = 0xFFFF;
+	rdev->cosq[1] = 0xFFFF;
+
+	mutex_lock(&bnxt_re_dev_lock);
+	list_add_tail_rcu(&rdev->list, &bnxt_re_dev_list);
+	mutex_unlock(&bnxt_re_dev_lock);
+	return rdev;
+}
+
+static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
+{
+	int rc;
+
+	if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) {
+		rc = bnxt_re_free_msix(rdev, lock_wait);
+		if (rc)
+			dev_warn(rdev_to_dev(rdev),
+				 "Failed to free MSI-X vectors: %#x", rc);
+	}
+	if (test_and_clear_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags)) {
+		rc = bnxt_re_unregister_netdev(rdev, lock_wait);
+		if (rc)
+			dev_warn(rdev_to_dev(rdev),
+				 "Failed to unregister with netdev: %#x", rc);
+	}
+}
+
+static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
+{
+	int i, j, rc;
+
+	/* Registered a new RoCE device instance to netdev */
+	rc = bnxt_re_register_netdev(rdev);
+	if (rc) {
+		pr_err("Failed to register with netedev: %#x\n", rc);
+		return -EINVAL;
+	}
+	set_bit(BNXT_RE_FLAG_NETDEV_REGISTERED, &rdev->flags);
+
+	rc = bnxt_re_request_msix(rdev);
+	if (rc) {
+		pr_err("Failed to get MSI-X vectors: %#x\n", rc);
+		rc = -EINVAL;
+		goto fail;
+	}
+	set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags);
+
+	return 0;
+fail:
+	bnxt_re_ib_unreg(rdev, true);
+	return rc;
+}
+
+static void bnxt_re_dev_unreg(struct bnxt_re_dev *rdev)
+{
+	struct bnxt_en_dev *en_dev = rdev->en_dev;
+	struct net_device *netdev = rdev->netdev;
+
+	bnxt_re_dev_remove(rdev);
+
+	if (netdev)
+		bnxt_re_dev_unprobe(netdev, en_dev);
+}
+
+static int bnxt_re_dev_reg(struct bnxt_re_dev **rdev, struct net_device *netdev)
+{
+	struct bnxt_en_dev *en_dev;
+	int rc = 0;
+
+	if (!is_bnxt_re_dev(netdev))
+		return -ENODEV;
+
+	en_dev = bnxt_re_dev_probe(netdev);
+	if (IS_ERR(en_dev)) {
+		pr_err("%s: Failed to probe\n", ROCE_DRV_MODULE_NAME);
+		rc = PTR_ERR(en_dev);
+		goto exit;
+	}
+	*rdev = bnxt_re_dev_add(netdev, en_dev);
+	if (!*rdev) {
+		rc = -ENOMEM;
+		bnxt_re_dev_unprobe(netdev, en_dev);
+		goto exit;
+	}
+	bnxt_re_hold(*rdev);
+exit:
+	return rc;
+}
+
+static void bnxt_re_remove_one(struct bnxt_re_dev *rdev)
+{
+	pci_dev_put(rdev->en_dev->pdev);
+}
+
+/* Handle all deferred netevents tasks */
+static void bnxt_re_task(struct work_struct *work)
+{
+	struct bnxt_re_work *re_work;
+	struct bnxt_re_dev *rdev;
+	int rc = 0;
+
+	re_work = container_of(work, struct bnxt_re_work, work);
+	rdev = re_work->rdev;
+
+	if (re_work->event != NETDEV_REGISTER &&
+	    !test_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags))
+		return;
+
+	switch (re_work->event) {
+	case NETDEV_REGISTER:
+		rc = bnxt_re_ib_reg(rdev);
+		if (rc)
+			dev_err(rdev_to_dev(rdev),
+				"Failed to register with IB: %#x", rc);
+		break;
+	case NETDEV_UP:
+
+		break;
+	case NETDEV_DOWN:
+
+		break;
+
+	case NETDEV_CHANGE:
+
+		break;
+	default:
+		break;
+	}
+	kfree(re_work);
+}
+
+static void bnxt_re_init_one(struct bnxt_re_dev *rdev)
+{
+	pci_dev_get(rdev->en_dev->pdev);
+}
+
 /*
  * "Notifier chain callback can be invoked for the same chain from
  * different CPUs at the same time".
@@ -72,6 +464,62 @@ static struct workqueue_struct *bnxt_re_wq;
 static int bnxt_re_netdev_event(struct notifier_block *notifier,
 				unsigned long event, void *ptr)
 {
+	struct net_device *real_dev, *netdev = netdev_notifier_info_to_dev(ptr);
+	struct bnxt_re_work *re_work;
+	struct bnxt_re_dev *rdev;
+	int rc = 0;
+
+	real_dev = rdma_vlan_dev_real_dev(netdev);
+	if (!real_dev)
+		real_dev = netdev;
+
+	rdev = bnxt_re_from_netdev(real_dev);
+	if (!rdev && event != NETDEV_REGISTER)
+		goto exit;
+	if (real_dev != netdev)
+		goto exit;
+
+	if (rdev)
+		bnxt_re_hold(rdev);
+
+	switch (event) {
+	case NETDEV_REGISTER:
+		if (rdev)
+			break;
+		rc = bnxt_re_dev_reg(&rdev, real_dev);
+		if (rc == -ENODEV)
+			break;
+		if (rc) {
+			pr_err("Failed to register with the device %s: %#x\n",
+			       real_dev->name, rc);
+			break;
+		}
+		bnxt_re_init_one(rdev);
+		goto sch_work;
+
+	case NETDEV_UNREGISTER:
+		bnxt_re_ib_unreg(rdev, false);
+		bnxt_re_remove_one(rdev);
+		bnxt_re_dev_unreg(rdev);
+		break;
+
+	default:
+sch_work:
+		/* Allocate for the deferred task */
+		re_work = kzalloc(sizeof(*re_work), GFP_ATOMIC);
+		if (!re_work)
+			break;
+
+		re_work->rdev = rdev;
+		re_work->event = event;
+		re_work->vlan_dev = (real_dev == netdev ? NULL : netdev);
+		INIT_WORK(&re_work->work, bnxt_re_task);
+		queue_work(bnxt_re_wq, &re_work->work);
+		break;
+	}
+	if (rdev)
+		bnxt_re_put(rdev);
+exit:
 	return NOTIFY_DONE;
 }
 
-- 
2.5.5

^ permalink raw reply related

* [PATCH for bnxt_re V3 04/21] bnxt_re: Enabling RoCE control path
From: Selvin Xavier @ 2016-12-20  9:13 UTC (permalink / raw)
  To: dledford, linux-rdma
  Cc: netdev, michael.chan, Selvin Xavier, Eddie Wai, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482225211-22423-1-git-send-email-selvin.xavier@broadcom.com>

This patch covers the basic initialization of the HW interface.
Implements some of the slow path FW commands required for the
HW intialization. It also handles registration with the IB stack.

v2: Fix some of the sparse warnings
v3: Removed some more smatch and sparse warnings related to endianness.
    Fixes cross compilation failure warnings. Also, fixes the retry logic
    to avoid system hangs in case of delay or no response for the FW commands.

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.c | 608 ++++++++++++++++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.h | 189 +++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c  | 738 +++++++++++++++++++++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h  | 165 ++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c   | 163 ++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h   |  42 ++
 drivers/infiniband/hw/bnxtre/bnxt_re.h         |  15 +
 drivers/infiniband/hw/bnxtre/bnxt_re_main.c    | 428 +++++++++++++-
 8 files changed, 2343 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.c
index 4029935..488c26f 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.c
@@ -35,3 +35,611 @@
  *
  * Description: RDMA Controller HW interface
  */
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/prefetch.h>
+#include "bnxt_re_hsi.h"
+#include "bnxt_qplib_res.h"
+#include "bnxt_qplib_rcfw.h"
+static void bnxt_qplib_service_creq(unsigned long data);
+
+/* Hardware communication channel */
+int bnxt_qplib_rcfw_wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+{
+	u16 cbit;
+	int rc;
+
+	cookie &= RCFW_MAX_COOKIE_VALUE;
+	cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+	if (!test_bit(cbit, rcfw->cmdq_bitmap))
+		dev_warn(&rcfw->pdev->dev,
+			 "QPLIB: CMD bit %d for cookie 0x%x is not set?",
+			 cbit, cookie);
+
+	rc = wait_event_timeout(rcfw->waitq,
+				!test_bit(cbit, rcfw->cmdq_bitmap),
+				msecs_to_jiffies(RCFW_CMD_WAIT_TIME_MS));
+	if (!rc) {
+		dev_warn(&rcfw->pdev->dev,
+			 "QPLIB: Bono Error: timeout %d msec, msg {0x%x}\n",
+			 RCFW_CMD_WAIT_TIME_MS, cookie);
+	}
+
+	return rc;
+};
+
+int bnxt_qplib_rcfw_block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie)
+{
+	u32 count = -1;
+	u16 cbit;
+
+	cookie &= RCFW_MAX_COOKIE_VALUE;
+	cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+	if (!test_bit(cbit, rcfw->cmdq_bitmap))
+		goto done;
+	do {
+		bnxt_qplib_service_creq((unsigned long)rcfw);
+	} while (test_bit(cbit, rcfw->cmdq_bitmap) && --count);
+done:
+	return count;
+};
+
+void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+				   struct cmdq_base *req, void **crsbe,
+				   u8 is_block)
+{
+	struct bnxt_qplib_crsq *crsq = &rcfw->crsq;
+	struct bnxt_qplib_cmdqe *cmdqe, **cmdq_ptr;
+	struct bnxt_qplib_hwq *cmdq = &rcfw->cmdq;
+	struct bnxt_qplib_hwq *crsb = &rcfw->crsb;
+	struct bnxt_qplib_crsqe *crsqe = NULL;
+	struct bnxt_qplib_crsbe **crsb_ptr;
+	u32 sw_prod, cmdq_prod;
+	u8 retry_cnt = 0xFF;
+	dma_addr_t dma_addr;
+	unsigned long flags;
+	u32 size, opcode;
+	u16 cookie, cbit;
+	int pg, idx;
+	u8 *preq;
+
+retry:
+	opcode = req->opcode;
+	if (!test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
+	    (opcode != CMDQ_BASE_OPCODE_QUERY_FUNC &&
+	     opcode != CMDQ_BASE_OPCODE_INITIALIZE_FW)) {
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: RCFW not initialized, reject opcode 0x%x",
+			opcode);
+		return NULL;
+	}
+
+	if (test_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags) &&
+	    opcode == CMDQ_BASE_OPCODE_INITIALIZE_FW) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: RCFW already initialized!");
+		return NULL;
+	}
+
+	/* Cmdq are in 16-byte units, each request can consume 1 or more
+	 * cmdqe
+	 */
+	spin_lock_irqsave(&cmdq->lock, flags);
+	if (req->cmd_size > cmdq->max_elements -
+	    ((HWQ_CMP(cmdq->prod, cmdq) - HWQ_CMP(cmdq->cons, cmdq)) &
+	     (cmdq->max_elements - 1))) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: RCFW: CMDQ is full!");
+		spin_unlock_irqrestore(&cmdq->lock, flags);
+
+		if (!retry_cnt--)
+			return NULL;
+		goto retry;
+	}
+
+	retry_cnt = 0xFF;
+
+	cookie = atomic_inc_return(&rcfw->seq_num) & RCFW_MAX_COOKIE_VALUE;
+	cbit = cookie % RCFW_MAX_OUTSTANDING_CMD;
+	if (is_block)
+		cookie |= RCFW_CMD_IS_BLOCKING;
+	req->cookie = cpu_to_le16(cookie);
+	if (test_and_set_bit(cbit, rcfw->cmdq_bitmap)) {
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: RCFW MAX outstanding cmd reached!");
+		atomic_dec(&rcfw->seq_num);
+		spin_unlock_irqrestore(&cmdq->lock, flags);
+
+		if (!retry_cnt--)
+			return NULL;
+		goto retry;
+	}
+	/* Reserve a resp buffer slot if requested */
+	if (req->resp_size && crsbe) {
+		spin_lock(&crsb->lock);
+		sw_prod = HWQ_CMP(crsb->prod, crsb);
+		crsb_ptr = (struct bnxt_qplib_crsbe **)crsb->pbl_ptr;
+		*crsbe = (void *)&crsb_ptr[get_crsb_pg(sw_prod)]
+					  [get_crsb_idx(sw_prod)];
+		bnxt_qplib_crsb_dma_next(crsb->pbl_dma_ptr, sw_prod, &dma_addr);
+		req->resp_addr = cpu_to_le64(dma_addr);
+		crsb->prod++;
+		spin_unlock(&crsb->lock);
+
+		req->resp_size = (sizeof(struct bnxt_qplib_crsbe) +
+				  BNXT_QPLIB_CMDQE_UNITS - 1) /
+				 BNXT_QPLIB_CMDQE_UNITS;
+	}
+	cmdq_ptr = (struct bnxt_qplib_cmdqe **)cmdq->pbl_ptr;
+	preq = (u8 *)req;
+	size = req->cmd_size * BNXT_QPLIB_CMDQE_UNITS;
+	do {
+		pg = 0;
+		idx = 0;
+
+		/* Locate the next cmdq slot */
+		sw_prod = HWQ_CMP(cmdq->prod, cmdq);
+		cmdqe = &cmdq_ptr[get_cmdq_pg(sw_prod)][get_cmdq_idx(sw_prod)];
+		if (!cmdqe) {
+			dev_err(&rcfw->pdev->dev,
+				"QPLIB: RCFW request failed with no cmdqe!");
+			goto done;
+		}
+		/* Copy a segment of the req cmd to the cmdq */
+		memset(cmdqe, 0, sizeof(*cmdqe));
+		memcpy(cmdqe, preq, min_t(u32, size, sizeof(*cmdqe)));
+		preq += min_t(u32, size, sizeof(*cmdqe));
+		size -= min_t(u32, size, sizeof(*cmdqe));
+		cmdq->prod++;
+	} while (size > 0);
+
+	cmdq_prod = cmdq->prod;
+	if (rcfw->flags & FIRMWARE_FIRST_FLAG) {
+		/* The very first doorbell write is required to set this flag
+		 * which prompts the FW to reset its internal pointers
+		 */
+		cmdq_prod |= FIRMWARE_FIRST_FLAG;
+		rcfw->flags &= ~FIRMWARE_FIRST_FLAG;
+	}
+	sw_prod = HWQ_CMP(crsq->prod, crsq);
+	crsqe = &crsq->crsq[sw_prod];
+	memset(crsqe, 0, sizeof(*crsqe));
+	crsq->prod++;
+	crsqe->req_size = req->cmd_size;
+
+	/* ring CMDQ DB */
+	writel(cmdq_prod, rcfw->cmdq_bar_reg_iomem +
+	       rcfw->cmdq_bar_reg_prod_off);
+	writel(RCFW_CMDQ_TRIG_VAL, rcfw->cmdq_bar_reg_iomem +
+	       rcfw->cmdq_bar_reg_trig_off);
+done:
+	spin_unlock_irqrestore(&cmdq->lock, flags);
+	/* Return the CREQ response pointer */
+	return crsqe ? &crsqe->qp_event : NULL;
+}
+
+/* Completions */
+static int bnxt_qplib_process_func_event(struct bnxt_qplib_rcfw *rcfw,
+					 struct creq_func_event *func_event)
+{
+	switch (func_event->event) {
+	case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
+		/* SRQ ctx error, call srq_handler??
+		 * But there's no SRQ handle!
+		 */
+		break;
+	case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_VF_COMM_REQUEST:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_RESOURCE_EXHAUSTED:
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+/* SP - CREQ Completion handlers */
+static void bnxt_qplib_service_creq(unsigned long data)
+{
+	struct bnxt_qplib_rcfw *rcfw = (struct bnxt_qplib_rcfw *)data;
+	struct bnxt_qplib_hwq *creq = &rcfw->creq;
+	struct creq_base *creqe, **creq_ptr;
+	u32 sw_cons, raw_cons;
+	unsigned long flags;
+	u32 type;
+
+	/* Service the CREQ until empty */
+	spin_lock_irqsave(&creq->lock, flags);
+	raw_cons = creq->cons;
+	while (1) {
+		sw_cons = HWQ_CMP(raw_cons, creq);
+		creq_ptr = (struct creq_base **)creq->pbl_ptr;
+		creqe = &creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)];
+		if (!CREQ_CMP_VALID(creqe, raw_cons, creq->max_elements))
+			break;
+
+		type = creqe->type & CREQ_BASE_TYPE_MASK;
+		switch (type) {
+		case CREQ_BASE_TYPE_QP_EVENT:
+			break;
+		case CREQ_BASE_TYPE_FUNC_EVENT:
+			if (!bnxt_qplib_process_func_event
+			    (rcfw, (struct creq_func_event *)creqe))
+				rcfw->creq_func_event_processed++;
+			else
+				dev_warn
+				(&rcfw->pdev->dev, "QPLIB:aeqe:%#x Not handled",
+				 type);
+			break;
+		default:
+			dev_warn(&rcfw->pdev->dev, "QPLIB: creqe with ");
+			dev_warn(&rcfw->pdev->dev,
+				 "QPLIB: op_event = 0x%x not handled", type);
+			break;
+		}
+		raw_cons++;
+	}
+	if (creq->cons != raw_cons) {
+		creq->cons = raw_cons;
+		CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, raw_cons,
+			      creq->max_elements);
+	}
+	spin_unlock_irqrestore(&creq->lock, flags);
+}
+
+static irqreturn_t bnxt_qplib_creq_irq(int irq, void *dev_instance)
+{
+	struct bnxt_qplib_rcfw *rcfw = dev_instance;
+	struct bnxt_qplib_hwq *creq = &rcfw->creq;
+	struct creq_base **creq_ptr;
+	u32 sw_cons;
+
+	/* Prefetch the CREQ element */
+	sw_cons = HWQ_CMP(creq->cons, creq);
+	creq_ptr = (struct creq_base **)rcfw->creq.pbl_ptr;
+	prefetch(&creq_ptr[get_creq_pg(sw_cons)][get_creq_idx(sw_cons)]);
+
+	tasklet_schedule(&rcfw->worker);
+
+	return IRQ_HANDLED;
+}
+
+/* RCFW */
+int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw)
+{
+	struct creq_deinitialize_fw_resp *resp;
+	struct cmdq_deinitialize_fw req;
+	u16 cmd_flags = 0;
+
+	RCFW_CMD_PREP(req, DEINITIALIZE_FW, cmd_flags);
+	resp = (struct creq_deinitialize_fw_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						     NULL, 0);
+	if (!resp)
+		return -EINVAL;
+
+	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie)))
+		return -ETIMEDOUT;
+
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie))
+		return -EFAULT;
+
+	clear_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
+	return 0;
+}
+
+static int __get_pbl_pg_idx(struct bnxt_qplib_pbl *pbl)
+{
+	return (pbl->pg_size == ROCE_PG_SIZE_4K ?
+				      CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K :
+		pbl->pg_size == ROCE_PG_SIZE_8K ?
+				      CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8K :
+		pbl->pg_size == ROCE_PG_SIZE_64K ?
+				      CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_64K :
+		pbl->pg_size == ROCE_PG_SIZE_2M ?
+				      CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_2M :
+		pbl->pg_size == ROCE_PG_SIZE_8M ?
+				      CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_8M :
+		pbl->pg_size == ROCE_PG_SIZE_1G ?
+				      CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_1G :
+				      CMDQ_INITIALIZE_FW_QPC_PG_SIZE_PG_4K);
+}
+
+int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
+			 struct bnxt_qplib_ctx *ctx, int is_virtfn)
+{
+	struct creq_initialize_fw_resp *resp;
+	struct cmdq_initialize_fw req;
+	u16 cmd_flags = 0, level;
+
+	RCFW_CMD_PREP(req, INITIALIZE_FW, cmd_flags);
+
+	/*
+	 * VFs need not setup the HW context area, PF
+	 * shall setup this area for VF. Skipping the
+	 * HW programming
+	 */
+	if (is_virtfn)
+		goto skip_ctx_setup;
+
+	level = ctx->qpc_tbl.level;
+	req.qpc_pg_size_qpc_lvl = (level << CMDQ_INITIALIZE_FW_QPC_LVL_SFT) |
+				__get_pbl_pg_idx(&ctx->qpc_tbl.pbl[level]);
+	level = ctx->mrw_tbl.level;
+	req.mrw_pg_size_mrw_lvl = (level << CMDQ_INITIALIZE_FW_MRW_LVL_SFT) |
+				__get_pbl_pg_idx(&ctx->mrw_tbl.pbl[level]);
+	level = ctx->srqc_tbl.level;
+	req.srq_pg_size_srq_lvl = (level << CMDQ_INITIALIZE_FW_SRQ_LVL_SFT) |
+				__get_pbl_pg_idx(&ctx->srqc_tbl.pbl[level]);
+	level = ctx->cq_tbl.level;
+	req.cq_pg_size_cq_lvl = (level << CMDQ_INITIALIZE_FW_CQ_LVL_SFT) |
+				__get_pbl_pg_idx(&ctx->cq_tbl.pbl[level]);
+	level = ctx->srqc_tbl.level;
+	req.srq_pg_size_srq_lvl = (level << CMDQ_INITIALIZE_FW_SRQ_LVL_SFT) |
+				__get_pbl_pg_idx(&ctx->srqc_tbl.pbl[level]);
+	level = ctx->cq_tbl.level;
+	req.cq_pg_size_cq_lvl = (level << CMDQ_INITIALIZE_FW_CQ_LVL_SFT) |
+				__get_pbl_pg_idx(&ctx->cq_tbl.pbl[level]);
+	level = ctx->tim_tbl.level;
+	req.tim_pg_size_tim_lvl = (level << CMDQ_INITIALIZE_FW_TIM_LVL_SFT) |
+				  __get_pbl_pg_idx(&ctx->tim_tbl.pbl[level]);
+	level = ctx->tqm_pde_level;
+	req.tqm_pg_size_tqm_lvl = (level << CMDQ_INITIALIZE_FW_TQM_LVL_SFT) |
+				  __get_pbl_pg_idx(&ctx->tqm_pde.pbl[level]);
+
+	req.qpc_page_dir =
+		cpu_to_le64(ctx->qpc_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+	req.mrw_page_dir =
+		cpu_to_le64(ctx->mrw_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+	req.srq_page_dir =
+		cpu_to_le64(ctx->srqc_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+	req.cq_page_dir =
+		cpu_to_le64(ctx->cq_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+	req.tim_page_dir =
+		cpu_to_le64(ctx->tim_tbl.pbl[PBL_LVL_0].pg_map_arr[0]);
+	req.tqm_page_dir =
+		cpu_to_le64(ctx->tqm_pde.pbl[PBL_LVL_0].pg_map_arr[0]);
+
+	req.number_of_qp = cpu_to_le32(ctx->qpc_tbl.max_elements);
+	req.number_of_mrw = cpu_to_le32(ctx->mrw_tbl.max_elements);
+	req.number_of_srq = cpu_to_le32(ctx->srqc_tbl.max_elements);
+	req.number_of_cq = cpu_to_le32(ctx->cq_tbl.max_elements);
+
+	req.max_qp_per_vf = cpu_to_le32(ctx->vf_res.max_qp_per_vf);
+	req.max_mrw_per_vf = cpu_to_le32(ctx->vf_res.max_mrw_per_vf);
+	req.max_srq_per_vf = cpu_to_le32(ctx->vf_res.max_srq_per_vf);
+	req.max_cq_per_vf = cpu_to_le32(ctx->vf_res.max_cq_per_vf);
+	req.max_gid_per_vf = cpu_to_le32(ctx->vf_res.max_gid_per_vf);
+
+skip_ctx_setup:
+	req.stat_ctx_id = cpu_to_le32(ctx->stats.fw_id);
+	resp = (struct creq_initialize_fw_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						     NULL, 0);
+	if (!resp) {
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: RCFW: INITIALIZE_FW send failed");
+		return -EINVAL;
+	}
+	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+		/* Cmd timed out */
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: RCFW: INITIALIZE_FW timed out");
+		return -ETIMEDOUT;
+	}
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: RCFW: INITIALIZE_FW failed");
+		return -EINVAL;
+	}
+	set_bit(FIRMWARE_INITIALIZED_FLAG, &rcfw->flags);
+	return 0;
+}
+
+void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
+{
+	bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->crsb);
+	kfree(rcfw->crsq.crsq);
+	bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->cmdq);
+	bnxt_qplib_free_hwq(rcfw->pdev, &rcfw->creq);
+
+	rcfw->pdev = NULL;
+}
+
+int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
+				  struct bnxt_qplib_rcfw *rcfw)
+{
+	rcfw->pdev = pdev;
+	rcfw->creq.max_elements = BNXT_QPLIB_CREQE_MAX_CNT;
+	if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->creq, NULL, 0,
+				      &rcfw->creq.max_elements,
+				      BNXT_QPLIB_CREQE_UNITS, 0, PAGE_SIZE,
+				      HWQ_TYPE_L2_CMPL)) {
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: HW channel CREQ allocation failed");
+		goto fail;
+	}
+	rcfw->cmdq.max_elements = BNXT_QPLIB_CMDQE_MAX_CNT;
+	if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->cmdq, NULL, 0,
+				      &rcfw->cmdq.max_elements,
+				      BNXT_QPLIB_CMDQE_UNITS, 0, PAGE_SIZE,
+				      HWQ_TYPE_CTX)) {
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: HW channel CMDQ allocation failed");
+		goto fail;
+	}
+
+	rcfw->crsq.max_elements = rcfw->cmdq.max_elements;
+	rcfw->crsq.crsq = kcalloc(rcfw->crsq.max_elements,
+				  sizeof(*rcfw->crsq.crsq), GFP_KERNEL);
+	if (!rcfw->crsq.crsq)
+		goto fail;
+
+	rcfw->crsb.max_elements = BNXT_QPLIB_CRSBE_MAX_CNT;
+	if (bnxt_qplib_alloc_init_hwq(rcfw->pdev, &rcfw->crsb, NULL, 0,
+				      &rcfw->crsb.max_elements,
+				      BNXT_QPLIB_CRSBE_UNITS, 0, PAGE_SIZE,
+				      HWQ_TYPE_CTX)) {
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: HW channel CRSB allocation failed");
+		goto fail;
+	}
+	return 0;
+
+fail:
+	bnxt_qplib_free_rcfw_channel(rcfw);
+	return -ENOMEM;
+}
+
+void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw)
+{
+	unsigned long indx;
+
+	/* Make sure the HW channel is stopped! */
+	synchronize_irq(rcfw->vector);
+	tasklet_disable(&rcfw->worker);
+	tasklet_kill(&rcfw->worker);
+
+	if (rcfw->requested) {
+		free_irq(rcfw->vector, rcfw);
+		rcfw->requested = false;
+	}
+	if (rcfw->cmdq_bar_reg_iomem)
+		iounmap(rcfw->cmdq_bar_reg_iomem);
+	rcfw->cmdq_bar_reg_iomem = NULL;
+
+	if (rcfw->creq_bar_reg_iomem)
+		iounmap(rcfw->creq_bar_reg_iomem);
+	rcfw->creq_bar_reg_iomem = NULL;
+
+	indx = find_first_bit(rcfw->cmdq_bitmap, rcfw->bmap_size);
+	if (indx != rcfw->bmap_size)
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: disabling RCFW with pending cmd-bit %lx", indx);
+	kfree(rcfw->cmdq_bitmap);
+	rcfw->bmap_size = 0;
+
+	rcfw->aeq_handler = NULL;
+	rcfw->vector = 0;
+}
+
+int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
+				   struct bnxt_qplib_rcfw *rcfw,
+				   int msix_vector,
+				   int cp_bar_reg_off, int virt_fn,
+				   int (*aeq_handler)(struct bnxt_qplib_rcfw *,
+						      struct creq_func_event *))
+{
+	resource_size_t res_base;
+	struct cmdq_init init;
+	u16 bmap_size;
+	int rc;
+
+	/* General */
+	atomic_set(&rcfw->seq_num, 0);
+	rcfw->flags = FIRMWARE_FIRST_FLAG;
+	bmap_size = BITS_TO_LONGS(RCFW_MAX_OUTSTANDING_CMD *
+				  sizeof(unsigned long));
+	rcfw->cmdq_bitmap = kzalloc(bmap_size, GFP_KERNEL);
+	if (!rcfw->cmdq_bitmap)
+		return -ENOMEM;
+	rcfw->bmap_size = bmap_size;
+
+	/* CMDQ */
+	rcfw->cmdq_bar_reg = RCFW_COMM_PCI_BAR_REGION;
+	res_base = pci_resource_start(pdev, rcfw->cmdq_bar_reg);
+	if (!res_base)
+		return -ENOMEM;
+
+	rcfw->cmdq_bar_reg_iomem = ioremap_nocache(res_base +
+					      RCFW_COMM_BASE_OFFSET,
+					      RCFW_COMM_SIZE);
+	if (!rcfw->cmdq_bar_reg_iomem) {
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: CMDQ BAR region %d mapping failed",
+			rcfw->cmdq_bar_reg);
+		return -ENOMEM;
+	}
+
+	rcfw->cmdq_bar_reg_prod_off = virt_fn ? RCFW_VF_COMM_PROD_OFFSET :
+					RCFW_PF_COMM_PROD_OFFSET;
+
+	rcfw->cmdq_bar_reg_trig_off = RCFW_COMM_TRIG_OFFSET;
+
+	/* CRSQ */
+	rcfw->crsq.prod = 0;
+	rcfw->crsq.cons = 0;
+
+	/* CREQ */
+	rcfw->creq_bar_reg = RCFW_COMM_CONS_PCI_BAR_REGION;
+	res_base = pci_resource_start(pdev, rcfw->creq_bar_reg);
+	if (!res_base)
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: CREQ BAR region %d resc start is 0!",
+			rcfw->creq_bar_reg);
+	rcfw->creq_bar_reg_iomem = ioremap_nocache(res_base + cp_bar_reg_off,
+						   4);
+	if (!rcfw->creq_bar_reg_iomem) {
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: CREQ BAR region %d mapping failed",
+			rcfw->creq_bar_reg);
+		return -ENOMEM;
+	}
+	rcfw->creq_qp_event_processed = 0;
+	rcfw->creq_func_event_processed = 0;
+
+	rcfw->vector = msix_vector;
+	if (aeq_handler)
+		rcfw->aeq_handler = aeq_handler;
+
+	tasklet_init(&rcfw->worker, bnxt_qplib_service_creq,
+		     (unsigned long)rcfw);
+
+	rcfw->requested = false;
+	rc = request_irq(rcfw->vector, bnxt_qplib_creq_irq, 0,
+			 "bnxt_qplib_creq", rcfw);
+	if (rc) {
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: Failed to request IRQ for CREQ rc = 0x%x", rc);
+		bnxt_qplib_disable_rcfw_channel(rcfw);
+		return rc;
+	}
+	rcfw->requested = true;
+
+	init_waitqueue_head(&rcfw->waitq);
+
+	CREQ_DB_REARM(rcfw->creq_bar_reg_iomem, 0, rcfw->creq.max_elements);
+
+	init.cmdq_pbl = cpu_to_le64(rcfw->cmdq.pbl[PBL_LVL_0].pg_map_arr[0]);
+	init.cmdq_size_cmdq_lvl = cpu_to_le16(
+		((BNXT_QPLIB_CMDQE_MAX_CNT << CMDQ_INIT_CMDQ_SIZE_SFT) &
+		 CMDQ_INIT_CMDQ_SIZE_MASK) |
+		((rcfw->cmdq.level << CMDQ_INIT_CMDQ_LVL_SFT) &
+		 CMDQ_INIT_CMDQ_LVL_MASK));
+	init.creq_ring_id = cpu_to_le16(rcfw->creq_ring_id);
+
+	/* Write to the Bono mailbox register */
+	__iowrite32_copy(rcfw->cmdq_bar_reg_iomem, &init, sizeof(init) / 4);
+	return 0;
+}
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.h
index 1f63956..b1e114e 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_rcfw.h
@@ -39,4 +39,193 @@
 #ifndef __BNXT_QPLIB_RCFW_H__
 #define __BNXT_QPLIB_RCFW_H__
 
+#define RCFW_CMDQ_TRIG_VAL		1
+#define RCFW_COMM_PCI_BAR_REGION	0
+#define RCFW_COMM_CONS_PCI_BAR_REGION	2
+#define RCFW_COMM_BASE_OFFSET		0x600
+#define RCFW_PF_COMM_PROD_OFFSET		0x7c
+#define RCFW_VF_COMM_PROD_OFFSET	0xc
+#define RCFW_COMM_TRIG_OFFSET		0x100
+#define RCFW_COMM_SIZE			0x104
+
+#define RCFW_DBR_PCI_BAR_REGION		2
+
+#define RCFW_CMD_PREP(req, CMD, cmd_flags)				\
+	do {								\
+		memset(&(req), 0, sizeof((req)));			\
+		(req).opcode = CMDQ_BASE_OPCODE_##CMD;			\
+		(req).cmd_size = (sizeof((req)) +			\
+				BNXT_QPLIB_CMDQE_UNITS - 1) /		\
+				BNXT_QPLIB_CMDQE_UNITS;			\
+		(req).flags = cpu_to_le16(cmd_flags);			\
+	} while (0)
+
+#define RCFW_CMD_WAIT_TIME_MS		20000 /* 20 Seconds timeout */
+
+/* CMDQ elements */
+#define BNXT_QPLIB_CMDQE_MAX_CNT	256
+#define BNXT_QPLIB_CMDQE_UNITS		sizeof(struct bnxt_qplib_cmdqe)
+#define BNXT_QPLIB_CMDQE_CNT_PER_PG	(PAGE_SIZE / BNXT_QPLIB_CMDQE_UNITS)
+
+#define MAX_CMDQ_IDX			(BNXT_QPLIB_CMDQE_MAX_CNT - 1)
+#define MAX_CMDQ_IDX_PER_PG		(BNXT_QPLIB_CMDQE_CNT_PER_PG - 1)
+
+#define RCFW_MAX_OUTSTANDING_CMD	BNXT_QPLIB_CMDQE_MAX_CNT
+#define RCFW_MAX_COOKIE_VALUE		0x7FFF
+#define RCFW_CMD_IS_BLOCKING		0x8000
+
+/* Cmdq contains a fix number of a 16-Byte slots */
+struct bnxt_qplib_cmdqe {
+	u8		data[16];
+};
+
+static inline u32 get_cmdq_pg(u32 val)
+{
+	return (val & ~MAX_CMDQ_IDX_PER_PG) / BNXT_QPLIB_CMDQE_CNT_PER_PG;
+}
+
+static inline u32 get_cmdq_idx(u32 val)
+{
+	return val & MAX_CMDQ_IDX_PER_PG;
+}
+
+/* Crsq buf is 1024-Byte */
+struct bnxt_qplib_crsbe {
+	u8			data[1024];
+};
+
+/* CRSQ SB */
+#define BNXT_QPLIB_CRSBE_MAX_CNT	4
+#define BNXT_QPLIB_CRSBE_UNITS		sizeof(struct bnxt_qplib_crsbe)
+#define BNXT_QPLIB_CRSBE_CNT_PER_PG	(PAGE_SIZE / BNXT_QPLIB_CRSBE_UNITS)
+
+#define MAX_CRSB_IDX			(BNXT_QPLIB_CRSBE_MAX_CNT - 1)
+#define MAX_CRSB_IDX_PER_PG		(BNXT_QPLIB_CRSBE_CNT_PER_PG - 1)
+
+static inline u32 get_crsb_pg(u32 val)
+{
+	return (val & ~MAX_CRSB_IDX_PER_PG) / BNXT_QPLIB_CRSBE_CNT_PER_PG;
+}
+
+static inline u32 get_crsb_idx(u32 val)
+{
+	return val & MAX_CRSB_IDX_PER_PG;
+}
+
+static inline void bnxt_qplib_crsb_dma_next(dma_addr_t *pg_map_arr,
+					    u32 prod, dma_addr_t *dma_addr)
+{
+		*dma_addr = pg_map_arr[(prod) / BNXT_QPLIB_CRSBE_CNT_PER_PG];
+		*dma_addr += ((prod) % BNXT_QPLIB_CRSBE_CNT_PER_PG) *
+			      BNXT_QPLIB_CRSBE_UNITS;
+}
+
+/* CREQ */
+/* Allocate 1 per QP for async error notification for now */
+#define BNXT_QPLIB_CREQE_MAX_CNT	(64 * 1024)
+#define BNXT_QPLIB_CREQE_UNITS		16	/* 16-Bytes per prod unit */
+#define BNXT_QPLIB_CREQE_CNT_PER_PG	(PAGE_SIZE / BNXT_QPLIB_CREQE_UNITS)
+
+#define MAX_CREQ_IDX			(BNXT_QPLIB_CREQE_MAX_CNT - 1)
+#define MAX_CREQ_IDX_PER_PG		(BNXT_QPLIB_CREQE_CNT_PER_PG - 1)
+
+static inline u32 get_creq_pg(u32 val)
+{
+	return (val & ~MAX_CREQ_IDX_PER_PG) / BNXT_QPLIB_CREQE_CNT_PER_PG;
+}
+
+static inline u32 get_creq_idx(u32 val)
+{
+	return val & MAX_CREQ_IDX_PER_PG;
+}
+
+#define BNXT_QPLIB_CREQE_PER_PG	(PAGE_SIZE / sizeof(struct creq_base))
+
+#define CREQ_CMP_VALID(hdr, raw_cons, cp_bit)			\
+	(!!((hdr)->v & CREQ_BASE_V) ==				\
+	   !((raw_cons) & (cp_bit)))
+
+#define CREQ_DB_KEY_CP			(0x2 << CMPL_DOORBELL_KEY_SFT)
+#define CREQ_DB_IDX_VALID		CMPL_DOORBELL_IDX_VALID
+#define CREQ_DB_IRQ_DIS			CMPL_DOORBELL_MASK
+#define CREQ_DB_CP_FLAGS_REARM		(CREQ_DB_KEY_CP |	\
+					 CREQ_DB_IDX_VALID)
+#define CREQ_DB_CP_FLAGS		(CREQ_DB_KEY_CP |	\
+					 CREQ_DB_IDX_VALID |	\
+					 CREQ_DB_IRQ_DIS)
+#define CREQ_DB_REARM(db, raw_cons, cp_bit)			\
+	writel(CREQ_DB_CP_FLAGS_REARM | ((raw_cons) & ((cp_bit) - 1)), db)
+#define CREQ_DB(db, raw_cons, cp_bit)				\
+	writel(CREQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
+
+/* HWQ */
+struct bnxt_qplib_crsqe {
+	struct creq_qp_event	qp_event;
+	u32			req_size;
+};
+
+struct bnxt_qplib_crsq {
+	struct bnxt_qplib_crsqe	*crsq;
+	u32			prod;
+	u32			cons;
+	u32			max_elements;
+};
+
+/* RCFW Communication Channels */
+struct bnxt_qplib_rcfw {
+	struct pci_dev		*pdev;
+	int			vector;
+	struct tasklet_struct	worker;
+	bool			requested;
+	unsigned long		*cmdq_bitmap;
+	u32			bmap_size;
+	unsigned long		flags;
+#define FIRMWARE_INITIALIZED_FLAG	1
+#define FIRMWARE_FIRST_FLAG		BIT(31)
+	wait_queue_head_t	waitq;
+	int			(*aeq_handler)(struct bnxt_qplib_rcfw *,
+					       struct creq_func_event *);
+	atomic_t		seq_num;
+
+	/* Bar region info */
+	void __iomem		*cmdq_bar_reg_iomem;
+	u16			cmdq_bar_reg;
+	u16			cmdq_bar_reg_prod_off;
+	u16			cmdq_bar_reg_trig_off;
+	u16			creq_ring_id;
+	u16			creq_bar_reg;
+	void __iomem		*creq_bar_reg_iomem;
+
+	/* Cmd-Resp and Async Event notification queue */
+	struct bnxt_qplib_hwq	creq;
+	u64			creq_qp_event_processed;
+	u64			creq_func_event_processed;
+
+	/* Actual Cmd and Resp Queues */
+	struct bnxt_qplib_hwq	cmdq;
+	struct bnxt_qplib_crsq	crsq;
+	struct bnxt_qplib_hwq	crsb;
+};
+
+void bnxt_qplib_free_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_alloc_rcfw_channel(struct pci_dev *pdev,
+				  struct bnxt_qplib_rcfw *rcfw);
+void bnxt_qplib_disable_rcfw_channel(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_enable_rcfw_channel(struct pci_dev *pdev,
+				   struct bnxt_qplib_rcfw *rcfw,
+				   int msix_vector,
+				   int cp_bar_reg_off, int virt_fn,
+				   int (*aeq_handler)
+					(struct bnxt_qplib_rcfw *,
+					 struct creq_func_event *));
+
+int bnxt_qplib_rcfw_block_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie);
+int bnxt_qplib_rcfw_wait_for_resp(struct bnxt_qplib_rcfw *rcfw, u16 cookie);
+void *bnxt_qplib_rcfw_send_message(struct bnxt_qplib_rcfw *rcfw,
+				   struct cmdq_base *req, void **crsbe,
+				   u8 is_block);
+
+int bnxt_qplib_deinit_rcfw(struct bnxt_qplib_rcfw *rcfw);
+int bnxt_qplib_init_rcfw(struct bnxt_qplib_rcfw *rcfw,
+			 struct bnxt_qplib_ctx *ctx, int is_virtfn);
 #endif /* __BNXT_QPLIB_RCFW_H__ */
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c
index 178eebd..eaaf67e 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c
@@ -35,3 +35,741 @@
  *
  * Description: QPLib resource manager
  */
+
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/inetdevice.h>
+#include <linux/dma-mapping.h>
+#include <linux/if_vlan.h>
+#include "bnxt_re_hsi.h"
+#include "bnxt_qplib_res.h"
+#include "bnxt_qplib_sp.h"
+#include "bnxt_qplib_rcfw.h"
+
+static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
+				      struct bnxt_qplib_stats *stats);
+static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+				      struct bnxt_qplib_stats *stats);
+
+/* PBL */
+static void __free_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
+		       bool is_umem)
+{
+	int i;
+
+	if (!is_umem) {
+		for (i = 0; i < pbl->pg_count; i++) {
+			if (pbl->pg_arr[i])
+				dma_free_coherent(&pdev->dev, pbl->pg_size,
+						  (void *)((unsigned long)
+						   pbl->pg_arr[i] &
+						  PAGE_MASK),
+						  pbl->pg_map_arr[i]);
+			else
+				dev_warn(&pdev->dev,
+					 "QPLIB: PBL free pg_arr[%d] empty?!",
+					 i);
+			pbl->pg_arr[i] = NULL;
+		}
+	}
+	kfree(pbl->pg_arr);
+	pbl->pg_arr = NULL;
+	kfree(pbl->pg_map_arr);
+	pbl->pg_map_arr = NULL;
+	pbl->pg_count = 0;
+	pbl->pg_size = 0;
+}
+
+static int __alloc_pbl(struct pci_dev *pdev, struct bnxt_qplib_pbl *pbl,
+		       struct scatterlist *sghead, u32 pages, u32 pg_size)
+{
+	struct scatterlist *sg;
+	bool is_umem = false;
+	int i;
+
+	/* page ptr arrays */
+	pbl->pg_arr = kcalloc(pages, sizeof(void *), GFP_KERNEL);
+	if (!pbl->pg_arr)
+		return -ENOMEM;
+
+	pbl->pg_map_arr = kcalloc(pages, sizeof(dma_addr_t), GFP_KERNEL);
+	if (!pbl->pg_map_arr) {
+		kfree(pbl->pg_arr);
+		pbl->pg_arr = NULL;
+		return -ENOMEM;
+	}
+	pbl->pg_count = 0;
+	pbl->pg_size = pg_size;
+
+	if (!sghead) {
+		for (i = 0; i < pages; i++) {
+			pbl->pg_arr[i] = dma_alloc_coherent(&pdev->dev,
+							    pbl->pg_size,
+							    &pbl->pg_map_arr[i],
+							    GFP_KERNEL);
+			if (!pbl->pg_arr[i])
+				goto fail;
+			memset(pbl->pg_arr[i], 0, pbl->pg_size);
+			pbl->pg_count++;
+		}
+	} else {
+		i = 0;
+		is_umem = true;
+		for_each_sg(sghead, sg, pages, i) {
+			pbl->pg_map_arr[i] = sg_dma_address(sg);
+			pbl->pg_arr[i] = sg_virt(sg);
+			if (!pbl->pg_arr[i])
+				goto fail;
+
+			pbl->pg_count++;
+		}
+	}
+
+	return 0;
+
+fail:
+	__free_pbl(pdev, pbl, is_umem);
+	return -ENOMEM;
+}
+
+/* HWQ */
+void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq)
+{
+	int i;
+
+	if (!hwq->max_elements)
+		return;
+	if (hwq->level >= PBL_LVL_MAX)
+		return;
+
+	for (i = 0; i < hwq->level + 1; i++) {
+		if (i == hwq->level)
+			__free_pbl(pdev, &hwq->pbl[i], hwq->is_user);
+		else
+			__free_pbl(pdev, &hwq->pbl[i], false);
+	}
+
+	hwq->level = PBL_LVL_MAX;
+	hwq->max_elements = 0;
+	hwq->element_size = 0;
+	hwq->prod = 0;
+	hwq->cons = 0;
+	hwq->cp_bit = 0;
+}
+
+/* All HWQs are power of 2 in size */
+int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
+			      struct scatterlist *sghead, int nmap,
+			      u32 *elements, u32 element_size, u32 aux,
+			      u32 pg_size, enum bnxt_qplib_hwq_type hwq_type)
+{
+	u32 pages, slots, size, aux_pages = 0, aux_size = 0;
+	dma_addr_t *src_phys_ptr, **dst_virt_ptr;
+	int i, rc;
+
+	hwq->level = PBL_LVL_MAX;
+
+	slots = roundup_pow_of_two(*elements);
+	if (aux) {
+		aux_size = roundup_pow_of_two(aux);
+		aux_pages = (slots * aux_size) / pg_size;
+		if ((slots * aux_size) % pg_size)
+			aux_pages++;
+	}
+	size = roundup_pow_of_two(element_size);
+
+	if (!sghead) {
+		hwq->is_user = false;
+		pages = (slots * size) / pg_size + aux_pages;
+		if ((slots * size) % pg_size)
+			pages++;
+		if (!pages)
+			return -EINVAL;
+	} else {
+		hwq->is_user = true;
+		pages = nmap;
+	}
+
+	/* Alloc the 1st memory block; can be a PDL/PTL/PBL */
+	if (sghead && (pages == MAX_PBL_LVL_0_PGS))
+		rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], sghead,
+				 pages, pg_size);
+	else
+		rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_0], NULL, 1, pg_size);
+	if (rc)
+		goto fail;
+
+	hwq->level = PBL_LVL_0;
+
+	if (pages > MAX_PBL_LVL_0_PGS) {
+		if (pages > MAX_PBL_LVL_1_PGS) {
+			/* 2 levels of indirection */
+			rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], NULL,
+					 MAX_PBL_LVL_1_PGS_FOR_LVL_2, pg_size);
+			if (rc)
+				goto fail;
+			/* Fill in lvl0 PBL */
+			dst_virt_ptr =
+				(dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr;
+			src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr;
+			for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++)
+				dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+					src_phys_ptr[i] | PTU_PDE_VALID;
+			hwq->level = PBL_LVL_1;
+
+			rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_2], sghead,
+					 pages, pg_size);
+			if (rc)
+				goto fail;
+
+			/* Fill in lvl1 PBL */
+			dst_virt_ptr =
+				(dma_addr_t **)hwq->pbl[PBL_LVL_1].pg_arr;
+			src_phys_ptr = hwq->pbl[PBL_LVL_2].pg_map_arr;
+			for (i = 0; i < hwq->pbl[PBL_LVL_2].pg_count; i++) {
+				dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+					src_phys_ptr[i] | PTU_PTE_VALID;
+			}
+			if (hwq_type == HWQ_TYPE_QUEUE) {
+				/* Find the last pg of the size */
+				i = hwq->pbl[PBL_LVL_2].pg_count;
+				dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
+								  PTU_PTE_LAST;
+				if (i > 1)
+					dst_virt_ptr[PTR_PG(i - 2)]
+						    [PTR_IDX(i - 2)] |=
+						    PTU_PTE_NEXT_TO_LAST;
+			}
+			hwq->level = PBL_LVL_2;
+		} else {
+			u32 flag = hwq_type == HWQ_TYPE_L2_CMPL ? 0 :
+						PTU_PTE_VALID;
+
+			/* 1 level of indirection */
+			rc = __alloc_pbl(pdev, &hwq->pbl[PBL_LVL_1], sghead,
+					 pages, pg_size);
+			if (rc)
+				goto fail;
+			/* Fill in lvl0 PBL */
+			dst_virt_ptr =
+				(dma_addr_t **)hwq->pbl[PBL_LVL_0].pg_arr;
+			src_phys_ptr = hwq->pbl[PBL_LVL_1].pg_map_arr;
+			for (i = 0; i < hwq->pbl[PBL_LVL_1].pg_count; i++) {
+				dst_virt_ptr[PTR_PG(i)][PTR_IDX(i)] =
+					src_phys_ptr[i] | flag;
+			}
+			if (hwq_type == HWQ_TYPE_QUEUE) {
+				/* Find the last pg of the size */
+				i = hwq->pbl[PBL_LVL_1].pg_count;
+				dst_virt_ptr[PTR_PG(i - 1)][PTR_IDX(i - 1)] |=
+								  PTU_PTE_LAST;
+				if (i > 1)
+					dst_virt_ptr[PTR_PG(i - 2)]
+						    [PTR_IDX(i - 2)] |=
+						    PTU_PTE_NEXT_TO_LAST;
+			}
+			hwq->level = PBL_LVL_1;
+		}
+	}
+	hwq->pdev = pdev;
+	spin_lock_init(&hwq->lock);
+	hwq->prod = 0;
+	hwq->cons = 0;
+	*elements = hwq->max_elements = slots;
+	hwq->element_size = size;
+
+	/* For direct access to the elements */
+	hwq->pbl_ptr = hwq->pbl[hwq->level].pg_arr;
+	hwq->pbl_dma_ptr = hwq->pbl[hwq->level].pg_map_arr;
+
+	return 0;
+
+fail:
+	bnxt_qplib_free_hwq(pdev, hwq);
+	return -ENOMEM;
+}
+
+/* Context Tables */
+void bnxt_qplib_free_ctx(struct pci_dev *pdev,
+			 struct bnxt_qplib_ctx *ctx)
+{
+	int i;
+
+	bnxt_qplib_free_hwq(pdev, &ctx->qpc_tbl);
+	bnxt_qplib_free_hwq(pdev, &ctx->mrw_tbl);
+	bnxt_qplib_free_hwq(pdev, &ctx->srqc_tbl);
+	bnxt_qplib_free_hwq(pdev, &ctx->cq_tbl);
+	bnxt_qplib_free_hwq(pdev, &ctx->tim_tbl);
+	for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
+		bnxt_qplib_free_hwq(pdev, &ctx->tqm_tbl[i]);
+	bnxt_qplib_free_hwq(pdev, &ctx->tqm_pde);
+	bnxt_qplib_free_stats_ctx(pdev, &ctx->stats);
+}
+
+/*
+ * Routine: bnxt_qplib_alloc_ctx
+ * Description:
+ *     Context tables are memories which are used by the chip fw.
+ *     The 6 tables defined are:
+ *             QPC ctx - holds QP states
+ *             MRW ctx - holds memory region and window
+ *             SRQ ctx - holds shared RQ states
+ *             CQ ctx - holds completion queue states
+ *             TQM ctx - holds Tx Queue Manager context
+ *             TIM ctx - holds timer context
+ *     Depending on the size of the tbl requested, either a 1 Page Buffer List
+ *     or a 1-to-2-stage indirection Page Directory List + 1 PBL is used
+ *     instead.
+ *     Table might be employed as follows:
+ *             For 0      < ctx size <= 1 PAGE, 0 level of ind is used
+ *             For 1 PAGE < ctx size <= 512 entries size, 1 level of ind is used
+ *             For 512    < ctx size <= MAX, 2 levels of ind is used
+ * Returns:
+ *     0 if success, else -ERRORS
+ */
+int bnxt_qplib_alloc_ctx(struct pci_dev *pdev,
+			 struct bnxt_qplib_ctx *ctx,
+			 bool virt_fn)
+{
+	int i, j, k, rc = 0;
+	int fnz_idx = -1;
+	__le64 **pbl_ptr;
+
+	if (virt_fn)
+		goto stats_alloc;
+
+	/* QPC Tables */
+	ctx->qpc_tbl.max_elements = ctx->qpc_count;
+	rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->qpc_tbl, NULL, 0,
+				       &ctx->qpc_tbl.max_elements,
+				       BNXT_QPLIB_MAX_QP_CTX_ENTRY_SIZE, 0,
+				       PAGE_SIZE, HWQ_TYPE_CTX);
+	if (rc)
+		goto fail;
+
+	/* MRW Tables */
+	ctx->mrw_tbl.max_elements = ctx->mrw_count;
+	rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->mrw_tbl, NULL, 0,
+				       &ctx->mrw_tbl.max_elements,
+				       BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE, 0,
+				       PAGE_SIZE, HWQ_TYPE_CTX);
+	if (rc)
+		goto fail;
+
+	/* SRQ Tables */
+	ctx->srqc_tbl.max_elements = ctx->srqc_count;
+	rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->srqc_tbl, NULL, 0,
+				       &ctx->srqc_tbl.max_elements,
+				       BNXT_QPLIB_MAX_SRQ_CTX_ENTRY_SIZE, 0,
+				       PAGE_SIZE, HWQ_TYPE_CTX);
+	if (rc)
+		goto fail;
+
+	/* CQ Tables */
+	ctx->cq_tbl.max_elements = ctx->cq_count;
+	rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->cq_tbl, NULL, 0,
+				       &ctx->cq_tbl.max_elements,
+				       BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE, 0,
+				       PAGE_SIZE, HWQ_TYPE_CTX);
+	if (rc)
+		goto fail;
+
+	/* TQM Buffer */
+	ctx->tqm_pde.max_elements = 512;
+	rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_pde, NULL, 0,
+				       &ctx->tqm_pde.max_elements, sizeof(u64),
+				       0, PAGE_SIZE, HWQ_TYPE_CTX);
+	if (rc)
+		goto fail;
+
+	for (i = 0; i < MAX_TQM_ALLOC_REQ; i++) {
+		if (!ctx->tqm_count[i])
+			continue;
+		ctx->tqm_tbl[i].max_elements = ctx->qpc_count *
+					       ctx->tqm_count[i];
+		rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tqm_tbl[i], NULL, 0,
+					       &ctx->tqm_tbl[i].max_elements, 1,
+					       0, PAGE_SIZE, HWQ_TYPE_CTX);
+		if (rc)
+			goto fail;
+	}
+	pbl_ptr = (__le64 **)ctx->tqm_pde.pbl_ptr;
+	for (i = 0, j = 0; i < MAX_TQM_ALLOC_REQ;
+	     i++, j += MAX_TQM_ALLOC_BLK_SIZE) {
+		if (!ctx->tqm_tbl[i].max_elements)
+			continue;
+		if (fnz_idx == -1)
+			fnz_idx = i;
+		switch (ctx->tqm_tbl[i].level) {
+		case PBL_LVL_2:
+			for (k = 0; k < ctx->tqm_tbl[i].pbl[PBL_LVL_1].pg_count;
+			     k++)
+				pbl_ptr[PTR_PG(j + k)][PTR_IDX(j + k)] =
+				  cpu_to_le64(
+				    ctx->tqm_tbl[i].pbl[PBL_LVL_1].pg_map_arr[k]
+				    | PTU_PTE_VALID);
+			break;
+		case PBL_LVL_1:
+		case PBL_LVL_0:
+		default:
+			pbl_ptr[PTR_PG(j)][PTR_IDX(j)] = cpu_to_le64(
+				ctx->tqm_tbl[i].pbl[PBL_LVL_0].pg_map_arr[0] |
+				PTU_PTE_VALID);
+			break;
+		}
+	}
+	if (fnz_idx == -1)
+		fnz_idx = 0;
+	ctx->tqm_pde_level = ctx->tqm_tbl[fnz_idx].level == PBL_LVL_2 ?
+			     PBL_LVL_2 : ctx->tqm_tbl[fnz_idx].level + 1;
+
+	/* TIM Buffer */
+	ctx->tim_tbl.max_elements = ctx->qpc_count * 16;
+	rc = bnxt_qplib_alloc_init_hwq(pdev, &ctx->tim_tbl, NULL, 0,
+				       &ctx->tim_tbl.max_elements, 1,
+				       0, PAGE_SIZE, HWQ_TYPE_CTX);
+	if (rc)
+		goto fail;
+
+stats_alloc:
+	/* Stats */
+	rc = bnxt_qplib_alloc_stats_ctx(pdev, &ctx->stats);
+	if (rc)
+		goto fail;
+
+	return 0;
+
+fail:
+	bnxt_qplib_free_ctx(pdev, ctx);
+	return rc;
+}
+
+static void bnxt_qplib_free_sgid_tbl(struct bnxt_qplib_res *res,
+				     struct bnxt_qplib_sgid_tbl *sgid_tbl)
+{
+	kfree(sgid_tbl->tbl);
+	kfree(sgid_tbl->hw_id);
+	kfree(sgid_tbl->ctx);
+	sgid_tbl->tbl = NULL;
+	sgid_tbl->hw_id = NULL;
+	sgid_tbl->ctx = NULL;
+	sgid_tbl->max = 0;
+	sgid_tbl->active = 0;
+}
+
+static int bnxt_qplib_alloc_sgid_tbl(struct bnxt_qplib_res *res,
+				     struct bnxt_qplib_sgid_tbl *sgid_tbl,
+				     u16 max)
+{
+	sgid_tbl->tbl = kcalloc(max, sizeof(struct bnxt_qplib_gid), GFP_KERNEL);
+	if (!sgid_tbl->tbl)
+		return -ENOMEM;
+
+	sgid_tbl->hw_id = kcalloc(max, sizeof(u16), GFP_KERNEL);
+	if (!sgid_tbl->hw_id)
+		goto out_free1;
+
+	sgid_tbl->ctx = kcalloc(max, sizeof(void *), GFP_KERNEL);
+	if (!sgid_tbl->ctx)
+		goto out_free2;
+
+	sgid_tbl->max = max;
+	return 0;
+out_free2:
+	kfree(sgid_tbl->hw_id);
+	sgid_tbl->hw_id = NULL;
+out_free1:
+	kfree(sgid_tbl->tbl);
+	sgid_tbl->tbl = NULL;
+	return -ENOMEM;
+};
+
+static void bnxt_qplib_cleanup_sgid_tbl(struct bnxt_qplib_res *res,
+					struct bnxt_qplib_sgid_tbl *sgid_tbl)
+{
+	int i;
+
+	memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max);
+	memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
+	sgid_tbl->active = 0;
+}
+
+static void bnxt_qplib_init_sgid_tbl(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+				     struct net_device *netdev)
+{
+	memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max);
+	memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
+}
+
+static void bnxt_qplib_free_pkey_tbl(struct bnxt_qplib_res *res,
+				     struct bnxt_qplib_pkey_tbl *pkey_tbl)
+{
+	if (!pkey_tbl->tbl)
+		dev_dbg(&res->pdev->dev, "QPLIB: PKEY tbl not present");
+	else
+		kfree(pkey_tbl->tbl);
+
+	pkey_tbl->tbl = NULL;
+	pkey_tbl->max = 0;
+	pkey_tbl->active = 0;
+}
+
+static int bnxt_qplib_alloc_pkey_tbl(struct bnxt_qplib_res *res,
+				     struct bnxt_qplib_pkey_tbl *pkey_tbl,
+				     u16 max)
+{
+	pkey_tbl->tbl = kcalloc(max, sizeof(u16), GFP_KERNEL);
+	if (!pkey_tbl->tbl)
+		return -ENOMEM;
+
+	pkey_tbl->max = max;
+	return 0;
+};
+
+static void bnxt_qplib_free_pd_tbl(struct bnxt_qplib_pd_tbl *pdt)
+{
+	kfree(pdt->tbl);
+	pdt->tbl = NULL;
+	pdt->max = 0;
+}
+
+static int bnxt_qplib_alloc_pd_tbl(struct bnxt_qplib_res *res,
+				   struct bnxt_qplib_pd_tbl *pdt,
+				   u32 max)
+{
+	u32 bytes;
+
+	bytes = max >> 3;
+	if (!bytes)
+		bytes = 1;
+	pdt->tbl = kmalloc(bytes, GFP_KERNEL);
+	if (!pdt->tbl)
+		return -ENOMEM;
+
+	pdt->max = max;
+	memset((u8 *)pdt->tbl, 0xFF, bytes);
+
+	return 0;
+}
+
+/* DPIs */
+int bnxt_qplib_alloc_dpi(struct bnxt_qplib_dpi_tbl *dpit,
+			 struct bnxt_qplib_dpi     *dpi,
+			 void                      *app)
+{
+	u32 bit_num;
+
+	bit_num = find_first_bit(dpit->tbl, dpit->max);
+	if (bit_num == dpit->max)
+		return -ENOMEM;
+
+	/* Found unused DPI */
+	clear_bit(bit_num, dpit->tbl);
+	dpit->app_tbl[bit_num] = app;
+
+	dpi->dpi = bit_num;
+	dpi->dbr = dpit->dbr_bar_reg_iomem + (bit_num * PAGE_SIZE);
+	dpi->umdbr = dpit->unmapped_dbr + (bit_num * PAGE_SIZE);
+
+	return 0;
+}
+
+int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
+			   struct bnxt_qplib_dpi_tbl *dpit,
+			   struct bnxt_qplib_dpi     *dpi)
+{
+	if (dpi->dpi >= dpit->max) {
+		dev_warn(&res->pdev->dev, "Invalid DPI? dpi = %d", dpi->dpi);
+		return -EINVAL;
+	}
+	if (test_and_set_bit(dpi->dpi, dpit->tbl)) {
+		dev_warn(&res->pdev->dev, "Freeing an unused DPI? dpi = %d",
+			 dpi->dpi);
+		return -EINVAL;
+	}
+	if (dpit->app_tbl)
+		dpit->app_tbl[dpi->dpi] = NULL;
+	memset(dpi, 0, sizeof(*dpi));
+
+	return 0;
+}
+
+static void bnxt_qplib_free_dpi_tbl(struct bnxt_qplib_res     *res,
+				    struct bnxt_qplib_dpi_tbl *dpit)
+{
+	kfree(dpit->tbl);
+	kfree(dpit->app_tbl);
+	if (dpit->dbr_bar_reg_iomem)
+		pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem);
+	memset(dpit, 0, sizeof(*dpit));
+}
+
+static int bnxt_qplib_alloc_dpi_tbl(struct bnxt_qplib_res     *res,
+				    struct bnxt_qplib_dpi_tbl *dpit,
+				    u32                       dbr_offset)
+{
+	u32 dbr_bar_reg = RCFW_DBR_PCI_BAR_REGION;
+	resource_size_t bar_reg_base;
+	u32 dbr_len, bytes;
+
+	if (dpit->dbr_bar_reg_iomem) {
+		dev_err(&res->pdev->dev,
+			"QPLIB: DBR BAR region %d already mapped", dbr_bar_reg);
+		return -EALREADY;
+	}
+
+	bar_reg_base = pci_resource_start(res->pdev, dbr_bar_reg);
+	if (!bar_reg_base) {
+		dev_err(&res->pdev->dev,
+			"QPLIB: BAR region %d resc start failed", dbr_bar_reg);
+		return -ENOMEM;
+	}
+
+	dbr_len = pci_resource_len(res->pdev, dbr_bar_reg) - dbr_offset;
+	if (!dbr_len || ((dbr_len & (PAGE_SIZE - 1)) != 0)) {
+		dev_err(&res->pdev->dev, "QPLIB: Invalid DBR length %d",
+			dbr_len);
+		return -ENOMEM;
+	}
+
+	dpit->dbr_bar_reg_iomem = ioremap_nocache(bar_reg_base + dbr_offset,
+						  dbr_len);
+	if (!dpit->dbr_bar_reg_iomem) {
+		dev_err(&res->pdev->dev,
+			"QPLIB: FP: DBR BAR region %d mapping failed",
+			dbr_bar_reg);
+		return -ENOMEM;
+	}
+
+	dpit->unmapped_dbr = bar_reg_base + dbr_offset;
+	dpit->max = dbr_len / PAGE_SIZE;
+
+	dpit->app_tbl = kcalloc(dpit->max, sizeof(void *), GFP_KERNEL);
+	if (!dpit->app_tbl) {
+		pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem);
+		dev_err(&res->pdev->dev,
+			"QPLIB: DPI app tbl allocation failed");
+		return -ENOMEM;
+	}
+
+	bytes = dpit->max >> 3;
+	if (!bytes)
+		bytes = 1;
+
+	dpit->tbl = kmalloc(bytes, GFP_KERNEL);
+	if (!dpit->tbl) {
+		pci_iounmap(res->pdev, dpit->dbr_bar_reg_iomem);
+		kfree(dpit->app_tbl);
+		dpit->app_tbl = NULL;
+		dev_err(&res->pdev->dev,
+			"QPLIB: DPI tbl allocation failed for size = %d",
+			bytes);
+		return -ENOMEM;
+	}
+
+	memset((u8 *)dpit->tbl, 0xFF, bytes);
+
+	return 0;
+}
+
+/* PKEYs */
+static void bnxt_qplib_cleanup_pkey_tbl(struct bnxt_qplib_pkey_tbl *pkey_tbl)
+{
+	memset(pkey_tbl->tbl, 0, sizeof(u16) * pkey_tbl->max);
+	pkey_tbl->active = 0;
+}
+
+static void bnxt_qplib_init_pkey_tbl(struct bnxt_qplib_res *res,
+				     struct bnxt_qplib_pkey_tbl *pkey_tbl)
+{
+	u16 pkey = 0xFFFF;
+
+	memset(pkey_tbl->tbl, 0, sizeof(u16) * pkey_tbl->max);
+
+	/* pkey default = 0xFFFF */
+	bnxt_qplib_add_pkey(res, pkey_tbl, &pkey, false);
+}
+
+/* Stats */
+static void bnxt_qplib_free_stats_ctx(struct pci_dev *pdev,
+				      struct bnxt_qplib_stats *stats)
+{
+	if (stats->dma) {
+		dma_free_coherent(&pdev->dev, stats->size,
+				  stats->dma, stats->dma_map);
+	}
+	memset(stats, 0, sizeof(*stats));
+	stats->fw_id = -1;
+}
+
+static int bnxt_qplib_alloc_stats_ctx(struct pci_dev *pdev,
+				      struct bnxt_qplib_stats *stats)
+{
+	memset(stats, 0, sizeof(*stats));
+	stats->fw_id = -1;
+	stats->size = sizeof(struct ctx_hw_stats);
+	stats->dma = dma_alloc_coherent(&pdev->dev, stats->size,
+					&stats->dma_map, GFP_KERNEL);
+	if (!stats->dma) {
+		dev_err(&pdev->dev, "QPLIB: Stats DMA allocation failed");
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res)
+{
+	bnxt_qplib_cleanup_pkey_tbl(&res->pkey_tbl);
+	bnxt_qplib_cleanup_sgid_tbl(res, &res->sgid_tbl);
+}
+
+int bnxt_qplib_init_res(struct bnxt_qplib_res *res)
+{
+	bnxt_qplib_init_sgid_tbl(&res->sgid_tbl, res->netdev);
+	bnxt_qplib_init_pkey_tbl(res, &res->pkey_tbl);
+
+	return 0;
+}
+
+void bnxt_qplib_free_res(struct bnxt_qplib_res *res)
+{
+	bnxt_qplib_free_pkey_tbl(res, &res->pkey_tbl);
+	bnxt_qplib_free_sgid_tbl(res, &res->sgid_tbl);
+	bnxt_qplib_free_pd_tbl(&res->pd_tbl);
+	bnxt_qplib_free_dpi_tbl(res, &res->dpi_tbl);
+
+	res->netdev = NULL;
+	res->pdev = NULL;
+}
+
+int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev,
+			 struct net_device *netdev,
+			 struct bnxt_qplib_dev_attr *dev_attr)
+{
+	int rc = 0;
+
+	res->pdev = pdev;
+	res->netdev = netdev;
+
+	rc = bnxt_qplib_alloc_sgid_tbl(res, &res->sgid_tbl, dev_attr->max_sgid);
+	if (rc)
+		goto fail;
+
+	rc = bnxt_qplib_alloc_pkey_tbl(res, &res->pkey_tbl, dev_attr->max_pkey);
+	if (rc)
+		goto fail;
+
+	rc = bnxt_qplib_alloc_pd_tbl(res, &res->pd_tbl, dev_attr->max_pd);
+	if (rc)
+		goto fail;
+
+	rc = bnxt_qplib_alloc_dpi_tbl(res, &res->dpi_tbl, dev_attr->l2_db_size);
+	if (rc)
+		goto fail;
+
+	return 0;
+fail:
+	bnxt_qplib_free_res(res);
+	return rc;
+}
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
index 5fc4107..ce122cf 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
@@ -39,4 +39,169 @@
 #ifndef __BNXT_QPLIB_RES_H__
 #define __BNXT_QPLIB_RES_H__
 
+extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;
+
+#define PTR_CNT_PER_PG		(PAGE_SIZE / sizeof(void *))
+#define PTR_MAX_IDX_PER_PG	(PTR_CNT_PER_PG - 1)
+#define PTR_PG(x)		(((x) & ~PTR_MAX_IDX_PER_PG) / PTR_CNT_PER_PG)
+#define PTR_IDX(x)		((x) & PTR_MAX_IDX_PER_PG)
+
+#define HWQ_CMP(idx, hwq)	((idx) & ((hwq)->max_elements - 1))
+
+enum bnxt_qplib_hwq_type {
+	HWQ_TYPE_CTX,
+	HWQ_TYPE_QUEUE,
+	HWQ_TYPE_L2_CMPL
+};
+
+#define MAX_PBL_LVL_0_PGS		1
+#define MAX_PBL_LVL_1_PGS		512
+#define MAX_PBL_LVL_1_PGS_SHIFT		9
+#define MAX_PBL_LVL_1_PGS_FOR_LVL_2	256
+#define MAX_PBL_LVL_2_PGS		(256 * 512)
+
+enum bnxt_qplib_pbl_lvl {
+	PBL_LVL_0,
+	PBL_LVL_1,
+	PBL_LVL_2,
+	PBL_LVL_MAX
+};
+
+#define ROCE_PG_SIZE_4K		(4 * 1024)
+#define ROCE_PG_SIZE_8K		(8 * 1024)
+#define ROCE_PG_SIZE_64K	(64 * 1024)
+#define ROCE_PG_SIZE_2M		(2 * 1024 * 1024)
+#define ROCE_PG_SIZE_8M		(8 * 1024 * 1024)
+#define ROCE_PG_SIZE_1G		(1024 * 1024 * 1024)
+
+struct bnxt_qplib_pbl {
+	u32				pg_count;
+	u32				pg_size;
+	void				**pg_arr;
+	dma_addr_t			*pg_map_arr;
+};
+
+struct bnxt_qplib_hwq {
+	struct pci_dev			*pdev;
+	/* lock to protect qplib_hwq */
+	spinlock_t			lock;
+	struct bnxt_qplib_pbl		pbl[PBL_LVL_MAX];
+	enum bnxt_qplib_pbl_lvl		level;		/* 0, 1, or 2 */
+	/* ptr for easy access to the PBL entries */
+	void				**pbl_ptr;
+	/* ptr for easy access to the dma_addr */
+	dma_addr_t			*pbl_dma_ptr;
+	u32				max_elements;
+	u16				element_size;	/* Size of each entry */
+
+	u32				prod;		/* raw */
+	u32				cons;		/* raw */
+	u8				cp_bit;
+	u8				is_user;
+};
+
+/* Tables */
+struct bnxt_qplib_pd_tbl {
+	unsigned long			*tbl;
+	u32				max;
+};
+
+struct bnxt_qplib_sgid_tbl {
+	struct bnxt_qplib_gid		*tbl;
+	u16				*hw_id;
+	u16				max;
+	u16				active;
+	void				*ctx;
+};
+
+struct bnxt_qplib_pkey_tbl {
+	u16				*tbl;
+	u16				max;
+	u16				active;
+};
+
+struct bnxt_qplib_dpi {
+	u32				dpi;
+	void __iomem			*dbr;
+	u64				umdbr;
+};
+
+struct bnxt_qplib_dpi_tbl {
+	void				**app_tbl;
+	unsigned long			*tbl;
+	u16				max;
+	void __iomem			*dbr_bar_reg_iomem;
+	u64				unmapped_dbr;
+};
+
+struct bnxt_qplib_stats {
+	dma_addr_t			dma_map;
+	void				*dma;
+	u32				size;
+	u32				fw_id;
+};
+
+struct bnxt_qplib_vf_res {
+	u32 max_qp_per_vf;
+	u32 max_mrw_per_vf;
+	u32 max_srq_per_vf;
+	u32 max_cq_per_vf;
+	u32 max_gid_per_vf;
+};
+
+#define BNXT_QPLIB_MAX_QP_CTX_ENTRY_SIZE	448
+#define BNXT_QPLIB_MAX_SRQ_CTX_ENTRY_SIZE	64
+#define BNXT_QPLIB_MAX_CQ_CTX_ENTRY_SIZE	64
+#define BNXT_QPLIB_MAX_MRW_CTX_ENTRY_SIZE	128
+
+struct bnxt_qplib_ctx {
+	u32				qpc_count;
+	struct bnxt_qplib_hwq		qpc_tbl;
+	u32				mrw_count;
+	struct bnxt_qplib_hwq		mrw_tbl;
+	u32				srqc_count;
+	struct bnxt_qplib_hwq		srqc_tbl;
+	u32				cq_count;
+	struct bnxt_qplib_hwq		cq_tbl;
+	struct bnxt_qplib_hwq		tim_tbl;
+#define MAX_TQM_ALLOC_REQ		32
+#define MAX_TQM_ALLOC_BLK_SIZE		8
+	u8				tqm_count[MAX_TQM_ALLOC_REQ];
+	struct bnxt_qplib_hwq		tqm_pde;
+	u32				tqm_pde_level;
+	struct bnxt_qplib_hwq		tqm_tbl[MAX_TQM_ALLOC_REQ];
+	struct bnxt_qplib_stats		stats;
+	struct bnxt_qplib_vf_res	vf_res;
+};
+
+struct bnxt_qplib_res {
+	struct pci_dev			*pdev;
+	struct net_device		*netdev;
+
+	struct bnxt_qplib_rcfw		*rcfw;
+
+	struct bnxt_qplib_pd_tbl	pd_tbl;
+	struct bnxt_qplib_sgid_tbl	sgid_tbl;
+	struct bnxt_qplib_pkey_tbl	pkey_tbl;
+	struct bnxt_qplib_dpi_tbl	dpi_tbl;
+};
+
+struct bnxt_qplib_dev_attr;
+
+void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq);
+int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
+			      struct scatterlist *sl, int nmap, u32 *elements,
+			      u32 elements_per_page, u32 aux, u32 pg_size,
+			      enum bnxt_qplib_hwq_type hwq_type);
+void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res);
+int bnxt_qplib_init_res(struct bnxt_qplib_res *res);
+void bnxt_qplib_free_res(struct bnxt_qplib_res *res);
+int bnxt_qplib_alloc_res(struct bnxt_qplib_res *res, struct pci_dev *pdev,
+			 struct net_device *netdev,
+			 struct bnxt_qplib_dev_attr *dev_attr);
+void bnxt_qplib_free_ctx(struct pci_dev *pdev,
+			 struct bnxt_qplib_ctx *ctx);
+int bnxt_qplib_alloc_ctx(struct pci_dev *pdev,
+			 struct bnxt_qplib_ctx *ctx,
+			 bool virt_fn);
 #endif /* __BNXT_QPLIB_RES_H__ */
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c
index 667c8e1..103f9ab 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c
@@ -35,3 +35,166 @@
  *
  * Description: Slow Path Operators
  */
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/pci.h>
+
+#include "bnxt_re_hsi.h"
+
+#include "bnxt_qplib_res.h"
+#include "bnxt_qplib_rcfw.h"
+#include "bnxt_qplib_sp.h"
+/* Device */
+int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
+			    struct bnxt_qplib_dev_attr *attr)
+{
+	struct cmdq_query_func req;
+	struct creq_query_func_resp *resp;
+	struct creq_query_func_resp_sb *sb;
+	u16 cmd_flags = 0;
+	u32 temp;
+	u8 *tqm_alloc;
+	int i;
+
+	RCFW_CMD_PREP(req, QUERY_FUNC, cmd_flags);
+
+	req.resp_size = sizeof(*sb) / BNXT_QPLIB_CMDQE_UNITS;
+	resp = (struct creq_query_func_resp *)
+		bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, (void **)&sb,
+					     0);
+	if (!resp) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC send failed");
+		return -EINVAL;
+	}
+	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+		/* Cmd timed out */
+		dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC timed out");
+		return -ETIMEDOUT;
+	}
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: SP: QUERY_FUNC failed ");
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+			resp->status, le16_to_cpu(req.cookie),
+			le16_to_cpu(resp->cookie));
+		return -EINVAL;
+	}
+	/* Extract the context from the side buffer */
+	attr->max_qp = le32_to_cpu(sb->max_qp);
+	attr->max_qp_rd_atom =
+		sb->max_qp_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
+		BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_rd_atom;
+	attr->max_qp_init_rd_atom =
+		sb->max_qp_init_rd_atom > BNXT_QPLIB_MAX_OUT_RD_ATOM ?
+		BNXT_QPLIB_MAX_OUT_RD_ATOM : sb->max_qp_init_rd_atom;
+	attr->max_qp_wqes = le16_to_cpu(sb->max_qp_wr);
+	attr->max_qp_sges = sb->max_sge;
+	attr->max_cq = le32_to_cpu(sb->max_cq);
+	attr->max_cq_wqes = le32_to_cpu(sb->max_cqe);
+	attr->max_cq_sges = attr->max_qp_sges;
+	attr->max_mr = le32_to_cpu(sb->max_mr);
+	attr->max_mw = le32_to_cpu(sb->max_mw);
+
+	attr->max_mr_size = le64_to_cpu(sb->max_mr_size);
+	attr->max_pd = 64 * 1024;
+	attr->max_raw_ethy_qp = le32_to_cpu(sb->max_raw_eth_qp);
+	attr->max_ah = le32_to_cpu(sb->max_ah);
+
+	attr->max_fmr = le32_to_cpu(sb->max_fmr);
+	attr->max_map_per_fmr = sb->max_map_per_fmr;
+
+	attr->max_srq = le16_to_cpu(sb->max_srq);
+	attr->max_srq_wqes = le32_to_cpu(sb->max_srq_wr) - 1;
+	attr->max_srq_sges = sb->max_srq_sge;
+	/* Bono only reports 1 PKEY for now, but it can support > 1 */
+	attr->max_pkey = le32_to_cpu(sb->max_pkeys);
+
+	attr->max_inline_data = le32_to_cpu(sb->max_inline_data);
+	attr->l2_db_size = (sb->l2_db_space_size + 1) * PAGE_SIZE;
+	attr->max_sgid = le32_to_cpu(sb->max_gid);
+
+	strlcpy(attr->fw_ver, "20.6.28.0", sizeof(attr->fw_ver));
+
+	for (i = 0; i < MAX_TQM_ALLOC_REQ / 4; i++) {
+		temp = le32_to_cpu(sb->tqm_alloc_reqs[i]);
+		tqm_alloc = (u8 *)&temp;
+		attr->tqm_alloc_reqs[i * 4] = *tqm_alloc;
+		attr->tqm_alloc_reqs[i * 4 + 1] = *(++tqm_alloc);
+		attr->tqm_alloc_reqs[i * 4 + 2] = *(++tqm_alloc);
+		attr->tqm_alloc_reqs[i * 4 + 3] = *(++tqm_alloc);
+	}
+	return 0;
+}
+
+int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
+			struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
+			bool update)
+{
+	int i, rc = 0;
+
+	if (!pkey_tbl) {
+		dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated");
+		return -EINVAL;
+	}
+
+	/* Do we need a pkey_lock here? */
+	if (!pkey_tbl->active) {
+		dev_err(&res->pdev->dev,
+			"QPLIB: PKEY table has no active entries");
+		return -ENOMEM;
+	}
+	for (i = 0; i < pkey_tbl->max; i++) {
+		if (!memcmp(&pkey_tbl->tbl[i], pkey, sizeof(*pkey)))
+			break;
+	}
+	if (i == pkey_tbl->max) {
+		dev_err(&res->pdev->dev,
+			"QPLIB: PKEY 0x%04x not found in the pkey table",
+			*pkey);
+		return -ENOMEM;
+	}
+	memset(&pkey_tbl->tbl[i], 0, sizeof(*pkey));
+	pkey_tbl->active--;
+
+	/* unlock */
+	return rc;
+}
+
+int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
+			struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
+			bool update)
+{
+	int i, free_idx, rc = 0;
+
+	if (!pkey_tbl) {
+		dev_err(&res->pdev->dev, "QPLIB: PKEY table not allocated");
+		return -EINVAL;
+	}
+
+	/* Do we need a pkey_lock here? */
+	if (pkey_tbl->active == pkey_tbl->max) {
+		dev_err(&res->pdev->dev, "QPLIB: PKEY table is full");
+		return -ENOMEM;
+	}
+	free_idx = pkey_tbl->max;
+	for (i = 0; i < pkey_tbl->max; i++) {
+		if (!memcmp(&pkey_tbl->tbl[i], pkey, sizeof(*pkey)))
+			return -EALREADY;
+		else if (!pkey_tbl->tbl[i] && free_idx == pkey_tbl->max)
+			free_idx = i;
+	}
+	if (free_idx == pkey_tbl->max) {
+		dev_err(&res->pdev->dev,
+			"QPLIB: PKEY table is FULL but count is not MAX??");
+		return -ENOMEM;
+	}
+	/* Add PKEY to the pkey_tbl */
+	memcpy(&pkey_tbl->tbl[free_idx], pkey, sizeof(*pkey));
+	pkey_tbl->active++;
+
+	/* unlock */
+	return rc;
+}
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
index 90901c1..0b5adda 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
@@ -40,4 +40,46 @@
 #ifndef __BNXT_QPLIB_SP_H__
 #define __BNXT_QPLIB_SP_H__
 
+struct bnxt_qplib_dev_attr {
+	char				fw_ver[32];
+	u16				max_sgid;
+	u16				max_mrw;
+	u32				max_qp;
+#define BNXT_QPLIB_MAX_OUT_RD_ATOM	126
+	u32				max_qp_rd_atom;
+	u32				max_qp_init_rd_atom;
+	u32				max_qp_wqes;
+	u32				max_qp_sges;
+	u32				max_cq;
+	u32				max_cq_wqes;
+	u32				max_cq_sges;
+	u32				max_mr;
+	u64				max_mr_size;
+	u32				max_pd;
+	u32				max_mw;
+	u32				max_raw_ethy_qp;
+	u32				max_ah;
+	u32				max_fmr;
+	u32				max_map_per_fmr;
+	u32				max_srq;
+	u32				max_srq_wqes;
+	u32				max_srq_sges;
+	u32				max_pkey;
+	u32				max_inline_data;
+	u32				l2_db_size;
+	u8				tqm_alloc_reqs[MAX_TQM_ALLOC_REQ];
+};
+
+struct bnxt_qplib_gid {
+	u8				data[16];
+};
+
+int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
+			struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
+			bool update);
+int bnxt_qplib_add_pkey(struct bnxt_qplib_res *res,
+			struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
+			bool update);
+int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
+			    struct bnxt_qplib_dev_attr *attr);
 #endif /* __BNXT_QPLIB_SP_H__*/
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re.h b/drivers/infiniband/hw/bnxtre/bnxt_re.h
index 8b73e3d..78e95a5 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re.h
@@ -45,6 +45,11 @@
 #define BNXT_RE_REF_WAIT_COUNT		10
 #define BNXT_RE_DESC	"Broadcom NetXtreme-C/E RoCE Driver"
 
+#define BNXT_RE_MAX_QPC_COUNT		(64 * 1024)
+#define BNXT_RE_MAX_MRW_COUNT		(64 * 1024)
+#define BNXT_RE_MAX_SRQC_COUNT		(64 * 1024)
+#define BNXT_RE_MAX_CQ_COUNT		(64 * 1024)
+
 struct bnxt_re_work {
 	struct work_struct	work;
 	unsigned long		event;
@@ -54,6 +59,7 @@ struct bnxt_re_work {
 
 #define BNXT_RE_MIN_MSIX		2
 #define BNXT_RE_MAX_MSIX		16
+#define BNXT_RE_AEQ_IDX			0
 struct bnxt_re_dev {
 	struct ib_device		ibdev;
 	struct list_head		list;
@@ -72,6 +78,15 @@ struct bnxt_re_dev {
 
 	int				id;
 
+	/* RCFW Channel */
+	struct bnxt_qplib_rcfw		rcfw;
+
+	/* Device Resources */
+	struct bnxt_qplib_dev_attr	dev_attr;
+	struct bnxt_qplib_ctx		qplib_ctx;
+	struct bnxt_qplib_res		qplib_res;
+	struct bnxt_qplib_dpi		dpi_privileged;
+
 	atomic_t			qp_count;
 	struct mutex			qp_lock;	/* protect qp list */
 	struct list_head		qp_list;
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
index fbd2ad3..f584786 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
@@ -54,6 +54,10 @@
 
 #include "bnxt_ulp.h"
 #include "bnxt_re_hsi.h"
+#include "bnxt_qplib_res.h"
+#include "bnxt_qplib_sp.h"
+#include "bnxt_qplib_fp.h"
+#include "bnxt_qplib_rcfw.h"
 #include "bnxt_re.h"
 #include "bnxt.h"
 static char version[] =
@@ -192,6 +196,160 @@ static int bnxt_re_request_msix(struct bnxt_re_dev *rdev)
 	return rc;
 }
 
+static void bnxt_re_init_hwrm_hdr(struct bnxt_re_dev *rdev, struct input *hdr,
+				  u16 opcd, u16 crid, u16 trid)
+{
+	hdr->req_type = cpu_to_le16(opcd);
+	hdr->cmpl_ring = cpu_to_le16(crid);
+	hdr->target_id = cpu_to_le16(trid);
+}
+
+static void bnxt_re_fill_fw_msg(struct bnxt_fw_msg *fw_msg, void *msg,
+				int msg_len, void *resp, int resp_max_len,
+				int timeout)
+{
+	fw_msg->msg = msg;
+	fw_msg->msg_len = msg_len;
+	fw_msg->resp = resp;
+	fw_msg->resp_max_len = resp_max_len;
+	fw_msg->timeout = timeout;
+}
+
+static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, u16 fw_ring_id,
+				 bool lock_wait)
+{
+	struct bnxt_en_dev *en_dev = rdev->en_dev;
+	struct hwrm_ring_free_input req = {0};
+	struct hwrm_ring_free_output resp;
+	struct bnxt_fw_msg fw_msg;
+	bool do_unlock = false;
+	int rc = -EINVAL;
+
+	if (!en_dev)
+		return rc;
+
+	memset(&fw_msg, 0, sizeof(fw_msg));
+	if (lock_wait) {
+		rtnl_lock();
+		do_unlock = true;
+	}
+
+	bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_FREE, -1, -1);
+	req.ring_type = RING_ALLOC_REQ_RING_TYPE_CMPL;
+	req.ring_id = cpu_to_le16(fw_ring_id);
+	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+	if (rc)
+		dev_err(rdev_to_dev(rdev),
+			"Failed to free HW ring:%d :%#x", req.ring_id, rc);
+	if (do_unlock)
+		rtnl_unlock();
+	return rc;
+}
+
+static int bnxt_re_net_ring_alloc(struct bnxt_re_dev *rdev, dma_addr_t *dma_arr,
+				  int pages, int type, u32 ring_mask,
+				  u32 map_index, u16 *fw_ring_id)
+{
+	struct bnxt_en_dev *en_dev = rdev->en_dev;
+	struct hwrm_ring_alloc_input req = {0};
+	struct hwrm_ring_alloc_output resp;
+	struct bnxt_fw_msg fw_msg;
+	int rc = -EINVAL;
+
+	if (!en_dev)
+		return rc;
+
+	memset(&fw_msg, 0, sizeof(fw_msg));
+	rtnl_lock();
+	bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_RING_ALLOC, -1, -1);
+	req.enables = 0;
+	req.page_tbl_addr =  cpu_to_le64(dma_arr[0]);
+	if (pages > 1) {
+		/* Page size is in log2 units */
+		req.page_size = BNXT_PAGE_SHIFT;
+		req.page_tbl_depth = 1;
+	}
+	req.fbo = 0;
+	/* Association of ring index with doorbell index and MSIX number */
+	req.logical_id = cpu_to_le16(map_index);
+	req.length = cpu_to_le32(ring_mask + 1);
+	req.ring_type = RING_ALLOC_REQ_RING_TYPE_CMPL;
+	req.int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+	if (!rc)
+		*fw_ring_id = le16_to_cpu(resp.ring_id);
+
+	rtnl_unlock();
+	return rc;
+}
+
+static int bnxt_re_net_stats_ctx_free(struct bnxt_re_dev *rdev,
+				      u32 fw_stats_ctx_id, bool lock_wait)
+{
+	struct bnxt_en_dev *en_dev = rdev->en_dev;
+	struct hwrm_stat_ctx_free_input req = {0};
+	struct bnxt_fw_msg fw_msg;
+	bool do_unlock = false;
+	int rc = -EINVAL;
+
+	if (!en_dev)
+		return rc;
+
+	memset(&fw_msg, 0, sizeof(fw_msg));
+	if (lock_wait) {
+		rtnl_lock();
+		do_unlock = true;
+	}
+
+	bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_FREE, -1, -1);
+	req.stat_ctx_id = cpu_to_le32(fw_stats_ctx_id);
+	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&req,
+			    sizeof(req), DFLT_HWRM_CMD_TIMEOUT);
+	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+	if (rc)
+		dev_err(rdev_to_dev(rdev),
+			"Failed to free HW stats context %#x", rc);
+
+	if (do_unlock)
+		rtnl_unlock();
+	return rc;
+}
+
+static int bnxt_re_net_stats_ctx_alloc(struct bnxt_re_dev *rdev,
+				       dma_addr_t dma_map,
+				       u32 *fw_stats_ctx_id)
+{
+	struct hwrm_stat_ctx_alloc_output resp = {0};
+	struct hwrm_stat_ctx_alloc_input req = {0};
+	struct bnxt_en_dev *en_dev = rdev->en_dev;
+	struct bnxt_fw_msg fw_msg;
+	int rc = -EINVAL;
+
+	*fw_stats_ctx_id = INVALID_STATS_CTX_ID;
+
+	if (!en_dev)
+		return rc;
+
+	memset(&fw_msg, 0, sizeof(fw_msg));
+	rtnl_lock();
+
+	bnxt_re_init_hwrm_hdr(rdev, (void *)&req, HWRM_STAT_CTX_ALLOC, -1, -1);
+	req.update_period_ms = cpu_to_le32(1000);
+	req.stats_dma_addr = cpu_to_le64(dma_map);
+	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+	if (!rc)
+		*fw_stats_ctx_id = le32_to_cpu(resp.stat_ctx_id);
+
+	rtnl_unlock();
+	return rc;
+}
+
 /* Device */
 
 static bool is_bnxt_re_dev(struct net_device *netdev)
@@ -258,6 +416,62 @@ static struct bnxt_en_dev *bnxt_re_dev_probe(struct net_device *netdev)
 	return en_dev;
 }
 
+static void bnxt_re_unregister_ib(struct bnxt_re_dev *rdev)
+{
+	ib_unregister_device(&rdev->ibdev);
+}
+
+static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
+{
+	struct ib_device *ibdev = &rdev->ibdev;
+
+	/* ib device init */
+	ibdev->owner = THIS_MODULE;
+	ibdev->node_type = RDMA_NODE_IB_CA;
+	strlcpy(ibdev->name, "bnxt_re%d", IB_DEVICE_NAME_MAX);
+	strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA",
+		strlen(BNXT_RE_DESC) + 5);
+	ibdev->phys_port_cnt = 1;
+
+	ibdev->num_comp_vectors	= 1;
+	ibdev->dma_device = &rdev->en_dev->pdev->dev;
+	return ib_register_device(ibdev, NULL);
+}
+
+static ssize_t show_rev(struct device *device, struct device_attribute *attr,
+			char *buf)
+{
+	struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+	return scnprintf(buf, PAGE_SIZE, "0x%x\n", rdev->en_dev->pdev->vendor);
+}
+
+static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr,
+			   char *buf)
+{
+	struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->dev_attr.fw_ver);
+}
+
+static ssize_t show_hca(struct device *device, struct device_attribute *attr,
+			char *buf)
+{
+	struct bnxt_re_dev *rdev = to_bnxt_re_dev(device, ibdev.dev);
+
+	return scnprintf(buf, PAGE_SIZE, "%s\n", rdev->ibdev.node_desc);
+}
+
+static DEVICE_ATTR(hw_rev, 0444, show_rev, NULL);
+static DEVICE_ATTR(fw_rev, 0444, show_fw_ver, NULL);
+static DEVICE_ATTR(hca_type, 0444, show_hca, NULL);
+
+static struct device_attribute *bnxt_re_attributes[] = {
+	&dev_attr_hw_rev,
+	&dev_attr_fw_rev,
+	&dev_attr_hca_type
+};
+
 static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
 {
 	int i = BNXT_RE_REF_WAIT_COUNT;
@@ -320,10 +534,109 @@ static struct bnxt_re_dev *bnxt_re_dev_add(struct net_device *netdev,
 	return rdev;
 }
 
+static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
+			       struct creq_func_event *aeqe)
+{
+	switch (aeqe->event) {
+	case CREQ_FUNC_EVENT_EVENT_TX_WQE_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_TX_DATA_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_RX_WQE_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_RX_DATA_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_CQ_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_TQM_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_CFCQ_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_CFCS_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_CFCC_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_CFCM_ERROR:
+		break;
+	case CREQ_FUNC_EVENT_EVENT_TIM_ERROR:
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
+{
+	if (rdev->qplib_res.rcfw)
+		bnxt_qplib_cleanup_res(&rdev->qplib_res);
+}
+
+static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
+{
+	int rc = 0;
+
+	bnxt_qplib_init_res(&rdev->qplib_res);
+
+	return rc;
+}
+
+static void bnxt_re_free_res(struct bnxt_re_dev *rdev, bool lock_wait)
+{
+	if (rdev->qplib_res.rcfw) {
+		bnxt_qplib_free_res(&rdev->qplib_res);
+		rdev->qplib_res.rcfw = NULL;
+	}
+}
+
+static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
+{
+	int rc = 0;
+
+	/* Configure and allocate resources for qplib */
+	rdev->qplib_res.rcfw = &rdev->rcfw;
+	rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
+	if (rc)
+		goto fail;
+
+	rc = bnxt_qplib_alloc_res(&rdev->qplib_res, rdev->en_dev->pdev,
+				  rdev->netdev, &rdev->dev_attr);
+	if (rc)
+		goto fail;
+
+	return 0;
+
+fail:
+	rdev->qplib_res.rcfw = NULL;
+	return rc;
+}
+
 static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
 {
-	int rc;
+	int i, rc;
+
+	if (test_and_clear_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags)) {
+		for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++)
+			device_remove_file(&rdev->ibdev.dev,
+					   bnxt_re_attributes[i]);
+		/* Cleanup ib dev */
+		bnxt_re_unregister_ib(rdev);
+	}
+	bnxt_re_cleanup_res(rdev);
+	bnxt_re_free_res(rdev, lock_wait);
 
+	if (test_and_clear_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags)) {
+		rc = bnxt_qplib_deinit_rcfw(&rdev->rcfw);
+		if (rc)
+			dev_warn(rdev_to_dev(rdev),
+				 "Failed to deinitialize RCFW: %#x", rc);
+		bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id,
+					   lock_wait);
+		bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
+		bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
+		bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, lock_wait);
+		bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
+	}
 	if (test_and_clear_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags)) {
 		rc = bnxt_re_free_msix(rdev, lock_wait);
 		if (rc)
@@ -338,6 +651,19 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
 	}
 }
 
+static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
+{
+	u32 i;
+
+	rdev->qplib_ctx.qpc_count = BNXT_RE_MAX_QPC_COUNT;
+	rdev->qplib_ctx.mrw_count = BNXT_RE_MAX_MRW_COUNT;
+	rdev->qplib_ctx.srqc_count = BNXT_RE_MAX_SRQC_COUNT;
+	rdev->qplib_ctx.cq_count = BNXT_RE_MAX_CQ_COUNT;
+	for (i = 0; i < MAX_TQM_ALLOC_REQ; i++)
+		rdev->qplib_ctx.tqm_count[i] =
+		rdev->dev_attr.tqm_alloc_reqs[i];
+}
+
 static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 {
 	int i, j, rc;
@@ -358,7 +684,103 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 	}
 	set_bit(BNXT_RE_FLAG_GOT_MSIX, &rdev->flags);
 
+	/* Establish RCFW Communication Channel to initialize the context
+	 * memory for the function and all child VFs
+	 */
+	rc = bnxt_qplib_alloc_rcfw_channel(rdev->en_dev->pdev, &rdev->rcfw);
+	if (rc)
+		goto fail;
+
+	rc = bnxt_re_net_ring_alloc
+			(rdev, rdev->rcfw.creq.pbl[PBL_LVL_0].pg_map_arr,
+			 rdev->rcfw.creq.pbl[rdev->rcfw.creq.level].pg_count,
+			 HWRM_RING_ALLOC_CMPL, BNXT_QPLIB_CREQE_MAX_CNT - 1,
+			 rdev->msix_entries[BNXT_RE_AEQ_IDX].ring_idx,
+			 &rdev->rcfw.creq_ring_id);
+	if (rc) {
+		pr_err("Failed to allocate CREQ: %#x\n", rc);
+		goto free_rcfw;
+	}
+	rc = bnxt_qplib_enable_rcfw_channel
+				(rdev->en_dev->pdev, &rdev->rcfw,
+				 rdev->msix_entries[BNXT_RE_AEQ_IDX].vector,
+				 rdev->msix_entries[BNXT_RE_AEQ_IDX].db_offset,
+				 0, &bnxt_re_aeq_handler);
+	if (rc) {
+		pr_err("Failed to enable RCFW channel: %#x\n", rc);
+		goto free_ring;
+	}
+
+	rc = bnxt_qplib_get_dev_attr(&rdev->rcfw, &rdev->dev_attr);
+	if (rc)
+		goto disable_rcfw;
+	bnxt_re_set_resource_limits(rdev);
+
+	rc = bnxt_qplib_alloc_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx, 0);
+	if (rc) {
+		pr_err("Failed to allocate QPLIB context: %#x\n", rc);
+		goto disable_rcfw;
+	}
+	rc = bnxt_re_net_stats_ctx_alloc(rdev,
+					 rdev->qplib_ctx.stats.dma_map,
+					 &rdev->qplib_ctx.stats.fw_id);
+	if (rc) {
+		pr_err("Failed to allocate stats context: %#x\n", rc);
+		goto free_ctx;
+	}
+
+	rc = bnxt_qplib_init_rcfw(&rdev->rcfw, &rdev->qplib_ctx, 0);
+	if (rc) {
+		pr_err("Failed to initialize RCFW: %#x\n", rc);
+		goto free_sctx;
+	}
+	set_bit(BNXT_RE_FLAG_RCFW_CHANNEL_EN, &rdev->flags);
+
+	/* Resources based on the 'new' device caps */
+	rc = bnxt_re_alloc_res(rdev);
+	if (rc) {
+		pr_err("Failed to allocate resources: %#x\n", rc);
+		goto fail;
+	}
+	rc = bnxt_re_init_res(rdev);
+	if (rc) {
+		pr_err("Failed to initialize resources: %#x\n", rc);
+		goto fail;
+	}
+
+	/* Register ib dev */
+	rc = bnxt_re_register_ib(rdev);
+	if (rc) {
+		pr_err("Failed to register with IB: %#x\n", rc);
+		goto fail;
+	}
+	dev_info(rdev_to_dev(rdev), "Device registered successfully");
+	for (i = 0; i < ARRAY_SIZE(bnxt_re_attributes); i++) {
+		rc = device_create_file(&rdev->ibdev.dev,
+					bnxt_re_attributes[i]);
+		if (rc) {
+			dev_err(rdev_to_dev(rdev),
+				"Failed to create IB sysfs: %#x", rc);
+			/* Must clean up all created device files */
+			for (j = 0; j < i; j++)
+				device_remove_file(&rdev->ibdev.dev,
+						   bnxt_re_attributes[j]);
+			bnxt_re_unregister_ib(rdev);
+			goto fail;
+		}
+	}
+	set_bit(BNXT_RE_FLAG_IBDEV_REGISTERED, &rdev->flags);
 	return 0;
+free_sctx:
+	bnxt_re_net_stats_ctx_free(rdev, rdev->qplib_ctx.stats.fw_id, true);
+free_ctx:
+	bnxt_qplib_free_ctx(rdev->en_dev->pdev, &rdev->qplib_ctx);
+disable_rcfw:
+	bnxt_qplib_disable_rcfw_channel(&rdev->rcfw);
+free_ring:
+	bnxt_re_net_ring_free(rdev, rdev->rcfw.creq_ring_id, true);
+free_rcfw:
+	bnxt_qplib_free_rcfw_channel(&rdev->rcfw);
 fail:
 	bnxt_re_ib_unreg(rdev, true);
 	return rc;
@@ -427,14 +849,10 @@ static void bnxt_re_task(struct work_struct *work)
 				"Failed to register with IB: %#x", rc);
 		break;
 	case NETDEV_UP:
-
 		break;
 	case NETDEV_DOWN:
-
 		break;
-
 	case NETDEV_CHANGE:
-
 		break;
 	default:
 		break;
-- 
2.5.5

^ permalink raw reply related

* [PATCH for bnxt_re V3 05/21] bnxt_re: Adding Notification Queue support
From: Selvin Xavier @ 2016-12-20  9:13 UTC (permalink / raw)
  To: dledford, linux-rdma
  Cc: netdev, michael.chan, Selvin Xavier, Eddie Wai, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482225211-22423-1-git-send-email-selvin.xavier@broadcom.com>

Completion Notifcations are handled by Notification Queue (NQ). This
patch configures the NQs. Also, configures the Door bell page mapping

v3: Fixes some sparse warnings related to endianness checks

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c  | 161 ++++++++++++++++++++++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h  |  60 ++++++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h |   6 +
 drivers/infiniband/hw/bnxtre/bnxt_re.h        |   8 ++
 drivers/infiniband/hw/bnxtre/bnxt_re_main.c   |  52 ++++++++-
 5 files changed, 286 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c
index 36c4b81..bf2de8e 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c
@@ -35,3 +35,164 @@
  *
  * Description: Fast Path Operators
  */
+
+#include <linux/interrupt.h>
+#include <linux/spinlock.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/prefetch.h>
+
+#include "bnxt_re_hsi.h"
+
+#include "bnxt_qplib_res.h"
+#include "bnxt_qplib_rcfw.h"
+#include "bnxt_qplib_sp.h"
+#include "bnxt_qplib_fp.h"
+
+static void bnxt_qplib_service_nq(unsigned long data)
+{
+	struct bnxt_qplib_nq *nq = (struct bnxt_qplib_nq *)data;
+	struct bnxt_qplib_hwq *hwq = &nq->hwq;
+	struct nq_base *nqe, **nq_ptr;
+	u32 sw_cons, raw_cons;
+	u16 type;
+	int budget = nq->budget;
+
+	/* Service the NQ until empty */
+	raw_cons = hwq->cons;
+	while (budget--) {
+		sw_cons = HWQ_CMP(raw_cons, hwq);
+		nq_ptr = (struct nq_base **)hwq->pbl_ptr;
+		nqe = &nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)];
+		if (!NQE_CMP_VALID(nqe, raw_cons, hwq->max_elements))
+			break;
+
+		type = le16_to_cpu(nqe->info10_type) & NQ_BASE_TYPE_MASK;
+		switch (type) {
+		case NQ_BASE_TYPE_CQ_NOTIFICATION:
+			break;
+		case NQ_BASE_TYPE_DBQ_EVENT:
+			break;
+		default:
+			dev_warn(&nq->pdev->dev,
+				 "QPLIB: nqe with type = 0x%x not handled",
+				 type);
+			break;
+		}
+		raw_cons++;
+	}
+	if (hwq->cons != raw_cons) {
+		hwq->cons = raw_cons;
+		NQ_DB_REARM(nq->bar_reg_iomem, hwq->cons, hwq->max_elements);
+	}
+}
+
+static irqreturn_t bnxt_qplib_nq_irq(int irq, void *dev_instance)
+{
+	struct bnxt_qplib_nq *nq = dev_instance;
+	struct bnxt_qplib_hwq *hwq = &nq->hwq;
+	struct nq_base **nq_ptr;
+	u32 sw_cons;
+
+	/* Prefetch the NQ element */
+	sw_cons = HWQ_CMP(hwq->cons, hwq);
+	nq_ptr = (struct nq_base **)nq->hwq.pbl_ptr;
+	prefetch(&nq_ptr[NQE_PG(sw_cons)][NQE_IDX(sw_cons)]);
+
+	/* Fan out to CPU affinitized kthreads? */
+	tasklet_schedule(&nq->worker);
+
+	return IRQ_HANDLED;
+}
+
+void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq)
+{
+	/* Make sure the HW is stopped! */
+	synchronize_irq(nq->vector);
+	tasklet_disable(&nq->worker);
+	tasklet_kill(&nq->worker);
+
+	if (nq->requested) {
+		free_irq(nq->vector, nq);
+		nq->requested = false;
+	}
+	if (nq->bar_reg_iomem)
+		iounmap(nq->bar_reg_iomem);
+	nq->bar_reg_iomem = NULL;
+
+	nq->cqn_handler = NULL;
+	nq->srqn_handler = NULL;
+	nq->vector = 0;
+}
+
+int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
+			 int msix_vector, int bar_reg_offset,
+			 int (*cqn_handler)(struct bnxt_qplib_nq *nq,
+					    void *),
+			 int (*srqn_handler)(struct bnxt_qplib_nq *nq,
+					     void *, u8 event))
+{
+	resource_size_t nq_base;
+	int rc;
+
+	nq->pdev = pdev;
+	nq->vector = msix_vector;
+
+	nq->cqn_handler = cqn_handler;
+
+	nq->srqn_handler = srqn_handler;
+
+	tasklet_init(&nq->worker, bnxt_qplib_service_nq, (unsigned long)nq);
+
+	nq->requested = false;
+	rc = request_irq(nq->vector, bnxt_qplib_nq_irq, 0, "bnxt_qplib_nq", nq);
+	if (rc) {
+		dev_err(&nq->pdev->dev,
+			"Failed to request IRQ for NQ: %#x", rc);
+		bnxt_qplib_disable_nq(nq);
+		goto fail;
+	}
+	nq->requested = true;
+	nq->bar_reg = NQ_CONS_PCI_BAR_REGION;
+	nq->bar_reg_off = bar_reg_offset;
+	nq_base = pci_resource_start(pdev, nq->bar_reg);
+	if (!nq_base) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+	nq->bar_reg_iomem = ioremap_nocache(nq_base + nq->bar_reg_off, 4);
+	if (!nq->bar_reg_iomem) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+	NQ_DB_REARM(nq->bar_reg_iomem, nq->hwq.cons, nq->hwq.max_elements);
+
+	return 0;
+fail:
+	bnxt_qplib_disable_nq(nq);
+	return rc;
+}
+
+void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq)
+{
+	if (nq->hwq.max_elements)
+		bnxt_qplib_free_hwq(nq->pdev, &nq->hwq);
+}
+
+int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq)
+{
+	nq->pdev = pdev;
+	if (!nq->hwq.max_elements ||
+	    nq->hwq.max_elements > BNXT_QPLIB_NQE_MAX_CNT)
+		nq->hwq.max_elements = BNXT_QPLIB_NQE_MAX_CNT;
+
+	if (bnxt_qplib_alloc_init_hwq(nq->pdev, &nq->hwq, NULL, 0,
+				      &nq->hwq.max_elements,
+				      BNXT_QPLIB_MAX_NQE_ENTRY_SIZE, 0,
+				      PAGE_SIZE, HWQ_TYPE_L2_CMPL))
+		return -ENOMEM;
+
+	nq->budget = 8;
+	return 0;
+}
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h
index 0983465..25a4494 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h
@@ -39,4 +39,64 @@
 #ifndef __BNXT_QPLIB_FP_H__
 #define __BNXT_QPLIB_FP_H__
 
+#define BNXT_QPLIB_MAX_NQE_ENTRY_SIZE	sizeof(struct nq_base)
+
+#define NQE_CNT_PER_PG		(PAGE_SIZE / BNXT_QPLIB_MAX_NQE_ENTRY_SIZE)
+#define NQE_MAX_IDX_PER_PG	(NQE_CNT_PER_PG - 1)
+#define NQE_PG(x)		(((x) & ~NQE_MAX_IDX_PER_PG) / NQE_CNT_PER_PG)
+#define NQE_IDX(x)		((x) & NQE_MAX_IDX_PER_PG)
+
+#define NQE_CMP_VALID(hdr, raw_cons, cp_bit)			\
+	(!!(le32_to_cpu((hdr)->info63_v[0]) & NQ_BASE_V) ==	\
+	   !((raw_cons) & (cp_bit)))
+
+#define BNXT_QPLIB_NQE_MAX_CNT		(128 * 1024)
+
+#define NQ_CONS_PCI_BAR_REGION		2
+#define NQ_DB_KEY_CP			(0x2 << CMPL_DOORBELL_KEY_SFT)
+#define NQ_DB_IDX_VALID			CMPL_DOORBELL_IDX_VALID
+#define NQ_DB_IRQ_DIS			CMPL_DOORBELL_MASK
+#define NQ_DB_CP_FLAGS_REARM		(NQ_DB_KEY_CP |		\
+					 NQ_DB_IDX_VALID)
+#define NQ_DB_CP_FLAGS			(NQ_DB_KEY_CP    |	\
+					 NQ_DB_IDX_VALID |	\
+					 NQ_DB_IRQ_DIS)
+#define NQ_DB_REARM(db, raw_cons, cp_bit)			\
+	writel(NQ_DB_CP_FLAGS_REARM | ((raw_cons) & ((cp_bit) - 1)), db)
+#define NQ_DB(db, raw_cons, cp_bit)				\
+	writel(NQ_DB_CP_FLAGS | ((raw_cons) & ((cp_bit) - 1)), db)
+
+struct bnxt_qplib_nq {
+	struct pci_dev			*pdev;
+
+	int				vector;
+	int				budget;
+	bool				requested;
+	struct tasklet_struct		worker;
+	struct bnxt_qplib_hwq		hwq;
+
+	u16				bar_reg;
+	u16				bar_reg_off;
+	u16				ring_id;
+	void __iomem			*bar_reg_iomem;
+
+	int				(*cqn_handler)
+						(struct bnxt_qplib_nq *nq,
+						 void *cq);
+	int				(*srqn_handler)
+						(struct bnxt_qplib_nq *nq,
+						 void *srq,
+						 u8 event);
+};
+
+void bnxt_qplib_disable_nq(struct bnxt_qplib_nq *nq);
+int bnxt_qplib_enable_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq,
+			 int msix_vector, int bar_reg_offset,
+			 int (*cqn_handler)(struct bnxt_qplib_nq *nq,
+					    void *cq),
+			 int (*srqn_handler)(struct bnxt_qplib_nq *nq,
+					     void *srq,
+					     u8 event));
+void bnxt_qplib_free_nq(struct bnxt_qplib_nq *nq);
+int bnxt_qplib_alloc_nq(struct pci_dev *pdev, struct bnxt_qplib_nq *nq);
 #endif /* __BNXT_QPLIB_FP_H__ */
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
index ce122cf..571feda 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
@@ -193,6 +193,12 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
 			      struct scatterlist *sl, int nmap, u32 *elements,
 			      u32 elements_per_page, u32 aux, u32 pg_size,
 			      enum bnxt_qplib_hwq_type hwq_type);
+int bnxt_qplib_alloc_dpi(struct bnxt_qplib_dpi_tbl *dpit,
+			 struct bnxt_qplib_dpi     *dpi,
+			 void                      *app);
+int bnxt_qplib_dealloc_dpi(struct bnxt_qplib_res *res,
+			   struct bnxt_qplib_dpi_tbl *dpi_tbl,
+			   struct bnxt_qplib_dpi *dpi);
 void bnxt_qplib_cleanup_res(struct bnxt_qplib_res *res);
 int bnxt_qplib_init_res(struct bnxt_qplib_res *res);
 void bnxt_qplib_free_res(struct bnxt_qplib_res *res);
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re.h b/drivers/infiniband/hw/bnxtre/bnxt_re.h
index 78e95a5..8d2f42f 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re.h
@@ -60,6 +60,8 @@ struct bnxt_re_work {
 #define BNXT_RE_MIN_MSIX		2
 #define BNXT_RE_MAX_MSIX		16
 #define BNXT_RE_AEQ_IDX			0
+#define BNXT_RE_NQ_IDX			1
+
 struct bnxt_re_dev {
 	struct ib_device		ibdev;
 	struct list_head		list;
@@ -78,9 +80,15 @@ struct bnxt_re_dev {
 
 	int				id;
 
+	/* FP Notification Queue (CQ & SRQ) */
+	struct tasklet_struct		nq_task;
+
 	/* RCFW Channel */
 	struct bnxt_qplib_rcfw		rcfw;
 
+	/* NQ */
+	struct bnxt_qplib_nq		nq;
+
 	/* Device Resources */
 	struct bnxt_qplib_dev_attr	dev_attr;
 	struct bnxt_qplib_ctx		qplib_ctx;
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
index f584786..80d3f66 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
@@ -568,6 +568,9 @@ static int bnxt_re_aeq_handler(struct bnxt_qplib_rcfw *rcfw,
 
 static void bnxt_re_cleanup_res(struct bnxt_re_dev *rdev)
 {
+	if (rdev->nq.hwq.max_elements)
+		bnxt_qplib_disable_nq(&rdev->nq);
+
 	if (rdev->qplib_res.rcfw)
 		bnxt_qplib_cleanup_res(&rdev->qplib_res);
 }
@@ -578,11 +581,32 @@ static int bnxt_re_init_res(struct bnxt_re_dev *rdev)
 
 	bnxt_qplib_init_res(&rdev->qplib_res);
 
+	if (rdev->msix_entries[BNXT_RE_NQ_IDX].vector <= 0)
+		return -EINVAL;
+
+	rc = bnxt_qplib_enable_nq(rdev->en_dev->pdev, &rdev->nq,
+				  rdev->msix_entries[BNXT_RE_NQ_IDX].vector,
+				  rdev->msix_entries[BNXT_RE_NQ_IDX].db_offset,
+				  NULL,
+				  NULL);
+
+	if (rc)
+		dev_err(rdev_to_dev(rdev), "Failed to enable NQ: %#x", rc);
+
 	return rc;
 }
 
 static void bnxt_re_free_res(struct bnxt_re_dev *rdev, bool lock_wait)
 {
+	if (rdev->nq.hwq.max_elements) {
+		bnxt_re_net_ring_free(rdev, rdev->nq.ring_id, lock_wait);
+		bnxt_qplib_free_nq(&rdev->nq);
+	}
+	if (rdev->qplib_res.dpi_tbl.max) {
+		bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
+				       &rdev->qplib_res.dpi_tbl,
+				       &rdev->dpi_privileged);
+	}
 	if (rdev->qplib_res.rcfw) {
 		bnxt_qplib_free_res(&rdev->qplib_res);
 		rdev->qplib_res.rcfw = NULL;
@@ -604,8 +628,34 @@ static int bnxt_re_alloc_res(struct bnxt_re_dev *rdev)
 	if (rc)
 		goto fail;
 
-	return 0;
+	rc = bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl,
+				  &rdev->dpi_privileged,
+				  rdev);
+	if (rc)
+		goto fail;
 
+	rdev->nq.hwq.max_elements = BNXT_RE_MAX_CQ_COUNT +
+				    BNXT_RE_MAX_SRQC_COUNT + 2;
+	rc = bnxt_qplib_alloc_nq(rdev->en_dev->pdev, &rdev->nq);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev),
+			"Failed to allocate NQ memory: %#x", rc);
+		goto fail;
+	}
+	rc = bnxt_re_net_ring_alloc
+			(rdev, rdev->nq.hwq.pbl[PBL_LVL_0].pg_map_arr,
+			 rdev->nq.hwq.pbl[rdev->nq.hwq.level].pg_count,
+			 HWRM_RING_ALLOC_CMPL, BNXT_QPLIB_NQE_MAX_CNT - 1,
+			 rdev->msix_entries[BNXT_RE_NQ_IDX].ring_idx,
+			 &rdev->nq.ring_id);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev),
+			"Failed to allocate NQ ring: %#x", rc);
+		goto free_nq;
+	}
+	return 0;
+free_nq:
+	bnxt_qplib_free_nq(&rdev->nq);
 fail:
 	rdev->qplib_res.rcfw = NULL;
 	return rc;
-- 
2.5.5

^ permalink raw reply related

* [PATCH for bnxt_re V3 06/21] bnxt_re: Support for PD, ucontext and mmap verbs
From: Selvin Xavier @ 2016-12-20  9:13 UTC (permalink / raw)
  To: dledford, linux-rdma
  Cc: netdev, michael.chan, Selvin Xavier, Eddie Wai, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482225211-22423-1-git-send-email-selvin.xavier@broadcom.com>

This patch includes the uverbs ABI header file to enable user verbs.
Also, adds support for the Protection Domain, User Context and mmap
verbs.

v2: Moved the bnxt_re_uverbs_abi.h file to include/uapi/rdma folder.
    Also, Fixed one sparse warning.
v3: Fixes some cross compile and sparse warnings. Changes the
    bnxt_re_uverbs_abi.h file name to bnxtre-abi.h. Aligns the data
    structure to 64 bit and eliminates __packed qualifier from
    bnxtre-abi.h 

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c   |  28 +++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h   |   6 +
 drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h    |   4 +
 drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c | 218 ++++++++++++++++++++++++
 drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h |  23 +++
 drivers/infiniband/hw/bnxtre/bnxt_re_main.c     |   7 +
 include/uapi/rdma/bnxtre-abi.h                  |  59 +++++++
 7 files changed, 345 insertions(+)
 create mode 100644 include/uapi/rdma/bnxtre-abi.h

diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c
index eaaf67e..3b602c0 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c
@@ -527,6 +527,34 @@ static int bnxt_qplib_alloc_pkey_tbl(struct bnxt_qplib_res *res,
 	return 0;
 };
 
+/* PDs */
+int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pdt, struct bnxt_qplib_pd *pd)
+{
+	u32 bit_num;
+
+	bit_num = find_first_bit(pdt->tbl, pdt->max);
+	if (bit_num == pdt->max)
+		return -ENOMEM;
+
+	/* Found unused PD */
+	clear_bit(bit_num, pdt->tbl);
+	pd->id = bit_num;
+	return 0;
+}
+
+int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
+			  struct bnxt_qplib_pd_tbl *pdt,
+			  struct bnxt_qplib_pd *pd)
+{
+	if (test_and_set_bit(pd->id, pdt->tbl)) {
+		dev_warn(&res->pdev->dev, "Freeing an unused PD? pdn = %d",
+			 pd->id);
+		return -EINVAL;
+	}
+	pd->id = 0;
+	return 0;
+}
+
 static void bnxt_qplib_free_pd_tbl(struct bnxt_qplib_pd_tbl *pdt)
 {
 	kfree(pdt->tbl);
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
index 571feda..c9e376a 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
@@ -186,6 +186,7 @@ struct bnxt_qplib_res {
 	struct bnxt_qplib_dpi_tbl	dpi_tbl;
 };
 
+struct bnxt_qplib_pd;
 struct bnxt_qplib_dev_attr;
 
 void bnxt_qplib_free_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq);
@@ -193,6 +194,11 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
 			      struct scatterlist *sl, int nmap, u32 *elements,
 			      u32 elements_per_page, u32 aux, u32 pg_size,
 			      enum bnxt_qplib_hwq_type hwq_type);
+int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pd_tbl,
+			struct bnxt_qplib_pd *pd);
+int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
+			  struct bnxt_qplib_pd_tbl *pd_tbl,
+			  struct bnxt_qplib_pd *pd);
 int bnxt_qplib_alloc_dpi(struct bnxt_qplib_dpi_tbl *dpit,
 			 struct bnxt_qplib_dpi     *dpi,
 			 void                      *app);
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
index 0b5adda..7de6600 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
@@ -70,6 +70,10 @@ struct bnxt_qplib_dev_attr {
 	u8				tqm_alloc_reqs[MAX_TQM_ALLOC_REQ];
 };
 
+struct bnxt_qplib_pd {
+	u32				id;
+};
+
 struct bnxt_qplib_gid {
 	u8				data[16];
 };
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
index c01fa40..e947a98 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
@@ -35,3 +35,221 @@
  *
  * Description: IB Verbs interpreter
  */
+
+#include <linux/interrupt.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_user_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_addr.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_cache.h>
+
+#include "bnxt_ulp.h"
+
+#include "bnxt_re_hsi.h"
+#include "bnxt_qplib_res.h"
+#include "bnxt_qplib_sp.h"
+#include "bnxt_qplib_fp.h"
+#include "bnxt_qplib_rcfw.h"
+
+#include "bnxt_re.h"
+#include "bnxt_re_ib_verbs.h"
+#include <rdma/bnxtre-abi.h>
+
+/* Protection Domains */
+int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
+{
+	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+	struct bnxt_re_dev *rdev = pd->rdev;
+	int rc;
+
+	if (ib_pd->uobject && pd->dpi.dbr) {
+		struct ib_ucontext *ib_uctx = ib_pd->uobject->context;
+		struct bnxt_re_ucontext *ucntx;
+
+		/* Free DPI only if this is the first PD allocated by the
+		 * application and mark the context dpi as NULL
+		 */
+		ucntx = container_of(ib_uctx, struct bnxt_re_ucontext, ib_uctx);
+
+		rc = bnxt_qplib_dealloc_dpi(&rdev->qplib_res,
+					    &rdev->qplib_res.dpi_tbl,
+					    &pd->dpi);
+		if (rc)
+			dev_err(rdev_to_dev(rdev), "Failed to deallocate HW DPI");
+			/* Don't fail, continue*/
+		ucntx->dpi = NULL;
+	}
+
+	rc = bnxt_qplib_dealloc_pd(&rdev->qplib_res,
+				   &rdev->qplib_res.pd_tbl,
+				   &pd->qplib_pd);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to deallocate HW PD");
+		return rc;
+	}
+
+	kfree(pd);
+	return 0;
+}
+
+struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
+			       struct ib_ucontext *ucontext,
+			       struct ib_udata *udata)
+{
+	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+	struct bnxt_re_ucontext *ucntx = container_of(ucontext,
+						      struct bnxt_re_ucontext,
+						      ib_uctx);
+	struct bnxt_re_pd *pd;
+	int rc;
+
+	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
+	if (!pd)
+		return ERR_PTR(-ENOMEM);
+
+	pd->rdev = rdev;
+	if (bnxt_qplib_alloc_pd(&rdev->qplib_res.pd_tbl, &pd->qplib_pd)) {
+		dev_err(rdev_to_dev(rdev), "Failed to allocate HW PD");
+		rc = -ENOMEM;
+		goto fail;
+	}
+
+	if (udata) {
+		struct bnxt_re_pd_resp resp;
+
+		if (!ucntx->dpi) {
+			/* Allocate DPI in alloc_pd to avoid failing of
+			 * ibv_devinfo and family of application when DPIs
+			 * are depleted.
+			 */
+			if (bnxt_qplib_alloc_dpi(&rdev->qplib_res.dpi_tbl,
+						 &pd->dpi, ucntx)) {
+				rc = -ENOMEM;
+				goto dbfail;
+			}
+			ucntx->dpi = &pd->dpi;
+		}
+
+		resp.pdid = pd->qplib_pd.id;
+		/* Still allow mapping this DBR to the new user PD. */
+		resp.dpi = ucntx->dpi->dpi;
+		resp.dbr = (u64)ucntx->dpi->umdbr;
+
+		rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+		if (rc) {
+			dev_err(rdev_to_dev(rdev),
+				"Failed to copy user response\n");
+			goto dbfail;
+		}
+	}
+
+	return &pd->ib_pd;
+dbfail:
+	(void)bnxt_qplib_dealloc_pd(&rdev->qplib_res, &rdev->qplib_res.pd_tbl,
+				    &pd->qplib_pd);
+fail:
+	kfree(pd);
+	return ERR_PTR(rc);
+}
+
+struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
+					   struct ib_udata *udata)
+{
+	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+	struct bnxt_re_uctx_resp resp;
+	struct bnxt_re_ucontext *uctx;
+	struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+	int rc;
+
+	dev_dbg(rdev_to_dev(rdev), "ABI version requested %d",
+		ibdev->uverbs_abi_ver);
+
+	if (ibdev->uverbs_abi_ver != BNXT_RE_ABI_VERSION) {
+		dev_dbg(rdev_to_dev(rdev), " is different from the device %d ",
+			BNXT_RE_ABI_VERSION);
+		return ERR_PTR(-EPERM);
+	}
+
+	uctx = kzalloc(sizeof(*uctx), GFP_KERNEL);
+	if (!uctx)
+		return ERR_PTR(-ENOMEM);
+
+	uctx->rdev = rdev;
+
+	uctx->shpg = (void *)__get_free_page(GFP_KERNEL);
+	if (!uctx->shpg) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+	spin_lock_init(&uctx->sh_lock);
+
+	resp.dev_id = rdev->en_dev->pdev->devfn; /*Temp, Use idr_alloc instead*/
+	resp.max_qp = rdev->qplib_ctx.qpc_count;
+	resp.pg_size = PAGE_SIZE;
+	resp.cqe_sz = sizeof(struct cq_base);
+	resp.max_cqd = dev_attr->max_cq_wqes;
+	resp.rsvd    = 0;
+
+	rc = ib_copy_to_udata(udata, &resp, sizeof(resp));
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to copy user context");
+		rc = -EFAULT;
+		goto cfail;
+	}
+
+	return &uctx->ib_uctx;
+cfail:
+	free_page((unsigned long)uctx->shpg);
+	uctx->shpg = NULL;
+fail:
+	kfree(uctx);
+	return ERR_PTR(rc);
+}
+
+int bnxt_re_dealloc_ucontext(struct ib_ucontext *ib_uctx)
+{
+	struct bnxt_re_ucontext *uctx = container_of(ib_uctx,
+						   struct bnxt_re_ucontext,
+						   ib_uctx);
+	if (uctx->shpg)
+		free_page((unsigned long)uctx->shpg);
+	kfree(uctx);
+	return 0;
+}
+
+/* Helper function to mmap the virtual memory from user app */
+int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma)
+{
+	struct bnxt_re_ucontext *uctx = container_of(ib_uctx,
+						   struct bnxt_re_ucontext,
+						   ib_uctx);
+	struct bnxt_re_dev *rdev = uctx->rdev;
+	u64 pfn;
+
+	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
+		return -EINVAL;
+
+	if (vma->vm_pgoff) {
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		if (io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+				       PAGE_SIZE, vma->vm_page_prot)) {
+			dev_err(rdev_to_dev(rdev), "Failed to map DPI");
+			return -EAGAIN;
+		}
+	} else {
+		pfn = virt_to_phys(uctx->shpg) >> PAGE_SHIFT;
+		if (remap_pfn_range(vma, vma->vm_start,
+				    pfn, PAGE_SIZE, vma->vm_page_prot)) {
+			dev_err(rdev_to_dev(rdev),
+				"Failed to map shared page");
+			return -EAGAIN;
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
index 9162774..a133d81 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
@@ -39,4 +39,27 @@
 #ifndef __BNXT_RE_IB_VERBS_H__
 #define __BNXT_RE_IB_VERBS_H__
 
+struct bnxt_re_pd {
+	struct bnxt_re_dev	*rdev;
+	struct ib_pd		ib_pd;
+	struct bnxt_qplib_pd	qplib_pd;
+	struct bnxt_qplib_dpi	dpi;
+};
+
+struct bnxt_re_ucontext {
+	struct bnxt_re_dev	*rdev;
+	struct ib_ucontext	ib_uctx;
+	struct bnxt_qplib_dpi	*dpi;
+	void			*shpg;
+	spinlock_t		sh_lock;	/* protect shpg */
+};
+
+struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
+			       struct ib_ucontext *context,
+			       struct ib_udata *udata);
+int bnxt_re_dealloc_pd(struct ib_pd *pd);
+struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
+					   struct ib_udata *udata);
+int bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
+int bnxt_re_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
 #endif /* __BNXT_RE_IB_VERBS_H__ */
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
index 80d3f66..ec369dd 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
@@ -59,6 +59,7 @@
 #include "bnxt_qplib_fp.h"
 #include "bnxt_qplib_rcfw.h"
 #include "bnxt_re.h"
+#include "bnxt_re_ib_verbs.h"
 #include "bnxt.h"
 static char version[] =
 		BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n";
@@ -435,6 +436,12 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 
 	ibdev->num_comp_vectors	= 1;
 	ibdev->dma_device = &rdev->en_dev->pdev->dev;
+	ibdev->alloc_pd			= bnxt_re_alloc_pd;
+	ibdev->dealloc_pd		= bnxt_re_dealloc_pd;
+	ibdev->alloc_ucontext		= bnxt_re_alloc_ucontext;
+	ibdev->dealloc_ucontext		= bnxt_re_dealloc_ucontext;
+	ibdev->mmap			= bnxt_re_mmap;
+
 	return ib_register_device(ibdev, NULL);
 }
 
diff --git a/include/uapi/rdma/bnxtre-abi.h b/include/uapi/rdma/bnxtre-abi.h
new file mode 100644
index 0000000..7998eff
--- /dev/null
+++ b/include/uapi/rdma/bnxtre-abi.h
@@ -0,0 +1,59 @@
+/*
+ * Broadcom NetXtreme-E RoCE driver.
+ *
+ * Copyright (c) 2016, Broadcom. All rights reserved.  The term
+ * Broadcom refers to Broadcom Limited and/or its subsidiaries.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Description: Uverbs ABI header file
+ */
+
+#ifndef __BNXT_RE_UVERBS_ABI_H__
+#define __BNXT_RE_UVERBS_ABI_H__
+
+#define BNXT_RE_ABI_VERSION	1
+
+struct bnxt_re_uctx_resp {
+	__u32 dev_id;
+	__u32 max_qp;
+	__u32 pg_size;
+	__u32 cqe_sz;
+	__u32 max_cqd;
+	__u32 rsvd;
+};
+
+struct bnxt_re_pd_resp {
+	__u32 pdid;
+	__u32 dpi;
+	__u64 dbr;
+};
+
+#endif /* __BNXT_RE_UVERBS_ABI_H__*/
-- 
2.5.5

^ permalink raw reply related

* [PATCH for bnxt_re V3 07/21] bnxt_re: Support for query and modify device verbs
From: Selvin Xavier @ 2016-12-20  9:13 UTC (permalink / raw)
  To: dledford, linux-rdma
  Cc: netdev, michael.chan, Selvin Xavier, Eddie Wai, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482225211-22423-1-git-send-email-selvin.xavier@broadcom.com>

Implements the query device and modify device verbs

v3: Fix sparse warnings related to endianness checks

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c   | 17 +++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h   |  1 +
 drivers/infiniband/hw/bnxtre/bnxt_re.h          |  7 ++
 drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c | 90 +++++++++++++++++++++++++
 drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h |  6 ++
 drivers/infiniband/hw/bnxtre/bnxt_re_main.c     |  2 +
 6 files changed, 123 insertions(+)

diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c
index 3b602c0..7004f97 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c
@@ -445,6 +445,23 @@ int bnxt_qplib_alloc_ctx(struct pci_dev *pdev,
 	return rc;
 }
 
+/* GUID */
+void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid)
+{
+	u8 mac[ETH_ALEN];
+
+	/* MAC-48 to EUI-64 mapping */
+	memcpy(mac, dev_addr, ETH_ALEN);
+	guid[0] = mac[0] ^ 2;
+	guid[1] = mac[1];
+	guid[2] = mac[2];
+	guid[3] = 0xff;
+	guid[4] = 0xfe;
+	guid[5] = mac[3];
+	guid[6] = mac[4];
+	guid[7] = mac[5];
+}
+
 static void bnxt_qplib_free_sgid_tbl(struct bnxt_qplib_res *res,
 				     struct bnxt_qplib_sgid_tbl *sgid_tbl)
 {
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
index c9e376a..fc9c38c 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
@@ -194,6 +194,7 @@ int bnxt_qplib_alloc_init_hwq(struct pci_dev *pdev, struct bnxt_qplib_hwq *hwq,
 			      struct scatterlist *sl, int nmap, u32 *elements,
 			      u32 elements_per_page, u32 aux, u32 pg_size,
 			      enum bnxt_qplib_hwq_type hwq_type);
+void bnxt_qplib_get_guid(u8 *dev_addr, u8 *guid);
 int bnxt_qplib_alloc_pd(struct bnxt_qplib_pd_tbl *pd_tbl,
 			struct bnxt_qplib_pd *pd);
 int bnxt_qplib_dealloc_pd(struct bnxt_qplib_res *res,
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re.h b/drivers/infiniband/hw/bnxtre/bnxt_re.h
index 8d2f42f..cac1173 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re.h
@@ -45,6 +45,13 @@
 #define BNXT_RE_REF_WAIT_COUNT		10
 #define BNXT_RE_DESC	"Broadcom NetXtreme-C/E RoCE Driver"
 
+#define BNXT_RE_PAGE_SIZE_4K		BIT(12)
+#define BNXT_RE_PAGE_SIZE_8K		BIT(13)
+#define BNXT_RE_PAGE_SIZE_64K		BIT(16)
+#define BNXT_RE_PAGE_SIZE_2M		BIT(21)
+#define BNXT_RE_PAGE_SIZE_8M		BIT(23)
+#define BNXT_RE_PAGE_SIZE_1G		BIT(30)
+
 #define BNXT_RE_MAX_QPC_COUNT		(64 * 1024)
 #define BNXT_RE_MAX_MRW_COUNT		(64 * 1024)
 #define BNXT_RE_MAX_SRQC_COUNT		(64 * 1024)
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
index e947a98..1c18f61 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
@@ -60,6 +60,96 @@
 #include "bnxt_re_ib_verbs.h"
 #include <rdma/bnxtre-abi.h>
 
+int bnxt_re_query_device(struct ib_device *ibdev,
+			 struct ib_device_attr *ib_attr,
+			 struct ib_udata *udata)
+{
+	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+	struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
+
+	memset(ib_attr, 0, sizeof(*ib_attr));
+
+	ib_attr->fw_ver = (u64)(unsigned long)(dev_attr->fw_ver);
+	bnxt_qplib_get_guid(rdev->netdev->dev_addr,
+			    (u8 *)&ib_attr->sys_image_guid);
+	ib_attr->max_mr_size = ~0ull;
+	ib_attr->page_size_cap = BNXT_RE_PAGE_SIZE_4K | BNXT_RE_PAGE_SIZE_8K |
+				 BNXT_RE_PAGE_SIZE_64K | BNXT_RE_PAGE_SIZE_2M |
+				 BNXT_RE_PAGE_SIZE_8M | BNXT_RE_PAGE_SIZE_1G;
+
+	ib_attr->vendor_id = rdev->en_dev->pdev->vendor;
+	ib_attr->vendor_part_id = rdev->en_dev->pdev->device;
+	ib_attr->hw_ver = rdev->en_dev->pdev->subsystem_device;
+	ib_attr->max_qp = dev_attr->max_qp;
+	ib_attr->max_qp_wr = dev_attr->max_qp_wqes;
+	ib_attr->device_cap_flags =
+				    IB_DEVICE_CURR_QP_STATE_MOD
+				    | IB_DEVICE_RC_RNR_NAK_GEN
+				    | IB_DEVICE_SHUTDOWN_PORT
+				    | IB_DEVICE_SYS_IMAGE_GUID
+				    | IB_DEVICE_LOCAL_DMA_LKEY
+				    | IB_DEVICE_RESIZE_MAX_WR
+				    | IB_DEVICE_PORT_ACTIVE_EVENT
+				    | IB_DEVICE_N_NOTIFY_CQ
+				    | IB_DEVICE_MEM_WINDOW
+				    | IB_DEVICE_MEM_WINDOW_TYPE_2B
+				    | IB_DEVICE_MEM_MGT_EXTENSIONS;
+	ib_attr->max_sge = dev_attr->max_qp_sges;
+	ib_attr->max_sge_rd = dev_attr->max_qp_sges;
+	ib_attr->max_cq = dev_attr->max_cq;
+	ib_attr->max_cqe = dev_attr->max_cq_wqes;
+	ib_attr->max_mr = dev_attr->max_mr;
+	ib_attr->max_pd = dev_attr->max_pd;
+	ib_attr->max_qp_rd_atom = dev_attr->max_qp_rd_atom;
+	ib_attr->max_qp_init_rd_atom = dev_attr->max_qp_rd_atom;
+	ib_attr->atomic_cap = IB_ATOMIC_HCA;
+	ib_attr->masked_atomic_cap = IB_ATOMIC_HCA;
+
+	ib_attr->max_ee_rd_atom = 0;
+	ib_attr->max_res_rd_atom = 0;
+	ib_attr->max_ee_init_rd_atom = 0;
+	ib_attr->max_ee = 0;
+	ib_attr->max_rdd = 0;
+	ib_attr->max_mw = dev_attr->max_mw;
+	ib_attr->max_raw_ipv6_qp = 0;
+	ib_attr->max_raw_ethy_qp = dev_attr->max_raw_ethy_qp;
+	ib_attr->max_mcast_grp = 0;
+	ib_attr->max_mcast_qp_attach = 0;
+	ib_attr->max_total_mcast_qp_attach = 0;
+	ib_attr->max_ah = dev_attr->max_ah;
+
+	ib_attr->max_fmr = dev_attr->max_fmr;
+	ib_attr->max_map_per_fmr = 1;	/* ? */
+
+	ib_attr->max_srq = dev_attr->max_srq;
+	ib_attr->max_srq_wr = dev_attr->max_srq_wqes;
+	ib_attr->max_srq_sge = dev_attr->max_srq_sges;
+
+	ib_attr->max_fast_reg_page_list_len = MAX_PBL_LVL_1_PGS;
+
+	ib_attr->max_pkeys = 1;
+	ib_attr->local_ca_ack_delay = 0;
+	return 0;
+}
+
+int bnxt_re_modify_device(struct ib_device *ibdev,
+			  int device_modify_mask,
+			  struct ib_device_modify *device_modify)
+{
+	switch (device_modify_mask) {
+	case IB_DEVICE_MODIFY_SYS_IMAGE_GUID:
+		/* Modify the GUID requires the modification of the GID table */
+		/* GUID should be made as READ-ONLY */
+		break;
+	case IB_DEVICE_MODIFY_NODE_DESC:
+		/* Node Desc should be made as READ-ONLY */
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
 /* Protection Domains */
 int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
 {
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
index a133d81..45f9253 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
@@ -54,6 +54,12 @@ struct bnxt_re_ucontext {
 	spinlock_t		sh_lock;	/* protect shpg */
 };
 
+int bnxt_re_query_device(struct ib_device *ibdev,
+			 struct ib_device_attr *ib_attr,
+			 struct ib_udata *udata);
+int bnxt_re_modify_device(struct ib_device *ibdev,
+			  int device_modify_mask,
+			  struct ib_device_modify *device_modify);
 struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
 			       struct ib_ucontext *context,
 			       struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
index ec369dd..ec3fce2 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
@@ -436,6 +436,8 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 
 	ibdev->num_comp_vectors	= 1;
 	ibdev->dma_device = &rdev->en_dev->pdev->dev;
+	ibdev->query_device		= bnxt_re_query_device;
+	ibdev->modify_device		= bnxt_re_modify_device;
 	ibdev->alloc_pd			= bnxt_re_alloc_pd;
 	ibdev->dealloc_pd		= bnxt_re_dealloc_pd;
 	ibdev->alloc_ucontext		= bnxt_re_alloc_ucontext;
-- 
2.5.5

^ permalink raw reply related

* [PATCH for bnxt_re V3 09/21] bnxt_re: Support for GID related verbs
From: Selvin Xavier @ 2016-12-20  9:13 UTC (permalink / raw)
  To: dledford, linux-rdma
  Cc: netdev, michael.chan, Selvin Xavier, Eddie Wai, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482225211-22423-1-git-send-email-selvin.xavier@broadcom.com>

Implements add GID, del GID,  get_netdev and pkey related verbs.

v3: Fixes some sparse warning related to endianness check. Removes
    macros which are just wrapper for standard defines.

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c   |   5 +
 drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h   |   3 +
 drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c    | 218 ++++++++++++++++++++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h    |  11 ++
 drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c | 123 +++++++++++++
 drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h |  18 ++
 drivers/infiniband/hw/bnxtre/bnxt_re_main.c     |   7 +
 7 files changed, 385 insertions(+)

diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c
index 7004f97..d5677b2 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.c
@@ -507,6 +507,11 @@ static void bnxt_qplib_cleanup_sgid_tbl(struct bnxt_qplib_res *res,
 {
 	int i;
 
+	for (i = 0; i < sgid_tbl->max; i++) {
+		if (memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
+			   sizeof(bnxt_qplib_gid_zero)))
+			bnxt_qplib_del_sgid(sgid_tbl, &sgid_tbl->tbl[i], true);
+	}
 	memset(sgid_tbl->tbl, 0, sizeof(struct bnxt_qplib_gid) * sgid_tbl->max);
 	memset(sgid_tbl->hw_id, -1, sizeof(u16) * sgid_tbl->max);
 	sgid_tbl->active = 0;
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
index fc9c38c..6596ee2 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_res.h
@@ -186,6 +186,9 @@ struct bnxt_qplib_res {
 	struct bnxt_qplib_dpi_tbl	dpi_tbl;
 };
 
+#define to_bnxt_qplib(ptr, type, member)	\
+	container_of(ptr, type, member)
+
 struct bnxt_qplib_pd;
 struct bnxt_qplib_dev_attr;
 
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c
index 103f9ab..2bdfaf0 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c
@@ -46,6 +46,10 @@
 #include "bnxt_qplib_res.h"
 #include "bnxt_qplib_rcfw.h"
 #include "bnxt_qplib_sp.h"
+
+const struct bnxt_qplib_gid bnxt_qplib_gid_zero = {{ 0, 0, 0, 0, 0, 0, 0, 0,
+						     0, 0, 0, 0, 0, 0, 0, 0 } };
+
 /* Device */
 int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 			    struct bnxt_qplib_dev_attr *attr)
@@ -129,6 +133,220 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 	return 0;
 }
 
+/* SGID */
+int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
+			struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
+			struct bnxt_qplib_gid *gid)
+{
+	if (index > sgid_tbl->max) {
+		dev_err(&res->pdev->dev,
+			"QPLIB: Index %d exceeded SGID table max (%d)",
+			index, sgid_tbl->max);
+		return -EINVAL;
+	}
+	memcpy(gid, &sgid_tbl->tbl[index], sizeof(*gid));
+	return 0;
+}
+
+int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+			struct bnxt_qplib_gid *gid, bool update)
+{
+	struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl,
+						   struct bnxt_qplib_res,
+						   sgid_tbl);
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	int index;
+
+	if (!sgid_tbl) {
+		dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
+		return -EINVAL;
+	}
+	/* Do we need a sgid_lock here? */
+	if (!sgid_tbl->active) {
+		dev_err(&res->pdev->dev,
+			"QPLIB: SGID table has no active entries");
+		return -ENOMEM;
+	}
+	for (index = 0; index < sgid_tbl->max; index++) {
+		if (!memcmp(&sgid_tbl->tbl[index], gid, sizeof(*gid)))
+			break;
+	}
+	if (index == sgid_tbl->max) {
+		dev_warn(&res->pdev->dev, "GID not found in the SGID table");
+		return 0;
+	}
+	/* Remove GID from the SGID table */
+	if (update) {
+		struct cmdq_delete_gid req;
+		struct creq_delete_gid_resp *resp;
+		u16 cmd_flags = 0;
+
+		RCFW_CMD_PREP(req, DELETE_GID, cmd_flags);
+		if (sgid_tbl->hw_id[index] == 0xFFFF) {
+			dev_err(&res->pdev->dev,
+				"QPLIB: GID entry contains an invalid HW id");
+			return -EINVAL;
+		}
+		req.gid_index = cpu_to_le16(sgid_tbl->hw_id[index]);
+		resp = (struct creq_delete_gid_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, NULL,
+						     0);
+		if (!resp) {
+			dev_err(&res->pdev->dev,
+				"QPLIB: SP: DELETE_GID send failed");
+			return -EINVAL;
+		}
+		if (!bnxt_qplib_rcfw_wait_for_resp(rcfw,
+						   le16_to_cpu(req.cookie))) {
+			/* Cmd timed out */
+			dev_err(&res->pdev->dev,
+				"QPLIB: SP: DELETE_GID timed out");
+			return -ETIMEDOUT;
+		}
+		if (resp->status ||
+		    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+			dev_err(&res->pdev->dev,
+				"QPLIB: SP: DELETE_GID failed ");
+			dev_err(&res->pdev->dev,
+				"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+				resp->status, le16_to_cpu(req.cookie),
+				le16_to_cpu(resp->cookie));
+			return -EINVAL;
+		}
+	}
+	memcpy(&sgid_tbl->tbl[index], &bnxt_qplib_gid_zero,
+	       sizeof(bnxt_qplib_gid_zero));
+	sgid_tbl->active--;
+	dev_dbg(&res->pdev->dev,
+		"QPLIB: SGID deleted hw_id[0x%x] = 0x%x active = 0x%x",
+		 index, sgid_tbl->hw_id[index], sgid_tbl->active);
+	sgid_tbl->hw_id[index] = (u16)-1;
+
+	/* unlock */
+	return 0;
+}
+
+int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+			struct bnxt_qplib_gid *gid, u8 *smac, u16 vlan_id,
+			bool update, u32 *index)
+{
+	struct bnxt_qplib_res *res = to_bnxt_qplib(sgid_tbl,
+						   struct bnxt_qplib_res,
+						   sgid_tbl);
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	int i, free_idx, rc = 0;
+
+	if (!sgid_tbl) {
+		dev_err(&res->pdev->dev, "QPLIB: SGID table not allocated");
+		return -EINVAL;
+	}
+	/* Do we need a sgid_lock here? */
+	if (sgid_tbl->active == sgid_tbl->max) {
+		dev_err(&res->pdev->dev, "QPLIB: SGID table is full");
+		return -ENOMEM;
+	}
+	free_idx = sgid_tbl->max;
+	for (i = 0; i < sgid_tbl->max; i++) {
+		if (!memcmp(&sgid_tbl->tbl[i], gid, sizeof(*gid))) {
+			dev_dbg(&res->pdev->dev,
+				"QPLIB: SGID entry already exist in entry %d!",
+				i);
+			*index = i;
+			return -EALREADY;
+		} else if (!memcmp(&sgid_tbl->tbl[i], &bnxt_qplib_gid_zero,
+				   sizeof(bnxt_qplib_gid_zero)) &&
+			   free_idx == sgid_tbl->max) {
+			free_idx = i;
+		}
+	}
+	if (free_idx == sgid_tbl->max) {
+		dev_err(&res->pdev->dev,
+			"QPLIB: SGID table is FULL but count is not MAX??");
+		return -ENOMEM;
+	}
+	if (update) {
+		struct cmdq_add_gid req;
+		struct creq_add_gid_resp *resp;
+		u16 cmd_flags = 0;
+		u32 temp32[4];
+		u16 temp16[3];
+
+		RCFW_CMD_PREP(req, ADD_GID, cmd_flags);
+
+		memcpy(temp32, gid->data, sizeof(struct bnxt_qplib_gid));
+		req.gid[0] = cpu_to_be32(temp32[3]);
+		req.gid[1] = cpu_to_be32(temp32[2]);
+		req.gid[2] = cpu_to_be32(temp32[1]);
+		req.gid[3] = cpu_to_be32(temp32[0]);
+		if (vlan_id != 0xFFFF)
+			req.vlan = cpu_to_le16((vlan_id &
+					CMDQ_ADD_GID_VLAN_VLAN_ID_MASK) |
+					CMDQ_ADD_GID_VLAN_TPID_TPID_8100 |
+					CMDQ_ADD_GID_VLAN_VLAN_EN);
+
+		/* MAC in network format */
+		memcpy(temp16, smac, 6);
+		req.src_mac[0] = cpu_to_be16(temp16[0]);
+		req.src_mac[1] = cpu_to_be16(temp16[1]);
+		req.src_mac[2] = cpu_to_be16(temp16[2]);
+
+		resp = (struct creq_add_gid_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						     NULL, 0);
+		if (!resp) {
+			dev_err(&res->pdev->dev,
+				"QPLIB: SP: ADD_GID send failed");
+			return -EINVAL;
+		}
+		if (!bnxt_qplib_rcfw_wait_for_resp(rcfw,
+						   le16_to_cpu(req.cookie))) {
+			/* Cmd timed out */
+			dev_err(&res->pdev->dev,
+				"QPIB: SP: ADD_GID timed out");
+			return -ETIMEDOUT;
+		}
+		if (resp->status ||
+		    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+			dev_err(&res->pdev->dev, "QPLIB: SP: ADD_GID failed ");
+			dev_err(&res->pdev->dev,
+				"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+				resp->status, le16_to_cpu(req.cookie),
+				le16_to_cpu(resp->cookie));
+			return -EINVAL;
+		}
+		sgid_tbl->hw_id[free_idx] = le32_to_cpu(resp->xid);
+	}
+	/* Add GID to the sgid_tbl */
+	memcpy(&sgid_tbl->tbl[free_idx], gid, sizeof(*gid));
+	sgid_tbl->active++;
+	dev_dbg(&res->pdev->dev,
+		"QPLIB: SGID added hw_id[0x%x] = 0x%x active = 0x%x",
+		 free_idx, sgid_tbl->hw_id[free_idx], sgid_tbl->active);
+
+	*index = free_idx;
+	/* unlock */
+	return rc;
+}
+
+/* pkeys */
+int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
+			struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index,
+			u16 *pkey)
+{
+	if (index == 0xFFFF) {
+		*pkey = 0xFFFF;
+		return 0;
+	}
+	if (index > pkey_tbl->max) {
+		dev_err(&res->pdev->dev,
+			"QPLIB: Index %d exceeded PKEY table max (%d)",
+			index, pkey_tbl->max);
+		return -EINVAL;
+	}
+	memcpy(pkey, &pkey_tbl->tbl[index], sizeof(*pkey));
+	return 0;
+}
+
 int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
 			struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
 			bool update)
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
index 7de6600..8e20924 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
@@ -78,6 +78,17 @@ struct bnxt_qplib_gid {
 	u8				data[16];
 };
 
+int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
+			struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
+			struct bnxt_qplib_gid *gid);
+int bnxt_qplib_del_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+			struct bnxt_qplib_gid *gid, bool update);
+int bnxt_qplib_add_sgid(struct bnxt_qplib_sgid_tbl *sgid_tbl,
+			struct bnxt_qplib_gid *gid, u8 *mac, u16 vlan_id,
+			bool update, u32 *index);
+int bnxt_qplib_get_pkey(struct bnxt_qplib_res *res,
+			struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 index,
+			u16 *pkey);
 int bnxt_qplib_del_pkey(struct bnxt_qplib_res *res,
 			struct bnxt_qplib_pkey_tbl *pkey_tbl, u16 *pkey,
 			bool update);
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
index ede0e54..5185487 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
@@ -60,6 +60,22 @@
 #include "bnxt_re_ib_verbs.h"
 #include <rdma/bnxtre-abi.h>
 
+/* Device */
+struct net_device *bnxt_re_get_netdev(struct ib_device *ibdev, u8 port_num)
+{
+	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+	struct net_device *netdev = NULL;
+
+	rcu_read_lock();
+	if (rdev)
+		netdev = rdev->netdev;
+	if (netdev)
+		dev_hold(netdev);
+
+	rcu_read_unlock();
+	return netdev;
+}
+
 int bnxt_re_query_device(struct ib_device *ibdev,
 			 struct ib_device_attr *ib_attr,
 			 struct ib_udata *udata)
@@ -272,6 +288,113 @@ int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
 	immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 	return 0;
 }
+
+int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
+		       u16 index, u16 *pkey)
+{
+	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+
+	/* Ignore port_num */
+
+	memset(pkey, 0, sizeof(*pkey));
+	return bnxt_qplib_get_pkey(&rdev->qplib_res,
+				   &rdev->qplib_res.pkey_tbl, index, pkey);
+}
+
+int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
+		      int index, union ib_gid *gid)
+{
+	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+	int rc = 0;
+
+	/* Ignore port_num */
+	memset(gid, 0, sizeof(*gid));
+	rc = bnxt_qplib_get_sgid(&rdev->qplib_res,
+				 &rdev->qplib_res.sgid_tbl, index,
+				 (struct bnxt_qplib_gid *)gid);
+	return rc;
+}
+
+int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
+		    unsigned int index, void **context)
+{
+	int rc = 0;
+	struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
+	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+	struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
+
+	/* Delete the entry from the hardware */
+	ctx = *context;
+	if (!ctx)
+		return -EINVAL;
+
+	if (sgid_tbl && sgid_tbl->active) {
+		if (ctx->idx >= sgid_tbl->max)
+			return -EINVAL;
+		ctx->refcnt--;
+		if (!ctx->refcnt) {
+			rc = bnxt_qplib_del_sgid
+					(sgid_tbl,
+					 &sgid_tbl->tbl[ctx->idx], true);
+			if (rc)
+				dev_err(rdev_to_dev(rdev),
+					"Failed to remove GID: %#x", rc);
+			ctx_tbl = sgid_tbl->ctx;
+			ctx_tbl[ctx->idx] = NULL;
+			kfree(ctx);
+		}
+	} else {
+		return -EINVAL;
+	}
+	return rc;
+}
+
+int bnxt_re_add_gid(struct ib_device *ibdev, u8 port_num,
+		    unsigned int index, const union ib_gid *gid,
+		    const struct ib_gid_attr *attr, void **context)
+{
+	int rc;
+	u32 tbl_idx = 0;
+	u16 vlan_id = 0xFFFF;
+	struct bnxt_re_gid_ctx *ctx, **ctx_tbl;
+	struct bnxt_re_dev *rdev = to_bnxt_re_dev(ibdev, ibdev);
+	struct bnxt_qplib_sgid_tbl *sgid_tbl = &rdev->qplib_res.sgid_tbl;
+
+	if ((attr->ndev) && is_vlan_dev(attr->ndev))
+		vlan_id = vlan_dev_vlan_id(attr->ndev);
+
+	rc = bnxt_qplib_add_sgid(sgid_tbl, (struct bnxt_qplib_gid *)gid,
+				 rdev->qplib_res.netdev->dev_addr,
+				 vlan_id, true, &tbl_idx);
+	if (rc == -EALREADY) {
+		ctx_tbl = sgid_tbl->ctx;
+		ctx_tbl[tbl_idx]->refcnt++;
+		*context = ctx_tbl[tbl_idx];
+		return 0;
+	}
+
+	if (rc < 0) {
+		dev_err(rdev_to_dev(rdev), "Failed to add GID: %#x", rc);
+		return rc;
+	}
+
+	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+	ctx_tbl = sgid_tbl->ctx;
+	ctx->idx = tbl_idx;
+	ctx->refcnt = 1;
+	ctx_tbl[tbl_idx] = ctx;
+
+	return rc;
+}
+
+enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
+					    u8 port_num)
+{
+	return IB_LINK_LAYER_ETHERNET;
+}
+
 /* Protection Domains */
 int bnxt_re_dealloc_pd(struct ib_pd *ib_pd)
 {
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
index 255a0ad..c10fd63 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
@@ -39,6 +39,11 @@
 #ifndef __BNXT_RE_IB_VERBS_H__
 #define __BNXT_RE_IB_VERBS_H__
 
+struct bnxt_re_gid_ctx {
+	u32			idx;
+	u32			refcnt;
+};
+
 struct bnxt_re_pd {
 	struct bnxt_re_dev	*rdev;
 	struct ib_pd		ib_pd;
@@ -54,6 +59,8 @@ struct bnxt_re_ucontext {
 	spinlock_t		sh_lock;	/* protect shpg */
 };
 
+struct net_device *bnxt_re_get_netdev(struct ib_device *ibdev, u8 port_num);
+
 int bnxt_re_query_device(struct ib_device *ibdev,
 			 struct ib_device_attr *ib_attr,
 			 struct ib_udata *udata);
@@ -67,6 +74,17 @@ int bnxt_re_modify_port(struct ib_device *ibdev, u8 port_num,
 			struct ib_port_modify *port_modify);
 int bnxt_re_get_port_immutable(struct ib_device *ibdev, u8 port_num,
 			       struct ib_port_immutable *immutable);
+int bnxt_re_query_pkey(struct ib_device *ibdev, u8 port_num,
+		       u16 index, u16 *pkey);
+int bnxt_re_del_gid(struct ib_device *ibdev, u8 port_num,
+		    unsigned int index, void **context);
+int bnxt_re_add_gid(struct ib_device *ibdev, u8 port_num,
+		    unsigned int index, const union ib_gid *gid,
+		    const struct ib_gid_attr *attr, void **context);
+int bnxt_re_query_gid(struct ib_device *ibdev, u8 port_num,
+		      int index, union ib_gid *gid);
+enum rdma_link_layer bnxt_re_get_link_layer(struct ib_device *ibdev,
+					    u8 port_num);
 struct ib_pd *bnxt_re_alloc_pd(struct ib_device *ibdev,
 			       struct ib_ucontext *context,
 			       struct ib_udata *udata);
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
index afadf21..8893ebf 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
@@ -442,6 +442,13 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 	ibdev->query_port		= bnxt_re_query_port;
 	ibdev->modify_port		= bnxt_re_modify_port;
 	ibdev->get_port_immutable	= bnxt_re_get_port_immutable;
+	ibdev->query_pkey		= bnxt_re_query_pkey;
+	ibdev->query_gid		= bnxt_re_query_gid;
+	ibdev->get_netdev		= bnxt_re_get_netdev;
+	ibdev->add_gid			= bnxt_re_add_gid;
+	ibdev->del_gid			= bnxt_re_del_gid;
+	ibdev->get_link_layer		= bnxt_re_get_link_layer;
+
 	ibdev->alloc_pd			= bnxt_re_alloc_pd;
 	ibdev->dealloc_pd		= bnxt_re_dealloc_pd;
 	ibdev->alloc_ucontext		= bnxt_re_alloc_ucontext;
-- 
2.5.5

^ permalink raw reply related

* [PATCH for bnxt_re V3 12/21] bnxt_re: Support memory registration verbs
From: Selvin Xavier @ 2016-12-20  9:13 UTC (permalink / raw)
  To: dledford, linux-rdma
  Cc: netdev, michael.chan, Selvin Xavier, Eddie Wai, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482225211-22423-1-git-send-email-selvin.xavier@broadcom.com>

This patch implements the kernel and user memory region registration
supported by the bnxt_re driver.
This includes the user MR, FRMR, FMR and DMA MR support.

v3: Moves bnxt_qplib_map_tc2cos to DCB patch as the functionality is used
    by DCB patch. Also, removed some unwanted macros.

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c    | 289 ++++++++++++++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h    |  41 +++
 drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c | 376 ++++++++++++++++++++++++
 drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h |  44 +++
 drivers/infiniband/hw/bnxtre/bnxt_re_main.c     |  11 +
 5 files changed, 761 insertions(+)

diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c
index 132dd29..4f77af8 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c
@@ -510,3 +510,292 @@ int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah)
 	}
 	return 0;
 }
+
+/* MRW */
+int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
+{
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	struct cmdq_deallocate_key req;
+	struct creq_deallocate_key_resp *resp;
+	u16 cmd_flags = 0;
+
+	if (mrw->lkey == 0xFFFFFFFF) {
+		dev_info(&res->pdev->dev,
+			 "QPLIB: SP: Free a reserved lkey MRW");
+		return 0;
+	}
+
+	RCFW_CMD_PREP(req, DEALLOCATE_KEY, cmd_flags);
+
+	req.mrw_flags = mrw->type;
+
+	if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1)  ||
+	    (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A) ||
+	    (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B))
+		req.key = cpu_to_le32(mrw->rkey);
+	else
+		req.key = cpu_to_le32(mrw->lkey);
+
+	resp = (struct creq_deallocate_key_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						     NULL, 0);
+	if (!resp) {
+		dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR send failed");
+		return -EINVAL;
+	}
+	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+		/* Cmd timed out */
+		dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR timed out");
+		return -ETIMEDOUT;
+	}
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+		dev_err(&res->pdev->dev, "QPLIB: SP: FREE_MR failed ");
+		dev_err(&res->pdev->dev,
+			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+			resp->status, le16_to_cpu(req.cookie),
+			le16_to_cpu(resp->cookie));
+		return -EINVAL;
+	}
+	/* Free the qplib's MRW memory */
+	if (mrw->hwq.max_elements)
+		bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
+
+	return 0;
+}
+
+int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw)
+{
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	struct cmdq_allocate_mrw req;
+	struct creq_allocate_mrw_resp *resp;
+	u16 cmd_flags = 0;
+	unsigned long tmp;
+
+	RCFW_CMD_PREP(req, ALLOCATE_MRW, cmd_flags);
+
+	req.pd_id = cpu_to_le32(mrw->pd->id);
+	req.mrw_flags = mrw->type;
+	if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR &&
+	     mrw->flags & BNXT_QPLIB_FR_PMR) ||
+	    mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A ||
+	    mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B)
+		req.access = CMDQ_ALLOCATE_MRW_ACCESS_CONSUMER_OWNED_KEY;
+	tmp = (unsigned long)mrw;
+	req.mrw_handle = cpu_to_le64(tmp);
+
+	resp = (struct creq_allocate_mrw_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						     NULL, 0);
+	if (!resp) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW send failed");
+		return -EINVAL;
+	}
+	if (!bnxt_qplib_rcfw_wait_for_resp(rcfw, le16_to_cpu(req.cookie))) {
+		/* Cmd timed out */
+		dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW timed out");
+		return -ETIMEDOUT;
+	}
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: SP: ALLOC_MRW failed ");
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+			resp->status, le16_to_cpu(req.cookie),
+			le16_to_cpu(resp->cookie));
+		return -EINVAL;
+	}
+	if ((mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE1)  ||
+	    (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2A) ||
+	    (mrw->type == CMDQ_ALLOCATE_MRW_MRW_FLAGS_MW_TYPE2B))
+		mrw->rkey = le32_to_cpu(resp->xid);
+	else
+		mrw->lkey = le32_to_cpu(resp->xid);
+	return 0;
+}
+
+int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
+			 bool block)
+{
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	struct cmdq_deregister_mr req;
+	struct creq_deregister_mr_resp *resp;
+	u16 cmd_flags = 0;
+	int rc;
+
+	RCFW_CMD_PREP(req, DEREGISTER_MR, cmd_flags);
+
+	req.lkey = cpu_to_le32(mrw->lkey);
+	resp = (struct creq_deregister_mr_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						     NULL, block);
+	if (!resp) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DEREG_MR send failed");
+		return -EINVAL;
+	}
+	if (block)
+		rc = bnxt_qplib_rcfw_block_for_resp(rcfw,
+						    le16_to_cpu(req.cookie));
+	else
+		rc = bnxt_qplib_rcfw_wait_for_resp(rcfw,
+						   le16_to_cpu(req.cookie));
+	if (!rc) {
+		/* Cmd timed out */
+		dev_err(&res->pdev->dev, "QPLIB: SP: DEREG_MR timed out");
+		return -ETIMEDOUT;
+	}
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+		dev_err(&rcfw->pdev->dev, "QPLIB: SP: DEREG_MR failed ");
+		dev_err(&rcfw->pdev->dev,
+			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+			resp->status, le16_to_cpu(req.cookie),
+			le16_to_cpu(resp->cookie));
+		return -EINVAL;
+	}
+
+	/* Free the qplib's MR memory */
+	if (mrw->hwq.max_elements) {
+		mrw->va = 0;
+		mrw->total_size = 0;
+		bnxt_qplib_free_hwq(res->pdev, &mrw->hwq);
+	}
+
+	return 0;
+}
+
+int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
+		      u64 *pbl_tbl, int num_pbls, bool block)
+{
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	struct cmdq_register_mr req;
+	struct creq_register_mr_resp *resp;
+	u16 cmd_flags = 0, level;
+	int pg_ptrs, pages, i, rc;
+	dma_addr_t **pbl_ptr;
+	u32 pg_size;
+
+	if (num_pbls) {
+		pg_ptrs = roundup_pow_of_two(num_pbls);
+		pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT;
+		if (!pages)
+			pages++;
+
+		if (pages > MAX_PBL_LVL_1_PGS) {
+			dev_err(&res->pdev->dev, "QPLIB: SP: Reg MR pages ");
+			dev_err(&res->pdev->dev,
+				"requested (0x%x) exceeded max (0x%x)",
+				pages, MAX_PBL_LVL_1_PGS);
+			return -ENOMEM;
+		}
+		/* Free the hwq if it already exist, must be a rereg */
+		if (mr->hwq.max_elements)
+			bnxt_qplib_free_hwq(res->pdev, &mr->hwq);
+
+		mr->hwq.max_elements = pages;
+		rc = bnxt_qplib_alloc_init_hwq(res->pdev, &mr->hwq, NULL, 0,
+					       &mr->hwq.max_elements,
+					       PAGE_SIZE, 0, PAGE_SIZE,
+					       HWQ_TYPE_CTX);
+		if (rc) {
+			dev_err(&res->pdev->dev,
+				"SP: Reg MR memory allocation failed");
+			return -ENOMEM;
+		}
+		/* Write to the hwq */
+		pbl_ptr = (dma_addr_t **)mr->hwq.pbl_ptr;
+		for (i = 0; i < num_pbls; i++)
+			pbl_ptr[PTR_PG(i)][PTR_IDX(i)] =
+				(pbl_tbl[i] & PAGE_MASK) | PTU_PTE_VALID;
+	}
+
+	RCFW_CMD_PREP(req, REGISTER_MR, cmd_flags);
+
+	/* Configure the request */
+	if (mr->hwq.level == PBL_LVL_MAX) {
+		level = 0;
+		req.pbl = 0;
+		pg_size = PAGE_SIZE;
+	} else {
+		level = mr->hwq.level + 1;
+		req.pbl = cpu_to_le64(mr->hwq.pbl[PBL_LVL_0].pg_map_arr[0]);
+		pg_size = mr->hwq.pbl[PBL_LVL_0].pg_size;
+	}
+	req.log2_pg_size_lvl = (level << CMDQ_REGISTER_MR_LVL_SFT) |
+			       ((ilog2(pg_size) <<
+				 CMDQ_REGISTER_MR_LOG2_PG_SIZE_SFT) &
+				CMDQ_REGISTER_MR_LOG2_PG_SIZE_MASK);
+	req.access = (mr->flags & 0xFFFF);
+	req.va = cpu_to_le64(mr->va);
+	req.key = cpu_to_le32(mr->lkey);
+	req.mr_size = cpu_to_le64(mr->total_size);
+
+	resp = (struct creq_register_mr_resp *)
+			bnxt_qplib_rcfw_send_message(rcfw, (void *)&req,
+						     NULL, block);
+	if (!resp) {
+		dev_err(&res->pdev->dev, "SP: REG_MR send failed");
+		rc = -EINVAL;
+		goto fail;
+	}
+	if (block)
+		rc = bnxt_qplib_rcfw_block_for_resp(rcfw,
+						    le16_to_cpu(req.cookie));
+	else
+		rc = bnxt_qplib_rcfw_wait_for_resp(rcfw,
+						   le16_to_cpu(req.cookie));
+	if (!rc) {
+		/* Cmd timed out */
+		dev_err(&res->pdev->dev, "SP: REG_MR timed out");
+		rc = -ETIMEDOUT;
+		goto fail;
+	}
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+		dev_err(&res->pdev->dev, "QPLIB: SP: REG_MR failed ");
+		dev_err(&res->pdev->dev,
+			"QPLIB: SP: with status 0x%x cmdq 0x%x resp 0x%x",
+			resp->status, le16_to_cpu(req.cookie),
+			le16_to_cpu(resp->cookie));
+		rc = -EINVAL;
+		goto fail;
+	}
+	return 0;
+
+fail:
+	if (mr->hwq.max_elements)
+		bnxt_qplib_free_hwq(res->pdev, &mr->hwq);
+	return rc;
+}
+
+int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
+					struct bnxt_qplib_frpl *frpl,
+					int max_pg_ptrs)
+{
+	int pg_ptrs, pages, rc;
+
+	/* Re-calculate the max to fit the HWQ allocation model */
+	pg_ptrs = roundup_pow_of_two(max_pg_ptrs);
+	pages = pg_ptrs >> MAX_PBL_LVL_1_PGS_SHIFT;
+	if (!pages)
+		pages++;
+
+	if (pages > MAX_PBL_LVL_1_PGS)
+		return -ENOMEM;
+
+	frpl->hwq.max_elements = pages;
+	rc = bnxt_qplib_alloc_init_hwq(res->pdev, &frpl->hwq, NULL, 0,
+				       &frpl->hwq.max_elements, PAGE_SIZE, 0,
+				       PAGE_SIZE, HWQ_TYPE_CTX);
+	if (!rc)
+		frpl->max_pg_ptrs = pg_ptrs;
+
+	return rc;
+}
+
+int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
+				       struct bnxt_qplib_frpl *frpl)
+{
+	bnxt_qplib_free_hwq(res->pdev, &frpl->hwq);
+	return 0;
+}
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
index 26eac17..3358f6d 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
@@ -94,6 +94,34 @@ struct bnxt_qplib_ah {
 	u8				nw_type;
 };
 
+struct bnxt_qplib_mrw {
+	struct bnxt_qplib_pd		*pd;
+	int				type;
+	u32				flags;
+#define BNXT_QPLIB_FR_PMR		0x80000000
+	u32				lkey;
+	u32				rkey;
+#define BNXT_QPLIB_RSVD_LKEY		0xFFFFFFFF
+	u64				va;
+	u64				total_size;
+	u32				npages;
+	u64				mr_handle;
+	struct bnxt_qplib_hwq		hwq;
+};
+
+struct bnxt_qplib_frpl {
+	int				max_pg_ptrs;
+	struct bnxt_qplib_hwq		hwq;
+};
+
+#define BNXT_QPLIB_ACCESS_LOCAL_WRITE	BIT(0)
+#define BNXT_QPLIB_ACCESS_REMOTE_READ	BIT(1)
+#define BNXT_QPLIB_ACCESS_REMOTE_WRITE	BIT(2)
+#define BNXT_QPLIB_ACCESS_REMOTE_ATOMIC	BIT(3)
+#define BNXT_QPLIB_ACCESS_MW_BIND	BIT(4)
+#define BNXT_QPLIB_ACCESS_ZERO_BASED	BIT(5)
+#define BNXT_QPLIB_ACCESS_ON_DEMAND	BIT(6)
+
 int bnxt_qplib_get_sgid(struct bnxt_qplib_res *res,
 			struct bnxt_qplib_sgid_tbl *sgid_tbl, int index,
 			struct bnxt_qplib_gid *gid);
@@ -115,4 +143,17 @@ int bnxt_qplib_get_dev_attr(struct bnxt_qplib_rcfw *rcfw,
 			    struct bnxt_qplib_dev_attr *attr);
 int bnxt_qplib_create_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah);
 int bnxt_qplib_destroy_ah(struct bnxt_qplib_res *res, struct bnxt_qplib_ah *ah);
+int bnxt_qplib_alloc_mrw(struct bnxt_qplib_res *res,
+			 struct bnxt_qplib_mrw *mrw);
+int bnxt_qplib_dereg_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mrw,
+			 bool block);
+int bnxt_qplib_reg_mr(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr,
+		      u64 *pbl_tbl, int num_pbls, bool block);
+int bnxt_qplib_free_mrw(struct bnxt_qplib_res *res, struct bnxt_qplib_mrw *mr);
+int bnxt_qplib_alloc_fast_reg_mr(struct bnxt_qplib_res *res,
+				 struct bnxt_qplib_mrw *mr, int max);
+int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
+					struct bnxt_qplib_frpl *frpl, int max);
+int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
+				       struct bnxt_qplib_frpl *frpl);
 #endif /* __BNXT_QPLIB_SP_H__*/
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
index c085dbd..9efa12f 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
@@ -639,6 +639,48 @@ int bnxt_re_query_ah(struct ib_ah *ib_ah, struct ib_ah_attr *ah_attr)
 	return 0;
 }
 
+static int __from_ib_access_flags(int iflags)
+{
+	int qflags = 0;
+
+	if (iflags & IB_ACCESS_LOCAL_WRITE)
+		qflags |= BNXT_QPLIB_ACCESS_LOCAL_WRITE;
+	if (iflags & IB_ACCESS_REMOTE_READ)
+		qflags |= BNXT_QPLIB_ACCESS_REMOTE_READ;
+	if (iflags & IB_ACCESS_REMOTE_WRITE)
+		qflags |= BNXT_QPLIB_ACCESS_REMOTE_WRITE;
+	if (iflags & IB_ACCESS_REMOTE_ATOMIC)
+		qflags |= BNXT_QPLIB_ACCESS_REMOTE_ATOMIC;
+	if (iflags & IB_ACCESS_MW_BIND)
+		qflags |= BNXT_QPLIB_ACCESS_MW_BIND;
+	if (iflags & IB_ZERO_BASED)
+		qflags |= BNXT_QPLIB_ACCESS_ZERO_BASED;
+	if (iflags & IB_ACCESS_ON_DEMAND)
+		qflags |= BNXT_QPLIB_ACCESS_ON_DEMAND;
+	return qflags;
+};
+
+static enum ib_access_flags __to_ib_access_flags(int qflags)
+{
+	enum ib_access_flags iflags = 0;
+
+	if (qflags & BNXT_QPLIB_ACCESS_LOCAL_WRITE)
+		iflags |= IB_ACCESS_LOCAL_WRITE;
+	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_WRITE)
+		iflags |= IB_ACCESS_REMOTE_WRITE;
+	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_READ)
+		iflags |= IB_ACCESS_REMOTE_READ;
+	if (qflags & BNXT_QPLIB_ACCESS_REMOTE_ATOMIC)
+		iflags |= IB_ACCESS_REMOTE_ATOMIC;
+	if (qflags & BNXT_QPLIB_ACCESS_MW_BIND)
+		iflags |= IB_ACCESS_MW_BIND;
+	if (qflags & BNXT_QPLIB_ACCESS_ZERO_BASED)
+		iflags |= IB_ZERO_BASED;
+	if (qflags & BNXT_QPLIB_ACCESS_ON_DEMAND)
+		iflags |= IB_ACCESS_ON_DEMAND;
+	return iflags;
+};
+
 /* Completion Queues */
 int bnxt_re_destroy_cq(struct ib_cq *ib_cq)
 {
@@ -785,6 +827,340 @@ int bnxt_re_req_notify_cq(struct ib_cq *ib_cq,
 	return 0;
 }
 
+/* Memory Regions */
+struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *ib_pd, int mr_access_flags)
+{
+	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct bnxt_re_mr *mr;
+	u64 pbl = 0;
+	int rc;
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	mr->rdev = rdev;
+	mr->qplib_mr.pd = &pd->qplib_pd;
+	mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+	mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+
+	/* Allocate and register 0 as the address */
+	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+	if (rc)
+		goto fail;
+
+	mr->qplib_mr.hwq.level = PBL_LVL_MAX;
+	mr->qplib_mr.total_size = -1; /* Infinte length */
+	rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, &pbl, 0, false);
+	if (rc)
+		goto fail_mr;
+
+	mr->ib_mr.lkey = mr->qplib_mr.lkey;
+	if (mr_access_flags & (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ |
+			       IB_ACCESS_REMOTE_ATOMIC))
+		mr->ib_mr.rkey = mr->ib_mr.lkey;
+	atomic_inc(&rdev->mr_count);
+
+	return &mr->ib_mr;
+
+fail_mr:
+	bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+fail:
+	kfree(mr);
+	return ERR_PTR(rc);
+}
+
+int bnxt_re_dereg_mr(struct ib_mr *ib_mr)
+{
+	struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+	struct bnxt_re_dev *rdev = mr->rdev;
+	int rc = 0;
+
+	if (mr->npages && mr->pages) {
+		rc = bnxt_qplib_free_fast_reg_page_list(&rdev->qplib_res,
+							&mr->qplib_frpl);
+		kfree(mr->pages);
+		mr->npages = 0;
+		mr->pages = NULL;
+	}
+	rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+
+	if (!IS_ERR(mr->ib_umem) && mr->ib_umem)
+		ib_umem_release(mr->ib_umem);
+
+	kfree(mr);
+	atomic_dec(&rdev->mr_count);
+	return rc;
+}
+
+static int bnxt_re_set_page(struct ib_mr *ib_mr, u64 addr)
+{
+	struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+
+	if (unlikely(mr->npages == mr->qplib_frpl.max_pg_ptrs))
+		return -ENOMEM;
+
+	mr->pages[mr->npages++] = addr;
+	return 0;
+}
+
+int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
+		      unsigned int *sg_offset)
+{
+	struct bnxt_re_mr *mr = container_of(ib_mr, struct bnxt_re_mr, ib_mr);
+
+	mr->npages = 0;
+	return ib_sg_to_pages(ib_mr, sg, sg_nents, sg_offset, bnxt_re_set_page);
+}
+
+struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type type,
+			       u32 max_num_sg)
+{
+	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct bnxt_re_mr *mr = NULL;
+	int rc;
+
+	if (type != IB_MR_TYPE_MEM_REG) {
+		dev_dbg(rdev_to_dev(rdev), "MR type 0x%x not supported", type);
+		return ERR_PTR(-EINVAL);
+	}
+	if (max_num_sg > MAX_PBL_LVL_1_PGS)
+		return ERR_PTR(-EINVAL);
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	mr->rdev = rdev;
+	mr->qplib_mr.pd = &pd->qplib_pd;
+	mr->qplib_mr.flags = BNXT_QPLIB_FR_PMR;
+	mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+
+	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+	if (rc)
+		goto fail;
+
+	mr->ib_mr.lkey = mr->qplib_mr.lkey;
+	mr->ib_mr.rkey = mr->ib_mr.lkey;
+
+	mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
+	if (!mr->pages) {
+		rc = -ENOMEM;
+		goto fail;
+	}
+	rc = bnxt_qplib_alloc_fast_reg_page_list(&rdev->qplib_res,
+						 &mr->qplib_frpl, max_num_sg);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev),
+			"Failed to allocate HW FR page list");
+		goto fail_mr;
+	}
+
+	atomic_inc(&rdev->mr_count);
+	return &mr->ib_mr;
+
+fail_mr:
+	bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+fail:
+	kfree(mr->pages);
+	kfree(mr);
+	return ERR_PTR(rc);
+}
+
+/* Fast Memory Regions */
+struct ib_fmr *bnxt_re_alloc_fmr(struct ib_pd *ib_pd, int mr_access_flags,
+				 struct ib_fmr_attr *fmr_attr)
+{
+	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct bnxt_re_fmr *fmr;
+	int rc;
+
+	if (fmr_attr->max_pages > MAX_PBL_LVL_2_PGS ||
+	    fmr_attr->max_maps > rdev->dev_attr.max_map_per_fmr) {
+		dev_err(rdev_to_dev(rdev), "Allocate FMR exceeded Max limit");
+		return ERR_PTR(-ENOMEM);
+	}
+	fmr = kzalloc(sizeof(*fmr), GFP_KERNEL);
+	if (!fmr)
+		return ERR_PTR(-ENOMEM);
+
+	fmr->rdev = rdev;
+	fmr->qplib_fmr.pd = &pd->qplib_pd;
+	fmr->qplib_fmr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_PMR;
+
+	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &fmr->qplib_fmr);
+	if (rc)
+		goto fail;
+
+	fmr->qplib_fmr.flags = __from_ib_access_flags(mr_access_flags);
+	fmr->ib_fmr.lkey = fmr->qplib_fmr.lkey;
+	fmr->ib_fmr.rkey = fmr->ib_fmr.lkey;
+
+	atomic_inc(&rdev->mr_count);
+	return &fmr->ib_fmr;
+fail:
+	kfree(fmr);
+	return ERR_PTR(rc);
+}
+
+int bnxt_re_map_phys_fmr(struct ib_fmr *ib_fmr, u64 *page_list, int list_len,
+			 u64 iova)
+{
+	struct bnxt_re_fmr *fmr = container_of(ib_fmr, struct bnxt_re_fmr,
+					     ib_fmr);
+	struct bnxt_re_dev *rdev = fmr->rdev;
+	int rc;
+
+	fmr->qplib_fmr.va = iova;
+	fmr->qplib_fmr.total_size = list_len * PAGE_SIZE;
+
+	rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &fmr->qplib_fmr, page_list,
+			       list_len, true);
+	if (rc)
+		dev_err(rdev_to_dev(rdev), "Failed to map FMR for lkey = 0x%x!",
+			fmr->ib_fmr.lkey);
+	return rc;
+}
+
+int bnxt_re_unmap_fmr(struct list_head *fmr_list)
+{
+	struct bnxt_re_dev *rdev;
+	struct bnxt_re_fmr *fmr;
+	struct ib_fmr *ib_fmr;
+	int rc = 0;
+
+	/* Validate each FMRs inside the fmr_list */
+	list_for_each_entry(ib_fmr, fmr_list, list) {
+		fmr = container_of(ib_fmr, struct bnxt_re_fmr, ib_fmr);
+		rdev = fmr->rdev;
+
+		if (rdev) {
+			rc = bnxt_qplib_dereg_mrw(&rdev->qplib_res,
+						  &fmr->qplib_fmr, true);
+			if (rc)
+				break;
+		}
+	}
+	return rc;
+}
+
+int bnxt_re_dealloc_fmr(struct ib_fmr *ib_fmr)
+{
+	struct bnxt_re_fmr *fmr = container_of(ib_fmr, struct bnxt_re_fmr,
+					       ib_fmr);
+	struct bnxt_re_dev *rdev = fmr->rdev;
+	int rc;
+
+	rc = bnxt_qplib_free_mrw(&rdev->qplib_res, &fmr->qplib_fmr);
+	if (rc)
+		dev_err(rdev_to_dev(rdev), "Failed to free FMR");
+
+	kfree(fmr);
+	atomic_dec(&rdev->mr_count);
+	return rc;
+}
+
+/* uverbs */
+struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *ib_pd, u64 start, u64 length,
+				  u64 virt_addr, int mr_access_flags,
+				  struct ib_udata *udata)
+{
+	struct bnxt_re_pd *pd = container_of(ib_pd, struct bnxt_re_pd, ib_pd);
+	struct bnxt_re_dev *rdev = pd->rdev;
+	struct bnxt_re_mr *mr;
+	struct ib_umem *umem;
+	u64 *pbl_tbl, *pbl_tbl_orig;
+	int i, umem_pgs, pages, page_shift, rc;
+	struct scatterlist *sg;
+	int entry;
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	mr->rdev = rdev;
+	mr->qplib_mr.pd = &pd->qplib_pd;
+	mr->qplib_mr.flags = __from_ib_access_flags(mr_access_flags);
+	mr->qplib_mr.type = CMDQ_ALLOCATE_MRW_MRW_FLAGS_MR;
+
+	umem = ib_umem_get(ib_pd->uobject->context, start, length,
+			   mr_access_flags, 0);
+	if (IS_ERR(umem)) {
+		dev_err(rdev_to_dev(rdev), "Failed to get umem");
+		rc = -EFAULT;
+		goto free_mr;
+	}
+	mr->ib_umem = umem;
+
+	rc = bnxt_qplib_alloc_mrw(&rdev->qplib_res, &mr->qplib_mr);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to allocate MR");
+		goto release_umem;
+	}
+	/* The fixed portion of the rkey is the same as the lkey */
+	mr->ib_mr.rkey = mr->qplib_mr.rkey;
+
+	mr->qplib_mr.va = virt_addr;
+	umem_pgs = ib_umem_page_count(umem);
+	if (!umem_pgs) {
+		dev_err(rdev_to_dev(rdev), "umem is invalid!");
+		rc = -EINVAL;
+		goto free_mrw;
+	}
+	mr->qplib_mr.total_size = length;
+
+	pbl_tbl = kcalloc(umem_pgs, sizeof(u64 *), GFP_KERNEL);
+	if (!pbl_tbl) {
+		rc = -EINVAL;
+		goto free_mrw;
+	}
+	pbl_tbl_orig = pbl_tbl;
+
+	page_shift = ilog2(umem->page_size);
+	if (umem->hugetlb) {
+		dev_err(rdev_to_dev(rdev), "umem hugetlb not supported!");
+		rc = -EFAULT;
+		goto fail;
+	}
+	if (umem->page_size != PAGE_SIZE) {
+		dev_err(rdev_to_dev(rdev), "umem page size unsupported!");
+		rc = -EFAULT;
+		goto fail;
+	}
+	/* Map umem buf ptrs to the PBL */
+	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
+		pages = sg_dma_len(sg) >> page_shift;
+		for (i = 0; i < pages; i++, pbl_tbl++)
+			*pbl_tbl = sg_dma_address(sg) + (i << page_shift);
+	}
+	rc = bnxt_qplib_reg_mr(&rdev->qplib_res, &mr->qplib_mr, pbl_tbl_orig,
+			       umem_pgs, false);
+	if (rc) {
+		dev_err(rdev_to_dev(rdev), "Failed to register user MR");
+		goto fail;
+	}
+
+	kfree(pbl_tbl_orig);
+
+	mr->ib_mr.lkey = mr->qplib_mr.lkey;
+	mr->ib_mr.rkey = mr->qplib_mr.lkey;
+	atomic_inc(&rdev->mr_count);
+
+	return &mr->ib_mr;
+fail:
+	kfree(pbl_tbl_orig);
+free_mrw:
+	bnxt_qplib_free_mrw(&rdev->qplib_res, &mr->qplib_mr);
+release_umem:
+	ib_umem_release(umem);
+free_mr:
+	kfree(mr);
+	return ERR_PTR(rc);
+}
+
 struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
 					   struct ib_udata *udata)
 {
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
index 14c9e02..ba9a4c9 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
@@ -70,6 +70,34 @@ struct bnxt_re_cq {
 	struct ib_umem		*umem;
 };
 
+struct bnxt_re_mr {
+	struct bnxt_re_dev	*rdev;
+	struct ib_mr		ib_mr;
+	struct ib_umem		*ib_umem;
+	struct bnxt_qplib_mrw	qplib_mr;
+	u32			npages;
+	u64			*pages;
+	struct bnxt_qplib_frpl	qplib_frpl;
+};
+
+struct bnxt_re_frpl {
+	struct bnxt_re_dev		*rdev;
+	struct bnxt_qplib_frpl		qplib_frpl;
+	u64				*page_list;
+};
+
+struct bnxt_re_fmr {
+	struct bnxt_re_dev	*rdev;
+	struct ib_fmr		ib_fmr;
+	struct bnxt_qplib_mrw	qplib_fmr;
+};
+
+struct bnxt_re_mw {
+	struct bnxt_re_dev	*rdev;
+	struct ib_mw		ib_mw;
+	struct bnxt_qplib_mrw	qplib_mw;
+};
+
 struct bnxt_re_ucontext {
 	struct bnxt_re_dev	*rdev;
 	struct ib_ucontext	ib_uctx;
@@ -119,6 +147,22 @@ struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
 				struct ib_udata *udata);
 int bnxt_re_destroy_cq(struct ib_cq *cq);
 int bnxt_re_req_notify_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags);
+struct ib_mr *bnxt_re_get_dma_mr(struct ib_pd *pd, int mr_access_flags);
+
+int bnxt_re_map_mr_sg(struct ib_mr *ib_mr, struct scatterlist *sg, int sg_nents,
+		      unsigned int *sg_offset);
+struct ib_mr *bnxt_re_alloc_mr(struct ib_pd *ib_pd, enum ib_mr_type mr_type,
+			       u32 max_num_sg);
+int bnxt_re_dereg_mr(struct ib_mr *mr);
+struct ib_fmr *bnxt_re_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+				 struct ib_fmr_attr *fmr_attr);
+int bnxt_re_map_phys_fmr(struct ib_fmr *fmr, u64 *page_list, int list_len,
+			 u64 iova);
+int bnxt_re_unmap_fmr(struct list_head *fmr_list);
+int bnxt_re_dealloc_fmr(struct ib_fmr *fmr);
+struct ib_mr *bnxt_re_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+				  u64 virt_addr, int mr_access_flags,
+				  struct ib_udata *udata);
 struct ib_ucontext *bnxt_re_alloc_ucontext(struct ib_device *ibdev,
 					   struct ib_udata *udata);
 int bnxt_re_dealloc_ucontext(struct ib_ucontext *context);
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
index f270b4c..8ef465c 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
@@ -459,6 +459,17 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 	ibdev->create_cq		= bnxt_re_create_cq;
 	ibdev->destroy_cq		= bnxt_re_destroy_cq;
 	ibdev->req_notify_cq		= bnxt_re_req_notify_cq;
+
+	ibdev->get_dma_mr		= bnxt_re_get_dma_mr;
+	ibdev->dereg_mr			= bnxt_re_dereg_mr;
+	ibdev->alloc_mr			= bnxt_re_alloc_mr;
+	ibdev->map_mr_sg		= bnxt_re_map_mr_sg;
+	ibdev->alloc_fmr		= bnxt_re_alloc_fmr;
+	ibdev->map_phys_fmr		= bnxt_re_map_phys_fmr;
+	ibdev->unmap_fmr		= bnxt_re_unmap_fmr;
+	ibdev->dealloc_fmr		= bnxt_re_dealloc_fmr;
+
+	ibdev->reg_user_mr		= bnxt_re_reg_user_mr;
 	ibdev->alloc_ucontext		= bnxt_re_alloc_ucontext;
 	ibdev->dealloc_ucontext		= bnxt_re_dealloc_ucontext;
 	ibdev->mmap			= bnxt_re_mmap;
-- 
2.5.5

^ permalink raw reply related

* [PATCH for bnxt_re V3 14/21] bnxt_re: Support post_send verb
From: Selvin Xavier @ 2016-12-20  9:13 UTC (permalink / raw)
  To: dledford, linux-rdma
  Cc: netdev, michael.chan, Selvin Xavier, Eddie Wai, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482225211-22423-1-git-send-email-selvin.xavier@broadcom.com>

Enables the ib_post_send fastpath verb for posting Send work
requests on QPs.

v2: Fixed some sparse warnings
v3: Fixes endianness related warnings reported by sparse. Changes
    some of the macros to inline functions.

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
---
 drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c    | 268 ++++++++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h    |   8 +
 drivers/infiniband/hw/bnxtre/bnxt_re.h          |   4 +
 drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c | 544 +++++++++++++++++++++++-
 drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h |   2 +
 drivers/infiniband/hw/bnxtre/bnxt_re_main.c     |   1 +
 6 files changed, 824 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c
index 8fc5928..d1ccf66 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c
@@ -1078,6 +1078,274 @@ int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res,
 	return 0;
 }
 
+void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
+				struct bnxt_qplib_sge *sge)
+{
+	struct bnxt_qplib_q *sq = &qp->sq;
+	u32 sw_prod;
+
+	memset(sge, 0, sizeof(*sge));
+
+	if (qp->sq_hdr_buf) {
+		sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+		sge->addr = (dma_addr_t)(qp->sq_hdr_buf_map +
+					 sw_prod * qp->sq_hdr_buf_size);
+		sge->lkey = 0xFFFFFFFF;
+		sge->size = qp->sq_hdr_buf_size;
+		return qp->sq_hdr_buf + sw_prod * sge->size;
+	}
+	return NULL;
+}
+
+void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp)
+{
+	struct bnxt_qplib_q *sq = &qp->sq;
+	struct dbr_dbr db_msg = { 0 };
+	u32 sw_prod;
+
+	sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+
+	db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) &
+				   DBR_DBR_INDEX_MASK);
+	db_msg.type_xid =
+		cpu_to_le32(((qp->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
+			    DBR_DBR_TYPE_SQ);
+	/* Flush all the WQE writes to HW */
+	wmb();
+	__iowrite64_copy(qp->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64));
+}
+
+int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
+			 struct bnxt_qplib_swqe *wqe)
+{
+	struct bnxt_qplib_q *sq = &qp->sq;
+	struct bnxt_qplib_swq *swq;
+	struct sq_send *hw_sq_send_hdr, **hw_sq_send_ptr;
+	struct sq_sge *hw_sge;
+	u32 sw_prod;
+	u8 wqe_size16;
+	int i, rc = 0, data_len = 0, pkt_num = 0;
+	__le32 temp32;
+
+	if (qp->state != CMDQ_MODIFY_QP_NEW_STATE_RTS) {
+		rc = -EINVAL;
+		goto done;
+	}
+	if (HWQ_CMP((sq->hwq.prod + 1), &sq->hwq) ==
+	    HWQ_CMP(sq->hwq.cons, &sq->hwq)) {
+		rc = -ENOMEM;
+		goto done;
+	}
+	sw_prod = HWQ_CMP(sq->hwq.prod, &sq->hwq);
+	swq = &sq->swq[sw_prod];
+	swq->wr_id = wqe->wr_id;
+	swq->type = wqe->type;
+	swq->flags = wqe->flags;
+	if (qp->sig_type)
+		swq->flags |= SQ_SEND_FLAGS_SIGNAL_COMP;
+	swq->start_psn = sq->psn & BTH_PSN_MASK;
+
+	hw_sq_send_ptr = (struct sq_send **)sq->hwq.pbl_ptr;
+	hw_sq_send_hdr = &hw_sq_send_ptr[get_sqe_pg(sw_prod)]
+					[get_sqe_idx(sw_prod)];
+
+	memset(hw_sq_send_hdr, 0, BNXT_QPLIB_MAX_SQE_ENTRY_SIZE);
+
+	if (wqe->flags & BNXT_QPLIB_SWQE_FLAGS_INLINE) {
+		/* Copy the inline data */
+		if (wqe->inline_len > BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
+			dev_warn(&sq->hwq.pdev->dev,
+				 "QPLIB: Inline data length > 96 detected");
+			data_len = BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH;
+		} else {
+			data_len = wqe->inline_len;
+		}
+		memcpy(hw_sq_send_hdr->data, wqe->inline_data, data_len);
+		wqe_size16 = (data_len + 15) >> 4;
+	} else {
+		for (i = 0, hw_sge = (struct sq_sge *)hw_sq_send_hdr->data;
+		     i < wqe->num_sge; i++, hw_sge++) {
+			hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr);
+			hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey);
+			hw_sge->size = cpu_to_le32(wqe->sg_list[i].size);
+			data_len += wqe->sg_list[i].size;
+		}
+		/* Each SGE entry = 1 WQE size16 */
+		wqe_size16 = wqe->num_sge;
+	}
+
+	/* Specifics */
+	switch (wqe->type) {
+	case BNXT_QPLIB_SWQE_TYPE_SEND:
+		if (qp->type == CMDQ_CREATE_QP1_TYPE_GSI) {
+			/* Assemble info for Raw Ethertype QPs */
+			struct sq_send_raweth_qp1 *sqe =
+				(struct sq_send_raweth_qp1 *)hw_sq_send_hdr;
+
+			sqe->wqe_type = wqe->type;
+			sqe->flags = wqe->flags;
+			sqe->wqe_size = wqe_size16 +
+				((offsetof(typeof(*sqe), data) + 15) >> 4);
+			sqe->cfa_action = cpu_to_le16(wqe->rawqp1.cfa_action);
+			sqe->lflags = cpu_to_le16(wqe->rawqp1.lflags);
+			sqe->length = cpu_to_le32(data_len);
+			sqe->cfa_meta = cpu_to_le32((wqe->rawqp1.cfa_meta &
+				SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_MASK) <<
+				SQ_SEND_RAWETH_QP1_CFA_META_VLAN_VID_SFT);
+
+			break;
+		}
+		/* else, just fall thru */
+	case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM:
+	case BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV:
+	{
+		struct sq_send *sqe = (struct sq_send *)hw_sq_send_hdr;
+
+		sqe->wqe_type = wqe->type;
+		sqe->flags = wqe->flags;
+		sqe->wqe_size = wqe_size16 +
+				((offsetof(typeof(*sqe), data) + 15) >> 4);
+		sqe->inv_key_or_imm_data = cpu_to_le32(
+						wqe->send.inv_key);
+		if (qp->type == CMDQ_CREATE_QP_TYPE_UD) {
+			sqe->q_key = cpu_to_le32(wqe->send.q_key);
+			sqe->dst_qp = cpu_to_le32(
+					wqe->send.dst_qp & SQ_SEND_DST_QP_MASK);
+			sqe->length = cpu_to_le32(data_len);
+			sqe->avid = cpu_to_le32(wqe->send.avid &
+						SQ_SEND_AVID_MASK);
+			sq->psn = (sq->psn + 1) & BTH_PSN_MASK;
+		} else {
+			sqe->length = cpu_to_le32(data_len);
+			sqe->dst_qp = 0;
+			sqe->avid = 0;
+			if (qp->mtu)
+				pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
+			if (!pkt_num)
+				pkt_num = 1;
+			sq->psn = (sq->psn + pkt_num) & BTH_PSN_MASK;
+		}
+		break;
+	}
+	case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE:
+	case BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM:
+	case BNXT_QPLIB_SWQE_TYPE_RDMA_READ:
+	{
+		struct sq_rdma *sqe = (struct sq_rdma *)hw_sq_send_hdr;
+
+		sqe->wqe_type = wqe->type;
+		sqe->flags = wqe->flags;
+		sqe->wqe_size = wqe_size16 +
+				((offsetof(typeof(*sqe), data) + 15) >> 4);
+		sqe->imm_data = cpu_to_le32(wqe->rdma.inv_key);
+		sqe->length = cpu_to_le32((u32)data_len);
+		sqe->remote_va = cpu_to_le64(wqe->rdma.remote_va);
+		sqe->remote_key = cpu_to_le32(wqe->rdma.r_key);
+		if (qp->mtu)
+			pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
+		if (!pkt_num)
+			pkt_num = 1;
+		sq->psn = (sq->psn + pkt_num) & BTH_PSN_MASK;
+		break;
+	}
+	case BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP:
+	case BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD:
+	{
+		struct sq_atomic *sqe = (struct sq_atomic *)hw_sq_send_hdr;
+
+		sqe->wqe_type = wqe->type;
+		sqe->flags = wqe->flags;
+		sqe->remote_key = cpu_to_le32(wqe->atomic.r_key);
+		sqe->remote_va = cpu_to_le64(wqe->atomic.remote_va);
+		sqe->swap_data = cpu_to_le64(wqe->atomic.swap_data);
+		sqe->cmp_data = cpu_to_le64(wqe->atomic.cmp_data);
+		if (qp->mtu)
+			pkt_num = (data_len + qp->mtu - 1) / qp->mtu;
+		if (!pkt_num)
+			pkt_num = 1;
+		sq->psn = (sq->psn + pkt_num) & BTH_PSN_MASK;
+		break;
+	}
+	case BNXT_QPLIB_SWQE_TYPE_LOCAL_INV:
+	{
+		struct sq_localinvalidate *sqe =
+				(struct sq_localinvalidate *)hw_sq_send_hdr;
+
+		sqe->wqe_type = wqe->type;
+		sqe->flags = wqe->flags;
+		sqe->inv_l_key = cpu_to_le32(wqe->local_inv.inv_l_key);
+
+		break;
+	}
+	case BNXT_QPLIB_SWQE_TYPE_FAST_REG_MR:
+	{
+		struct sq_fr_pmr *sqe = (struct sq_fr_pmr *)hw_sq_send_hdr;
+
+		sqe->wqe_type = wqe->type;
+		sqe->flags = wqe->flags;
+		sqe->access_cntl = wqe->frmr.access_cntl |
+				   SQ_FR_PMR_ACCESS_CNTL_LOCAL_WRITE;
+		sqe->zero_based_page_size_log =
+			(wqe->frmr.pg_sz_log & SQ_FR_PMR_PAGE_SIZE_LOG_MASK) <<
+			SQ_FR_PMR_PAGE_SIZE_LOG_SFT |
+			(wqe->frmr.zero_based ? SQ_FR_PMR_ZERO_BASED : 0);
+		sqe->l_key = cpu_to_le32(wqe->frmr.l_key);
+		temp32 = cpu_to_le32(wqe->frmr.length);
+		memcpy(sqe->length, &temp32, sizeof(wqe->frmr.length));
+		sqe->numlevels_pbl_page_size_log =
+			((wqe->frmr.pbl_pg_sz_log <<
+					SQ_FR_PMR_PBL_PAGE_SIZE_LOG_SFT) &
+					SQ_FR_PMR_PBL_PAGE_SIZE_LOG_MASK) |
+			((wqe->frmr.levels << SQ_FR_PMR_NUMLEVELS_SFT) &
+					SQ_FR_PMR_NUMLEVELS_MASK);
+
+		for (i = 0; i < wqe->frmr.page_list_len; i++)
+			wqe->frmr.pbl_ptr[i] = cpu_to_le64(
+						wqe->frmr.page_list[i] |
+						PTU_PTE_VALID);
+		sqe->pblptr = cpu_to_le64(wqe->frmr.pbl_dma_ptr);
+		sqe->va = cpu_to_le64(wqe->frmr.va);
+
+		break;
+	}
+	case BNXT_QPLIB_SWQE_TYPE_BIND_MW:
+	{
+		struct sq_bind *sqe = (struct sq_bind *)hw_sq_send_hdr;
+
+		sqe->wqe_type = wqe->type;
+		sqe->flags = wqe->flags;
+		sqe->access_cntl = wqe->bind.access_cntl;
+		sqe->mw_type_zero_based = wqe->bind.mw_type |
+			(wqe->bind.zero_based ? SQ_BIND_ZERO_BASED : 0);
+		sqe->parent_l_key = cpu_to_le32(wqe->bind.parent_l_key);
+		sqe->l_key = cpu_to_le32(wqe->bind.r_key);
+		sqe->va = cpu_to_le64(wqe->bind.va);
+		temp32 = cpu_to_le32(wqe->bind.length);
+		memcpy(&sqe->length, &temp32, sizeof(wqe->bind.length));
+		break;
+	}
+	default:
+		/* Bad wqe, return error */
+		rc = -EINVAL;
+		goto done;
+	}
+	swq->next_psn = sq->psn & BTH_PSN_MASK;
+	if (swq->psn_search) {
+		swq->psn_search->opcode_start_psn = cpu_to_le32(
+			((swq->start_psn << SQ_PSN_SEARCH_START_PSN_SFT) &
+			 SQ_PSN_SEARCH_START_PSN_MASK) |
+			((wqe->type << SQ_PSN_SEARCH_OPCODE_SFT) &
+			 SQ_PSN_SEARCH_OPCODE_MASK));
+		swq->psn_search->flags_next_psn = cpu_to_le32(
+			((swq->next_psn << SQ_PSN_SEARCH_NEXT_PSN_SFT) &
+			 SQ_PSN_SEARCH_NEXT_PSN_MASK));
+	}
+
+	sq->hwq.prod++;
+done:
+	return rc;
+}
+
 /* CQ */
 
 /* Spinlock must be held */
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h
index e62c750..0a87920 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h
@@ -38,6 +38,9 @@
 
 #ifndef __BNXT_QPLIB_FP_H__
 #define __BNXT_QPLIB_FP_H__
+
+#define BNXT_QPLIB_ETHTYPE_ROCEV1      0x8915
+
 struct bnxt_qplib_sge {
 	u64				addr;
 	u32				lkey;
@@ -415,6 +418,11 @@ int bnxt_qplib_create_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
 int bnxt_qplib_modify_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
 int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
 int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
+void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
+				struct bnxt_qplib_sge *sge);
+void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp);
+int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
+			 struct bnxt_qplib_swqe *wqe);
 int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re.h b/drivers/infiniband/hw/bnxtre/bnxt_re.h
index f53ebd9..6281e23 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re.h
@@ -131,6 +131,10 @@ struct bnxt_re_dev {
 #define to_bnxt_re_dev(ptr, member)	\
 	container_of((ptr), struct bnxt_re_dev, member)
 
+#define BNXT_RE_ROCE_V1_PACKET		0
+#define BNXT_RE_ROCEV2_IPV4_PACKET	2
+#define BNXT_RE_ROCEV2_IPV6_PACKET	3
+
 static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
 {
 	if (rdev)
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
index 9df6fb8..ff406e9 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
@@ -60,6 +60,20 @@
 #include "bnxt_re_ib_verbs.h"
 #include <rdma/bnxtre-abi.h>
 
+static int bnxt_re_build_sgl(struct ib_sge *ib_sg_list,
+			     struct bnxt_qplib_sge *sg_list, int num)
+{
+	int i, total = 0;
+
+	for (i = 0; i < num; i++) {
+		sg_list[i].addr = ib_sg_list[i].addr;
+		sg_list[i].lkey = ib_sg_list[i].lkey;
+		sg_list[i].size = ib_sg_list[i].length;
+		total += sg_list[i].size;
+	}
+	return total;
+}
+
 /* Device */
 struct net_device *bnxt_re_get_netdev(struct ib_device *ibdev, u8 port_num)
 {
@@ -698,8 +712,6 @@ static u8 __from_ib_qp_type(enum ib_qp_type type)
 		return CMDQ_CREATE_QP_TYPE_RC;
 	case IB_QPT_UD:
 		return CMDQ_CREATE_QP_TYPE_UD;
-	case IB_QPT_RAW_ETHERTYPE:
-		return CMDQ_CREATE_QP_TYPE_RAW_ETHERTYPE;
 	default:
 		return IB_QPT_MAX;
 	}
@@ -873,7 +885,6 @@ struct ib_qp *bnxt_re_create_qp(struct ib_pd *ib_pd,
 	struct bnxt_re_dev *rdev = pd->rdev;
 	struct bnxt_qplib_dev_attr *dev_attr = &rdev->dev_attr;
 	struct bnxt_re_qp *qp;
-	struct bnxt_re_srq *srq;
 	struct bnxt_re_cq *cq;
 	int rc, entries;
 
@@ -1445,6 +1456,533 @@ int bnxt_re_query_qp(struct ib_qp *ib_qp, struct ib_qp_attr *qp_attr,
 	return 0;
 }
 
+/* Routine for sending QP1 packets for RoCE V1 an V2
+ */
+static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
+				     struct ib_send_wr *wr,
+				     struct bnxt_qplib_swqe *wqe,
+				     int payload_size)
+{
+	struct ib_device *ibdev = &qp->rdev->ibdev;
+	struct bnxt_re_ah *ah = container_of(ud_wr(wr)->ah, struct bnxt_re_ah,
+					     ib_ah);
+	struct bnxt_qplib_ah *qplib_ah = &ah->qplib_ah;
+	struct bnxt_qplib_sge sge;
+	union ib_gid sgid;
+	u8 nw_type;
+	u16 ether_type;
+	struct ib_gid_attr sgid_attr;
+	union ib_gid dgid;
+	bool is_eth = false;
+	bool is_vlan = false;
+	bool is_grh = false;
+	bool is_udp = false;
+	u8 ip_version = 0;
+	u16 vlan_id = 0xFFFF;
+	void *buf;
+	int i, rc = 0, size;
+
+	memset(&qp->qp1_hdr, 0, sizeof(qp->qp1_hdr));
+
+	rc = ib_get_cached_gid(ibdev, 1,
+			       qplib_ah->host_sgid_index, &sgid,
+			       &sgid_attr);
+	if (rc) {
+		dev_err(rdev_to_dev(qp->rdev),
+			"Failed to query gid at index %d",
+			qplib_ah->host_sgid_index);
+		return rc;
+	}
+	if (sgid_attr.ndev) {
+		if (is_vlan_dev(sgid_attr.ndev))
+			vlan_id = vlan_dev_vlan_id(sgid_attr.ndev);
+		dev_put(sgid_attr.ndev);
+	}
+	/* Get network header type for this GID */
+	nw_type = ib_gid_to_network_type(sgid_attr.gid_type, &sgid);
+	switch (nw_type) {
+	case RDMA_NETWORK_IPV4:
+		nw_type = BNXT_RE_ROCEV2_IPV4_PACKET;
+		break;
+	case RDMA_NETWORK_IPV6:
+		nw_type = BNXT_RE_ROCEV2_IPV6_PACKET;
+		break;
+	default:
+		nw_type = BNXT_RE_ROCE_V1_PACKET;
+		break;
+	}
+	memcpy(&dgid.raw, &qplib_ah->dgid, 16);
+	is_udp = sgid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+	if (is_udp) {
+		if (ipv6_addr_v4mapped((struct in6_addr *)&sgid)) {
+			ip_version = 4;
+			ether_type = ETH_P_IP;
+		} else {
+			ip_version = 6;
+			ether_type = ETH_P_IPV6;
+		}
+		is_grh = false;
+	} else {
+		ether_type = BNXT_QPLIB_ETHTYPE_ROCEV1;
+		is_grh = true;
+	}
+
+	is_eth = true;
+	is_vlan = (vlan_id && (vlan_id < 0x1000)) ? true : false;
+
+	ib_ud_header_init(payload_size, !is_eth, is_eth, is_vlan, is_grh,
+			  ip_version, is_udp, 0, &qp->qp1_hdr);
+
+	/* ETH */
+	ether_addr_copy(qp->qp1_hdr.eth.dmac_h, ah->qplib_ah.dmac);
+	ether_addr_copy(qp->qp1_hdr.eth.smac_h, qp->qplib_qp.smac);
+
+	/* For vlan, check the sgid for vlan existence */
+
+	if (!is_vlan) {
+		qp->qp1_hdr.eth.type = cpu_to_be16(ether_type);
+	} else {
+		qp->qp1_hdr.vlan.type = cpu_to_be16(ether_type);
+		qp->qp1_hdr.vlan.tag = cpu_to_be16(vlan_id);
+	}
+
+	if (is_grh || (ip_version == 6)) {
+		memcpy(qp->qp1_hdr.grh.source_gid.raw, sgid.raw, sizeof(sgid));
+		memcpy(qp->qp1_hdr.grh.destination_gid.raw, qplib_ah->dgid.data,
+		       sizeof(sgid));
+		qp->qp1_hdr.grh.hop_limit     = qplib_ah->hop_limit;
+	}
+
+	if (ip_version == 4) {
+		qp->qp1_hdr.ip4.tos = 0;
+		qp->qp1_hdr.ip4.id = 0;
+		qp->qp1_hdr.ip4.frag_off = htons(IP_DF);
+		qp->qp1_hdr.ip4.ttl = qplib_ah->hop_limit;
+
+		memcpy(&qp->qp1_hdr.ip4.saddr, sgid.raw + 12, 4);
+		memcpy(&qp->qp1_hdr.ip4.daddr, qplib_ah->dgid.data + 12, 4);
+		qp->qp1_hdr.ip4.check = ib_ud_ip4_csum(&qp->qp1_hdr);
+	}
+
+	if (is_udp) {
+		qp->qp1_hdr.udp.dport = htons(ROCE_V2_UDP_DPORT);
+		qp->qp1_hdr.udp.sport = htons(0x8CD1);
+		qp->qp1_hdr.udp.csum = 0;
+	}
+
+	/* BTH */
+	if (wr->opcode == IB_WR_SEND_WITH_IMM) {
+		qp->qp1_hdr.bth.opcode = IB_OPCODE_UD_SEND_ONLY_WITH_IMMEDIATE;
+		qp->qp1_hdr.immediate_present = 1;
+	} else {
+		qp->qp1_hdr.bth.opcode = IB_OPCODE_UD_SEND_ONLY;
+	}
+	if (wr->send_flags & IB_SEND_SOLICITED)
+		qp->qp1_hdr.bth.solicited_event = 1;
+	/* pad_count */
+	qp->qp1_hdr.bth.pad_count = (4 - payload_size) & 3;
+
+	/* P_key for QP1 is for all members */
+	qp->qp1_hdr.bth.pkey = cpu_to_be16(0xFFFF);
+	qp->qp1_hdr.bth.destination_qpn = IB_QP1;
+	qp->qp1_hdr.bth.ack_req = 0;
+	qp->send_psn++;
+	qp->send_psn &= BTH_PSN_MASK;
+	qp->qp1_hdr.bth.psn = cpu_to_be32(qp->send_psn);
+	/* DETH */
+	/* Use the priviledged Q_Key for QP1 */
+	qp->qp1_hdr.deth.qkey = cpu_to_be32(IB_QP1_QKEY);
+	qp->qp1_hdr.deth.source_qpn = IB_QP1;
+
+	/* Pack the QP1 to the transmit buffer */
+	buf = bnxt_qplib_get_qp1_sq_buf(&qp->qplib_qp, &sge);
+	if (buf) {
+		size = ib_ud_header_pack(&qp->qp1_hdr, buf);
+		for (i = wqe->num_sge; i; i--) {
+			wqe->sg_list[i].addr = wqe->sg_list[i - 1].addr;
+			wqe->sg_list[i].lkey = wqe->sg_list[i - 1].lkey;
+			wqe->sg_list[i].size = wqe->sg_list[i - 1].size;
+		}
+
+		/*
+		 * Max Header buf size for IPV6 RoCE V2 is 86,
+		 * which is same as the QP1 SQ header buffer.
+		 * Header buf size for IPV4 RoCE V2 can be 66.
+		 * ETH(14) + VLAN(4)+ IP(20) + UDP (8) + BTH(20).
+		 * Subtract 20 bytes from QP1 SQ header buf size
+		 */
+		if (is_udp && ip_version == 4)
+			sge.size -= 20;
+		/*
+		 * Max Header buf size for RoCE V1 is 78.
+		 * ETH(14) + VLAN(4) + GRH(40) + BTH(20).
+		 * Subtract 8 bytes from QP1 SQ header buf size
+		 */
+		if (!is_udp)
+			sge.size -= 8;
+
+		/* Subtract 4 bytes for non vlan packets */
+		if (!is_vlan)
+			sge.size -= 4;
+
+		wqe->sg_list[0].addr = sge.addr;
+		wqe->sg_list[0].lkey = sge.lkey;
+		wqe->sg_list[0].size = sge.size;
+		wqe->num_sge++;
+
+	} else {
+		dev_err(rdev_to_dev(qp->rdev), "QP1 buffer is empty!");
+		rc = -ENOMEM;
+	}
+	return rc;
+}
+
+static int is_ud_qp(struct bnxt_re_qp *qp)
+{
+	return qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD;
+}
+
+static int bnxt_re_build_send_wqe(struct bnxt_re_qp *qp,
+				  struct ib_send_wr *wr,
+				  struct bnxt_qplib_swqe *wqe)
+{
+	struct bnxt_re_ah *ah = NULL;
+
+	if (is_ud_qp(qp)) {
+		ah = container_of(ud_wr(wr)->ah, struct bnxt_re_ah, ib_ah);
+		wqe->send.q_key = ud_wr(wr)->remote_qkey;
+		wqe->send.dst_qp = ud_wr(wr)->remote_qpn;
+		wqe->send.avid = ah->qplib_ah.id;
+	}
+	switch (wr->opcode) {
+	case IB_WR_SEND:
+		wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND;
+		break;
+	case IB_WR_SEND_WITH_IMM:
+		wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND_WITH_IMM;
+		wqe->send.imm_data = wr->ex.imm_data;
+		break;
+	case IB_WR_SEND_WITH_INV:
+		wqe->type = BNXT_QPLIB_SWQE_TYPE_SEND_WITH_INV;
+		wqe->send.inv_key = wr->ex.invalidate_rkey;
+		break;
+	default:
+		return -EINVAL;
+	}
+	if (wr->send_flags & IB_SEND_SIGNALED)
+		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+	if (wr->send_flags & IB_SEND_FENCE)
+		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+	if (wr->send_flags & IB_SEND_SOLICITED)
+		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
+	if (wr->send_flags & IB_SEND_INLINE)
+		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_INLINE;
+
+	return 0;
+}
+
+static int bnxt_re_build_rdma_wqe(struct ib_send_wr *wr,
+				  struct bnxt_qplib_swqe *wqe)
+{
+	switch (wr->opcode) {
+	case IB_WR_RDMA_WRITE:
+		wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE;
+		break;
+	case IB_WR_RDMA_WRITE_WITH_IMM:
+		wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_WRITE_WITH_IMM;
+		wqe->rdma.imm_data = wr->ex.imm_data;
+		break;
+	case IB_WR_RDMA_READ:
+		wqe->type = BNXT_QPLIB_SWQE_TYPE_RDMA_READ;
+		wqe->rdma.inv_key = wr->ex.invalidate_rkey;
+		break;
+	default:
+		return -EINVAL;
+	}
+	wqe->rdma.remote_va = rdma_wr(wr)->remote_addr;
+	wqe->rdma.r_key = rdma_wr(wr)->rkey;
+	if (wr->send_flags & IB_SEND_SIGNALED)
+		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+	if (wr->send_flags & IB_SEND_FENCE)
+		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+	if (wr->send_flags & IB_SEND_SOLICITED)
+		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
+	if (wr->send_flags & IB_SEND_INLINE)
+		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_INLINE;
+
+	return 0;
+}
+
+static int bnxt_re_build_atomic_wqe(struct ib_send_wr *wr,
+				    struct bnxt_qplib_swqe *wqe)
+{
+	switch (wr->opcode) {
+	case IB_WR_ATOMIC_CMP_AND_SWP:
+		wqe->type = BNXT_QPLIB_SWQE_TYPE_ATOMIC_CMP_AND_SWP;
+		wqe->atomic.swap_data = atomic_wr(wr)->swap;
+		break;
+	case IB_WR_ATOMIC_FETCH_AND_ADD:
+		wqe->type = BNXT_QPLIB_SWQE_TYPE_ATOMIC_FETCH_AND_ADD;
+		wqe->atomic.cmp_data = atomic_wr(wr)->compare_add;
+		break;
+	default:
+		return -EINVAL;
+	}
+	wqe->atomic.remote_va = atomic_wr(wr)->remote_addr;
+	wqe->atomic.r_key = atomic_wr(wr)->rkey;
+	if (wr->send_flags & IB_SEND_SIGNALED)
+		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+	if (wr->send_flags & IB_SEND_FENCE)
+		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+	if (wr->send_flags & IB_SEND_SOLICITED)
+		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
+	return 0;
+}
+
+static int bnxt_re_build_inv_wqe(struct ib_send_wr *wr,
+				 struct bnxt_qplib_swqe *wqe)
+{
+	wqe->type = BNXT_QPLIB_SWQE_TYPE_LOCAL_INV;
+	wqe->local_inv.inv_l_key = wr->ex.invalidate_rkey;
+
+	if (wr->send_flags & IB_SEND_SIGNALED)
+		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+	if (wr->send_flags & IB_SEND_FENCE)
+		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+	if (wr->send_flags & IB_SEND_SOLICITED)
+		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SOLICIT_EVENT;
+
+	return 0;
+}
+
+static int bnxt_re_build_reg_wqe(struct ib_reg_wr *wr,
+				 struct bnxt_qplib_swqe *wqe)
+{
+	struct bnxt_re_mr *mr = container_of(wr->mr, struct bnxt_re_mr, ib_mr);
+	struct bnxt_qplib_frpl *qplib_frpl = &mr->qplib_frpl;
+	int access = wr->access;
+
+	wqe->frmr.pbl_ptr = (__le64 *)qplib_frpl->hwq.pbl_ptr[0];
+	wqe->frmr.pbl_dma_ptr = qplib_frpl->hwq.pbl_dma_ptr[0];
+	wqe->frmr.page_list = mr->pages;
+	wqe->frmr.page_list_len = mr->npages;
+	wqe->frmr.levels = qplib_frpl->hwq.level + 1;
+	wqe->type = BNXT_QPLIB_SWQE_TYPE_REG_MR;
+
+	if (wr->wr.send_flags & IB_SEND_FENCE)
+		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_UC_FENCE;
+	if (wr->wr.send_flags & IB_SEND_SIGNALED)
+		wqe->flags |= BNXT_QPLIB_SWQE_FLAGS_SIGNAL_COMP;
+
+	if (access & IB_ACCESS_LOCAL_WRITE)
+		wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_LOCAL_WRITE;
+	if (access & IB_ACCESS_REMOTE_READ)
+		wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_REMOTE_READ;
+	if (access & IB_ACCESS_REMOTE_WRITE)
+		wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_REMOTE_WRITE;
+	if (access & IB_ACCESS_REMOTE_ATOMIC)
+		wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_REMOTE_ATOMIC;
+	if (access & IB_ACCESS_MW_BIND)
+		wqe->frmr.access_cntl |= SQ_FR_PMR_ACCESS_CNTL_WINDOW_BIND;
+
+	wqe->frmr.l_key = wr->key;
+	wqe->frmr.length = wr->mr->length;
+	wqe->frmr.pbl_pg_sz_log = (wr->mr->page_size >> PAGE_SHIFT_4K) - 1;
+	wqe->frmr.va = wr->mr->iova;
+	return 0;
+}
+
+static int bnxt_re_copy_inline_data(struct bnxt_re_dev *rdev,
+				    struct ib_send_wr *wr,
+				    struct bnxt_qplib_swqe *wqe)
+{
+	/*  Copy the inline data to the data  field */
+	u8 *in_data;
+	u32 i, sge_len;
+	void *sge_addr;
+
+	in_data = wqe->inline_data;
+	for (i = 0; i < wr->num_sge; i++) {
+		sge_addr = (void *)(unsigned long)
+				wr->sg_list[i].addr;
+		sge_len = wr->sg_list[i].length;
+
+		if ((sge_len + wqe->inline_len) >
+		    BNXT_QPLIB_SWQE_MAX_INLINE_LENGTH) {
+			dev_err(rdev_to_dev(rdev),
+				"Inline data size requested > supported value");
+			return -EINVAL;
+		}
+		sge_len = wr->sg_list[i].length;
+
+		memcpy(in_data, sge_addr, sge_len);
+		in_data += wr->sg_list[i].length;
+		wqe->inline_len += wr->sg_list[i].length;
+	}
+	return wqe->inline_len;
+}
+
+static int bnxt_re_copy_wr_payload(struct bnxt_re_dev *rdev,
+				   struct ib_send_wr *wr,
+				   struct bnxt_qplib_swqe *wqe)
+{
+	int payload_sz = 0;
+
+	if (wr->send_flags & IB_SEND_INLINE)
+		payload_sz = bnxt_re_copy_inline_data(rdev, wr, wqe);
+	else
+		payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe->sg_list,
+					       wqe->num_sge);
+
+	return payload_sz;
+}
+
+static int bnxt_re_post_send_shadow_qp(struct bnxt_re_dev *rdev,
+				       struct bnxt_re_qp *qp,
+				struct ib_send_wr *wr)
+{
+	struct bnxt_qplib_swqe wqe;
+	int rc = 0, payload_sz = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->sq_lock, flags);
+	memset(&wqe, 0, sizeof(wqe));
+	while (wr) {
+		/* House keeping */
+		memset(&wqe, 0, sizeof(wqe));
+
+		/* Common */
+		wqe.num_sge = wr->num_sge;
+		if (wr->num_sge > qp->qplib_qp.sq.max_sge) {
+			dev_err(rdev_to_dev(rdev),
+				"Limit exceeded for Send SGEs");
+			rc = -EINVAL;
+			goto bad;
+		}
+
+		payload_sz = bnxt_re_copy_wr_payload(qp->rdev, wr, &wqe);
+		if (payload_sz < 0) {
+			rc = -EINVAL;
+			goto bad;
+		}
+		wqe.wr_id = wr->wr_id;
+
+		wqe.type = BNXT_QPLIB_SWQE_TYPE_SEND;
+
+		rc = bnxt_re_build_send_wqe(qp, wr, &wqe);
+		if (!rc)
+			rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+bad:
+		if (rc) {
+			dev_err(rdev_to_dev(rdev),
+				"Post send failed opcode = %#x rc = %d",
+				wr->opcode, rc);
+			break;
+		}
+		wr = wr->next;
+	}
+	bnxt_qplib_post_send_db(&qp->qplib_qp);
+	spin_unlock_irqrestore(&qp->sq_lock, flags);
+	return rc;
+}
+
+int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr,
+		      struct ib_send_wr **bad_wr)
+{
+	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+	struct bnxt_qplib_swqe wqe;
+	int rc = 0, payload_sz = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->sq_lock, flags);
+	while (wr) {
+		/* House keeping */
+		memset(&wqe, 0, sizeof(wqe));
+
+		/* Common */
+		wqe.num_sge = wr->num_sge;
+		if (wr->num_sge > qp->qplib_qp.sq.max_sge) {
+			dev_err(rdev_to_dev(qp->rdev),
+				"Limit exceeded for Send SGEs");
+			rc = -EINVAL;
+			goto bad;
+		}
+
+		payload_sz = bnxt_re_copy_wr_payload(qp->rdev, wr, &wqe);
+		if (payload_sz < 0) {
+			rc = -EINVAL;
+			goto bad;
+		}
+		wqe.wr_id = wr->wr_id;
+
+		switch (wr->opcode) {
+		case IB_WR_SEND:
+		case IB_WR_SEND_WITH_IMM:
+			if (ib_qp->qp_type == IB_QPT_GSI) {
+				rc = bnxt_re_build_qp1_send_v2(qp, wr, &wqe,
+							       payload_sz);
+				if (rc)
+					goto bad;
+				wqe.rawqp1.lflags |=
+					SQ_SEND_RAWETH_QP1_LFLAGS_ROCE_CRC;
+			}
+			switch (wr->send_flags) {
+			case IB_SEND_IP_CSUM:
+				wqe.rawqp1.lflags |=
+					SQ_SEND_RAWETH_QP1_LFLAGS_IP_CHKSUM;
+				break;
+			default:
+				break;
+			}
+			/* Fall thru to build the wqe */
+		case IB_WR_SEND_WITH_INV:
+			rc = bnxt_re_build_send_wqe(qp, wr, &wqe);
+			break;
+		case IB_WR_RDMA_WRITE:
+		case IB_WR_RDMA_WRITE_WITH_IMM:
+		case IB_WR_RDMA_READ:
+			rc = bnxt_re_build_rdma_wqe(wr, &wqe);
+			break;
+		case IB_WR_ATOMIC_CMP_AND_SWP:
+		case IB_WR_ATOMIC_FETCH_AND_ADD:
+			rc = bnxt_re_build_atomic_wqe(wr, &wqe);
+			break;
+		case IB_WR_RDMA_READ_WITH_INV:
+			dev_err(rdev_to_dev(qp->rdev),
+				"RDMA Read with Invalidate is not supported");
+			rc = -EINVAL;
+			goto bad;
+		case IB_WR_LOCAL_INV:
+			rc = bnxt_re_build_inv_wqe(wr, &wqe);
+			break;
+		case IB_WR_REG_MR:
+			rc = bnxt_re_build_reg_wqe(reg_wr(wr), &wqe);
+			break;
+		default:
+			/* Unsupported WRs */
+			dev_err(rdev_to_dev(qp->rdev),
+				"WR (%#x) is not supported", wr->opcode);
+			rc = -EINVAL;
+			goto bad;
+		}
+		if (!rc)
+			rc = bnxt_qplib_post_send(&qp->qplib_qp, &wqe);
+bad:
+		if (rc) {
+			dev_err(rdev_to_dev(qp->rdev),
+				"post_send failed op:%#x qps = %#x rc = %d\n",
+				wr->opcode, qp->qplib_qp.state, rc);
+			*bad_wr = wr;
+			break;
+		}
+		wr = wr->next;
+	}
+	bnxt_qplib_post_send_db(&qp->qplib_qp);
+	spin_unlock_irqrestore(&qp->sq_lock, flags);
+
+	return rc;
+}
+
 /* Completion Queues */
 int bnxt_re_destroy_cq(struct ib_cq *ib_cq)
 {
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
index 75ee88a..becdcdc 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
@@ -162,6 +162,8 @@ int bnxt_re_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
 int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
 		     int qp_attr_mask, struct ib_qp_init_attr *qp_init_attr);
 int bnxt_re_destroy_qp(struct ib_qp *qp);
+int bnxt_re_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
+		      struct ib_send_wr **bad_send_wr);
 struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
 				const struct ib_cq_init_attr *attr,
 				struct ib_ucontext *context,
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
index 2396730..03a34c0 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
@@ -462,6 +462,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 	ibdev->query_qp			= bnxt_re_query_qp;
 	ibdev->destroy_qp		= bnxt_re_destroy_qp;
 
+	ibdev->post_send		= bnxt_re_post_send;
 	ibdev->create_cq		= bnxt_re_create_cq;
 	ibdev->destroy_cq		= bnxt_re_destroy_cq;
 	ibdev->req_notify_cq		= bnxt_re_req_notify_cq;
-- 
2.5.5

^ permalink raw reply related

* [PATCH for bnxt_re V3 15/21] bnxt_re: Support post_recv
From: Selvin Xavier @ 2016-12-20  9:13 UTC (permalink / raw)
  To: dledford, linux-rdma
  Cc: netdev, michael.chan, Selvin Xavier, Eddie Wai, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482225211-22423-1-git-send-email-selvin.xavier@broadcom.com>

Enables the fastpath verb ib_post_recv.

v3: Fixes sparse warnings

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c    | 100 +++++++++++++++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h    |   8 ++
 drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c | 123 ++++++++++++++++++++++++
 drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h |   2 +
 drivers/infiniband/hw/bnxtre/bnxt_re_main.c     |   2 +
 5 files changed, 235 insertions(+)

diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c
index d1ccf66..eeafb2a 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.c
@@ -1097,6 +1097,37 @@ void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
 	return NULL;
 }
 
+u32 bnxt_qplib_get_rq_prod_index(struct bnxt_qplib_qp *qp)
+{
+	struct bnxt_qplib_q *rq = &qp->rq;
+
+	return HWQ_CMP(rq->hwq.prod, &rq->hwq);
+}
+
+dma_addr_t bnxt_qplib_get_qp_buf_from_index(struct bnxt_qplib_qp *qp, u32 index)
+{
+	return (qp->rq_hdr_buf_map + index * qp->rq_hdr_buf_size);
+}
+
+void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
+				struct bnxt_qplib_sge *sge)
+{
+	struct bnxt_qplib_q *rq = &qp->rq;
+	u32 sw_prod;
+
+	memset(sge, 0, sizeof(*sge));
+
+	if (qp->rq_hdr_buf) {
+		sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+		sge->addr = (dma_addr_t)(qp->rq_hdr_buf_map +
+					 sw_prod * qp->rq_hdr_buf_size);
+		sge->lkey = 0xFFFFFFFF;
+		sge->size = qp->rq_hdr_buf_size;
+		return qp->rq_hdr_buf + sw_prod * sge->size;
+	}
+	return NULL;
+}
+
 void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp)
 {
 	struct bnxt_qplib_q *sq = &qp->sq;
@@ -1346,6 +1377,75 @@ int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
 	return rc;
 }
 
+void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp)
+{
+	struct bnxt_qplib_q *rq = &qp->rq;
+	struct dbr_dbr db_msg = { 0 };
+	u32 sw_prod;
+
+	sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+	db_msg.index = cpu_to_le32((sw_prod << DBR_DBR_INDEX_SFT) &
+				   DBR_DBR_INDEX_MASK);
+	db_msg.type_xid =
+		cpu_to_le32(((qp->id << DBR_DBR_XID_SFT) & DBR_DBR_XID_MASK) |
+			    DBR_DBR_TYPE_RQ);
+
+	/* Flush the writes to HW Rx WQE before the ringing Rx DB */
+	wmb();
+	__iowrite64_copy(qp->dpi->dbr, &db_msg, sizeof(db_msg) / sizeof(u64));
+}
+
+int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
+			 struct bnxt_qplib_swqe *wqe)
+{
+	struct bnxt_qplib_q *rq = &qp->rq;
+	struct rq_wqe *rqe, **rqe_ptr;
+	struct sq_sge *hw_sge;
+	u32 sw_prod;
+	int i, rc = 0;
+
+	if (qp->state == CMDQ_MODIFY_QP_NEW_STATE_ERR) {
+		dev_err(&rq->hwq.pdev->dev,
+			"QPLIB: FP: QP (0x%x) is in the 0x%x state",
+			qp->id, qp->state);
+		rc = -EINVAL;
+		goto done;
+	}
+	if (HWQ_CMP((rq->hwq.prod + 1), &rq->hwq) ==
+	    HWQ_CMP(rq->hwq.cons, &rq->hwq)) {
+		dev_err(&rq->hwq.pdev->dev,
+			"QPLIB: FP: QP (0x%x) RQ is full!", qp->id);
+		rc = -EINVAL;
+		goto done;
+	}
+	sw_prod = HWQ_CMP(rq->hwq.prod, &rq->hwq);
+	rq->swq[sw_prod].wr_id = wqe->wr_id;
+
+	rqe_ptr = (struct rq_wqe **)rq->hwq.pbl_ptr;
+	rqe = &rqe_ptr[RQE_PG(sw_prod)][RQE_IDX(sw_prod)];
+
+	memset(rqe, 0, BNXT_QPLIB_MAX_RQE_ENTRY_SIZE);
+
+	/* Calculate wqe_size16 and data_len */
+	for (i = 0, hw_sge = (struct sq_sge *)rqe->data;
+	     i < wqe->num_sge; i++, hw_sge++) {
+		hw_sge->va_or_pa = cpu_to_le64(wqe->sg_list[i].addr);
+		hw_sge->l_key = cpu_to_le32(wqe->sg_list[i].lkey);
+		hw_sge->size = cpu_to_le32(wqe->sg_list[i].size);
+	}
+	rqe->wqe_type = wqe->type;
+	rqe->flags = wqe->flags;
+	rqe->wqe_size = wqe->num_sge +
+			((offsetof(typeof(*rqe), data) + 15) >> 4);
+
+	/* Supply the rqe->wr_id index to the wr_id_tbl for now */
+	rqe->wr_id[0] = cpu_to_le32(sw_prod);
+
+	rq->hwq.prod++;
+done:
+	return rc;
+}
+
 /* CQ */
 
 /* Spinlock must be held */
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h
index 0a87920..e160050 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_fp.h
@@ -420,9 +420,17 @@ int bnxt_qplib_query_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
 int bnxt_qplib_destroy_qp(struct bnxt_qplib_res *res, struct bnxt_qplib_qp *qp);
 void *bnxt_qplib_get_qp1_sq_buf(struct bnxt_qplib_qp *qp,
 				struct bnxt_qplib_sge *sge);
+void *bnxt_qplib_get_qp1_rq_buf(struct bnxt_qplib_qp *qp,
+				struct bnxt_qplib_sge *sge);
+u32 bnxt_qplib_get_rq_prod_index(struct bnxt_qplib_qp *qp);
+dma_addr_t bnxt_qplib_get_qp_buf_from_index(struct bnxt_qplib_qp *qp,
+					    u32 index);
 void bnxt_qplib_post_send_db(struct bnxt_qplib_qp *qp);
 int bnxt_qplib_post_send(struct bnxt_qplib_qp *qp,
 			 struct bnxt_qplib_swqe *wqe);
+void bnxt_qplib_post_recv_db(struct bnxt_qplib_qp *qp);
+int bnxt_qplib_post_recv(struct bnxt_qplib_qp *qp,
+			 struct bnxt_qplib_swqe *wqe);
 int bnxt_qplib_create_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 int bnxt_qplib_destroy_cq(struct bnxt_qplib_res *res, struct bnxt_qplib_cq *cq);
 
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
index ff406e9..20a66ec 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.c
@@ -1637,6 +1637,51 @@ static int bnxt_re_build_qp1_send_v2(struct bnxt_re_qp *qp,
 	return rc;
 }
 
+/* For the MAD layer, it only provides the recv SGE the size of
+ * ib_grh + MAD datagram.  No Ethernet headers, Ethertype, BTH, DETH,
+ * nor RoCE iCRC.  The Cu+ solution must provide buffer for the entire
+ * receive packet (334 bytes) with no VLAN and then copy the GRH
+ * and the MAD datagram out to the provided SGE.
+ */
+static int bnxt_re_build_qp1_shadow_qp_recv(struct bnxt_re_qp *qp,
+					    struct ib_recv_wr *wr,
+					    struct bnxt_qplib_swqe *wqe,
+					    int payload_size)
+{
+	struct bnxt_qplib_sge ref, sge;
+	u32 rq_prod_index;
+	struct bnxt_re_sqp_entries *sqp_entry;
+
+	rq_prod_index = bnxt_qplib_get_rq_prod_index(&qp->qplib_qp);
+
+	if (bnxt_qplib_get_qp1_rq_buf(&qp->qplib_qp, &sge)) {
+		/* Create 1 SGE to receive the entire
+		 * ethernet packet
+		 */
+		/* Save the reference from ULP */
+		ref.addr = wqe->sg_list[0].addr;
+		ref.lkey = wqe->sg_list[0].lkey;
+		ref.size = wqe->sg_list[0].size;
+
+		sqp_entry = &qp->rdev->sqp_tbl[rq_prod_index];
+
+		/* SGE 1 */
+		wqe->sg_list[0].addr = sge.addr;
+		wqe->sg_list[0].lkey = sge.lkey;
+		wqe->sg_list[0].size = BNXT_QPLIB_MAX_QP1_RQ_HDR_SIZE_V2;
+		sge.size -= wqe->sg_list[0].size;
+
+		sqp_entry->sge.addr = ref.addr;
+		sqp_entry->sge.lkey = ref.lkey;
+		sqp_entry->sge.size = ref.size;
+		/* Store the wrid for reporting completion */
+		sqp_entry->wrid = wqe->wr_id;
+		/* change the wqe->wrid to table index */
+		wqe->wr_id = rq_prod_index;
+	}
+	return 0;
+}
+
 static int is_ud_qp(struct bnxt_re_qp *qp)
 {
 	return qp->qplib_qp.type == CMDQ_CREATE_QP_TYPE_UD;
@@ -1983,6 +2028,84 @@ int bnxt_re_post_send(struct ib_qp *ib_qp, struct ib_send_wr *wr,
 	return rc;
 }
 
+static int bnxt_re_post_recv_shadow_qp(struct bnxt_re_dev *rdev,
+				       struct bnxt_re_qp *qp,
+				       struct ib_recv_wr *wr)
+{
+	struct bnxt_qplib_swqe wqe;
+	int rc = 0, payload_sz = 0;
+
+	memset(&wqe, 0, sizeof(wqe));
+	while (wr) {
+		/* House keeping */
+		memset(&wqe, 0, sizeof(wqe));
+
+		/* Common */
+		wqe.num_sge = wr->num_sge;
+		if (wr->num_sge > qp->qplib_qp.rq.max_sge) {
+			dev_err(rdev_to_dev(rdev),
+				"Limit exceeded for Receive SGEs");
+			rc = -EINVAL;
+			goto bad;
+		}
+		payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe.sg_list,
+					       wr->num_sge);
+		wqe.wr_id = wr->wr_id;
+		wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV;
+
+		if (!rc)
+			rc = bnxt_qplib_post_recv(&qp->qplib_qp, &wqe);
+bad:
+		if (rc)
+			break;
+
+		wr = wr->next;
+	}
+	bnxt_qplib_post_recv_db(&qp->qplib_qp);
+	return rc;
+}
+
+int bnxt_re_post_recv(struct ib_qp *ib_qp, struct ib_recv_wr *wr,
+		      struct ib_recv_wr **bad_wr)
+{
+	struct bnxt_re_qp *qp = container_of(ib_qp, struct bnxt_re_qp, ib_qp);
+	struct bnxt_qplib_swqe wqe;
+	int rc = 0, payload_sz = 0;
+
+	while (wr) {
+		/* House keeping */
+		memset(&wqe, 0, sizeof(wqe));
+
+		/* Common */
+		wqe.num_sge = wr->num_sge;
+		if (wr->num_sge > qp->qplib_qp.rq.max_sge) {
+			dev_err(rdev_to_dev(qp->rdev),
+				"Limit exceeded for Receive SGEs");
+			rc = -EINVAL;
+			goto bad;
+		}
+
+		payload_sz = bnxt_re_build_sgl(wr->sg_list, wqe.sg_list,
+					       wr->num_sge);
+		wqe.wr_id = wr->wr_id;
+		wqe.type = BNXT_QPLIB_SWQE_TYPE_RECV;
+
+		if (ib_qp->qp_type == IB_QPT_GSI)
+			rc = bnxt_re_build_qp1_shadow_qp_recv(qp, wr, &wqe,
+							      payload_sz);
+		if (!rc)
+			rc = bnxt_qplib_post_recv(&qp->qplib_qp, &wqe);
+bad:
+		if (rc) {
+			*bad_wr = wr;
+			break;
+		}
+		wr = wr->next;
+	}
+	bnxt_qplib_post_recv_db(&qp->qplib_qp);
+	return rc;
+}
+
 /* Completion Queues */
 int bnxt_re_destroy_cq(struct ib_cq *ib_cq)
 {
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
index becdcdc..9f3dd49 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_ib_verbs.h
@@ -164,6 +164,8 @@ int bnxt_re_query_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr,
 int bnxt_re_destroy_qp(struct ib_qp *qp);
 int bnxt_re_post_send(struct ib_qp *qp, struct ib_send_wr *send_wr,
 		      struct ib_send_wr **bad_send_wr);
+int bnxt_re_post_recv(struct ib_qp *qp, struct ib_recv_wr *recv_wr,
+		      struct ib_recv_wr **bad_recv_wr);
 struct ib_cq *bnxt_re_create_cq(struct ib_device *ibdev,
 				const struct ib_cq_init_attr *attr,
 				struct ib_ucontext *context,
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
index 03a34c0..adaa663 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
@@ -463,6 +463,8 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 	ibdev->destroy_qp		= bnxt_re_destroy_qp;
 
 	ibdev->post_send		= bnxt_re_post_send;
+	ibdev->post_recv		= bnxt_re_post_recv;
+
 	ibdev->create_cq		= bnxt_re_create_cq;
 	ibdev->destroy_cq		= bnxt_re_destroy_cq;
 	ibdev->req_notify_cq		= bnxt_re_req_notify_cq;
-- 
2.5.5

^ permalink raw reply related

* [PATCH for bnxt_re V3 18/21] bnxt_re: Support for DCB
From: Selvin Xavier @ 2016-12-20  9:13 UTC (permalink / raw)
  To: dledford, linux-rdma
  Cc: netdev, michael.chan, Selvin Xavier, Eddie Wai, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482225211-22423-1-git-send-email-selvin.xavier@broadcom.com>

This patch queries the configured RoCE APP Priority on the host
using the dcbnl API and programs the RoCE FW with the corresponding
Traffic Class(es) for the priority.

v2: Fixed some sparse warning and cleanup of function
bnxt_re_query_hwrm_pri2cos

v3: Adds bnxt_qplib_map_tc2cos as a part of this patch. Uses
    ROCE_V2_UDP_DPORT instead of BNXT_RE_ROCE_V2_PORT_NO.

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c |  37 +++++++
 drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h |   1 +
 drivers/infiniband/hw/bnxtre/bnxt_re.h       |   5 +
 drivers/infiniband/hw/bnxtre/bnxt_re_main.c  | 140 +++++++++++++++++++++++++++
 4 files changed, 183 insertions(+)

diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c
index 4f77af8..aaafcf99 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.c
@@ -799,3 +799,40 @@ int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
 	bnxt_qplib_free_hwq(res->pdev, &frpl->hwq);
 	return 0;
 }
+
+int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids)
+{
+	struct bnxt_qplib_rcfw *rcfw = res->rcfw;
+	struct cmdq_map_tc_to_cos req;
+	struct creq_map_tc_to_cos_resp *resp;
+	u16 cmd_flags = 0;
+	int tleft;
+
+	RCFW_CMD_PREP(req, MAP_TC_TO_COS, cmd_flags);
+	req.cos0 = cpu_to_le16(cids[0]);
+	req.cos1 = cpu_to_le16(cids[1]);
+
+	resp = bnxt_qplib_rcfw_send_message(rcfw, (void *)&req, NULL, 0);
+	if (!resp) {
+		dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS send failed");
+		return -EINVAL;
+	}
+
+	tleft = bnxt_qplib_rcfw_block_for_resp(rcfw, le16_to_cpu(req.cookie));
+	if (!tleft) {
+		dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS timed out");
+		return -ETIMEDOUT;
+	}
+
+	if (resp->status ||
+	    le16_to_cpu(resp->cookie) != le16_to_cpu(req.cookie)) {
+		dev_err(&res->pdev->dev, "QPLIB: SP: MAP_TC2COS failed ");
+		dev_err(&res->pdev->dev,
+			"QPLIB: with status 0x%x cmdq 0x%x resp 0x%x",
+			resp->status, le16_to_cpu(req.cookie),
+			le16_to_cpu(resp->cookie));
+		return -EINVAL;
+	}
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
index 3358f6d..96437bb 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_qplib_sp.h
@@ -156,4 +156,5 @@ int bnxt_qplib_alloc_fast_reg_page_list(struct bnxt_qplib_res *res,
 					struct bnxt_qplib_frpl *frpl, int max);
 int bnxt_qplib_free_fast_reg_page_list(struct bnxt_qplib_res *res,
 				       struct bnxt_qplib_frpl *frpl);
+int bnxt_qplib_map_tc2cos(struct bnxt_qplib_res *res, u16 *cids);
 #endif /* __BNXT_QPLIB_SP_H__*/
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re.h b/drivers/infiniband/hw/bnxtre/bnxt_re.h
index 6281e23..5565be8 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re.h
@@ -45,6 +45,8 @@
 #define BNXT_RE_REF_WAIT_COUNT		10
 #define BNXT_RE_DESC	"Broadcom NetXtreme-C/E RoCE Driver"
 
+#define BNXT_RE_ROCE_V1_ETH_TYPE	0x8915
+
 #define BNXT_RE_PAGE_SIZE_4K		BIT(12)
 #define BNXT_RE_PAGE_SIZE_8K		BIT(13)
 #define BNXT_RE_PAGE_SIZE_64K		BIT(16)
@@ -95,6 +97,9 @@ struct bnxt_re_dev {
 
 	int				id;
 
+	struct delayed_work		worker;
+	u8				cur_prio_map;
+
 	/* FP Notification Queue (CQ & SRQ) */
 	struct tasklet_struct		nq_task;
 
diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
index 3a45b53..ba082f1 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
@@ -44,6 +44,7 @@
 #include <linux/rculist.h>
 #include <linux/spinlock.h>
 #include <linux/pci.h>
+#include <net/dcbnl.h>
 #include <net/ipv6.h>
 #include <net/addrconf.h>
 
@@ -743,6 +744,50 @@ static void bnxt_re_dispatch_event(struct ib_device *ibdev, struct ib_qp *qp,
 	ib_dispatch_event(&ib_event);
 }
 
+#define HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN      0x02
+static int bnxt_re_query_hwrm_pri2cos(struct bnxt_re_dev *rdev, u8 dir,
+				      u64 *cid_map)
+{
+	struct hwrm_queue_pri2cos_qcfg_input req = {0};
+	struct bnxt *bp = netdev_priv(rdev->netdev);
+	struct hwrm_queue_pri2cos_qcfg_output resp;
+	struct bnxt_en_dev *en_dev = rdev->en_dev;
+	struct bnxt_fw_msg fw_msg;
+	u32 flags = 0;
+	u8 *qcfgmap, *tmp_map;
+	int rc = 0, i;
+
+	if (!cid_map)
+		return -EINVAL;
+
+	memset(&fw_msg, 0, sizeof(fw_msg));
+	bnxt_re_init_hwrm_hdr(rdev, (void *)&req,
+			      HWRM_QUEUE_PRI2COS_QCFG, -1, -1);
+	flags |= (dir & 0x01);
+	flags |= HWRM_QUEUE_PRI2COS_QCFG_INPUT_FLAGS_IVLAN;
+	req.flags = cpu_to_le32(flags);
+	req.port_id = bp->pf.port_id;
+
+	bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp,
+			    sizeof(resp), DFLT_HWRM_CMD_TIMEOUT);
+	rc = en_dev->en_ops->bnxt_send_fw_msg(en_dev, BNXT_ROCE_ULP, &fw_msg);
+	if (rc)
+		return rc;
+
+	if (resp.queue_cfg_info) {
+		dev_warn(rdev_to_dev(rdev),
+			 "Asymmetric cos queue configuration detected");
+		dev_warn(rdev_to_dev(rdev),
+			 " on device, QoS may not be fully functional\n");
+	}
+	qcfgmap = &resp.pri0_cos_queue_id;
+	tmp_map = (u8 *)cid_map;
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+		tmp_map[i] = qcfgmap[i];
+
+	return rc;
+}
+
 static bool bnxt_re_is_qp1_or_shadow_qp(struct bnxt_re_dev *rdev,
 					struct bnxt_re_qp *qp)
 {
@@ -783,6 +828,80 @@ static void bnxt_re_dev_stop(struct bnxt_re_dev *rdev, bool qp_wait)
 	}
 }
 
+static u32 bnxt_re_get_priority_mask(struct bnxt_re_dev *rdev)
+{
+	u32 prio_map = 0, tmp_map = 0;
+	struct net_device *netdev;
+	struct dcb_app app;
+
+	netdev = rdev->netdev;
+
+	memset(&app, 0, sizeof(app));
+	app.selector = IEEE_8021QAZ_APP_SEL_ETHERTYPE;
+	app.protocol = BNXT_RE_ROCE_V1_ETH_TYPE;
+	tmp_map = dcb_ieee_getapp_mask(netdev, &app);
+	prio_map = tmp_map;
+
+	app.selector = IEEE_8021QAZ_APP_SEL_DGRAM;
+	app.protocol = ROCE_V2_UDP_DPORT;
+	tmp_map = dcb_ieee_getapp_mask(netdev, &app);
+	prio_map |= tmp_map;
+
+	if (!prio_map)
+		prio_map = -EFAULT;
+	return prio_map;
+}
+
+static void bnxt_re_parse_cid_map(u8 prio_map, u8 *cid_map, u16 *cosq)
+{
+	u16 prio;
+	u8 id;
+
+	for (prio = 0, id = 0; prio < 8; prio++) {
+		if (prio_map & (1 << prio)) {
+			cosq[id] = cid_map[prio];
+			id++;
+			if (id == 2) /* Max 2 tcs supported */
+				break;
+		}
+	}
+}
+
+static int bnxt_re_setup_qos(struct bnxt_re_dev *rdev)
+{
+	u8 prio_map = 0;
+	u64 cid_map;
+	int rc;
+
+	/* Get priority for roce */
+	rc = bnxt_re_get_priority_mask(rdev);
+	if (rc < 0)
+		return rc;
+	prio_map = (u8)rc;
+
+	if (prio_map == rdev->cur_prio_map)
+		return 0;
+	rdev->cur_prio_map = prio_map;
+	/* Get cosq id for this priority */
+	rc = bnxt_re_query_hwrm_pri2cos(rdev, 0, &cid_map);
+	if (rc) {
+		dev_warn(rdev_to_dev(rdev), "no cos for p_mask %x\n", prio_map);
+		return rc;
+	}
+	/* Parse CoS IDs for app priority */
+	bnxt_re_parse_cid_map(prio_map, (u8 *)&cid_map, rdev->cosq);
+
+	/* Config BONO. */
+	rc = bnxt_qplib_map_tc2cos(&rdev->qplib_res, rdev->cosq);
+	if (rc) {
+		dev_warn(rdev_to_dev(rdev), "no tc for cos{%x, %x}\n",
+			 rdev->cosq[0], rdev->cosq[1]);
+		return rc;
+	}
+
+	return 0;
+}
+
 static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
 {
 	int i, rc;
@@ -794,6 +913,9 @@ static void bnxt_re_ib_unreg(struct bnxt_re_dev *rdev, bool lock_wait)
 		/* Cleanup ib dev */
 		bnxt_re_unregister_ib(rdev);
 	}
+	if (test_and_clear_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags))
+		cancel_delayed_work(&rdev->worker);
+
 	bnxt_re_cleanup_res(rdev);
 	bnxt_re_free_res(rdev, lock_wait);
 
@@ -836,6 +958,16 @@ static void bnxt_re_set_resource_limits(struct bnxt_re_dev *rdev)
 		rdev->dev_attr.tqm_alloc_reqs[i];
 }
 
+/* worker thread for polling periodic events. Now used for QoS programming*/
+static void bnxt_re_worker(struct work_struct *work)
+{
+	struct bnxt_re_dev *rdev = container_of(work, struct bnxt_re_dev,
+						worker.work);
+
+	bnxt_re_setup_qos(rdev);
+	schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
+}
+
 static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 {
 	int i, j, rc;
@@ -920,6 +1052,14 @@ static int bnxt_re_ib_reg(struct bnxt_re_dev *rdev)
 		goto fail;
 	}
 
+	rc = bnxt_re_setup_qos(rdev);
+	if (rc)
+		pr_info("RoCE priority not yet configured\n");
+
+	INIT_DELAYED_WORK(&rdev->worker, bnxt_re_worker);
+	set_bit(BNXT_RE_FLAG_QOS_WORK_REG, &rdev->flags);
+	schedule_delayed_work(&rdev->worker, msecs_to_jiffies(30000));
+
 	/* Register ib dev */
 	rc = bnxt_re_register_ib(rdev);
 	if (rc) {
-- 
2.5.5

^ permalink raw reply related

* [PATCH for bnxt_re V3 19/21] bnxt_re: Set uverbs command mask
From: Selvin Xavier @ 2016-12-20  9:13 UTC (permalink / raw)
  To: dledford, linux-rdma
  Cc: netdev, michael.chan, Selvin Xavier, Eddie Wai, Devesh Sharma,
	Somnath Kotur, Sriharsha Basavapatna
In-Reply-To: <1482225211-22423-1-git-send-email-selvin.xavier@broadcom.com>

This patch exports available uverbs command mask to the IB stack.
Also, populates some of the missing parameters in the ibdev structure
used for registration.

Signed-off-by: Eddie Wai <eddie.wai@broadcom.com>
Signed-off-by: Devesh Sharma <devesh.sharma@broadcom.com>
Signed-off-by: Somnath Kotur <somnath.kotur@broadcom.com>
Signed-off-by: Sriharsha Basavapatna <sriharsha.basavapatna@broadcom.com>
Signed-off-by: Selvin Xavier <selvin.xavier@broadcom.com>
---
 drivers/infiniband/hw/bnxtre/bnxt_re_main.c | 35 +++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
index ba082f1..035e85a 100644
--- a/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
+++ b/drivers/infiniband/hw/bnxtre/bnxt_re_main.c
@@ -61,6 +61,7 @@
 #include "bnxt_qplib_rcfw.h"
 #include "bnxt_re.h"
 #include "bnxt_re_ib_verbs.h"
+#include <rdma/bnxtre-abi.h>
 #include "bnxt.h"
 static char version[] =
 		BNXT_RE_DESC " v" ROCE_DRV_MODULE_VERSION "\n";
@@ -435,8 +436,42 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
 		strlen(BNXT_RE_DESC) + 5);
 	ibdev->phys_port_cnt = 1;
 
+	bnxt_qplib_get_guid(rdev->netdev->dev_addr, (u8 *)&ibdev->node_guid);
+
 	ibdev->num_comp_vectors	= 1;
 	ibdev->dma_device = &rdev->en_dev->pdev->dev;
+	ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY;
+
+	/* User space */
+	ibdev->uverbs_abi_ver = BNXT_RE_ABI_VERSION;
+	ibdev->uverbs_cmd_mask =
+			(1ull << IB_USER_VERBS_CMD_GET_CONTEXT)		|
+			(1ull << IB_USER_VERBS_CMD_QUERY_DEVICE)	|
+			(1ull << IB_USER_VERBS_CMD_QUERY_PORT)		|
+			(1ull << IB_USER_VERBS_CMD_ALLOC_PD)		|
+			(1ull << IB_USER_VERBS_CMD_DEALLOC_PD)		|
+			(1ull << IB_USER_VERBS_CMD_REG_MR)		|
+			(1ull << IB_USER_VERBS_CMD_REREG_MR)		|
+			(1ull << IB_USER_VERBS_CMD_DEREG_MR)		|
+			(1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
+			(1ull << IB_USER_VERBS_CMD_CREATE_CQ)		|
+			(1ull << IB_USER_VERBS_CMD_RESIZE_CQ)		|
+			(1ull << IB_USER_VERBS_CMD_DESTROY_CQ)		|
+			(1ull << IB_USER_VERBS_CMD_CREATE_QP)		|
+			(1ull << IB_USER_VERBS_CMD_MODIFY_QP)		|
+			(1ull << IB_USER_VERBS_CMD_QUERY_QP)		|
+			(1ull << IB_USER_VERBS_CMD_DESTROY_QP)		|
+			(1ull << IB_USER_VERBS_CMD_CREATE_SRQ)		|
+			(1ull << IB_USER_VERBS_CMD_MODIFY_SRQ)		|
+			(1ull << IB_USER_VERBS_CMD_QUERY_SRQ)		|
+			(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
+			(1ull << IB_USER_VERBS_CMD_CREATE_AH)		|
+			(1ull << IB_USER_VERBS_CMD_MODIFY_AH)		|
+			(1ull << IB_USER_VERBS_CMD_QUERY_AH)		|
+			(1ull << IB_USER_VERBS_CMD_DESTROY_AH);
+	/* POLL_CQ and REQ_NOTIFY_CQ is directly handled in libbnxt_re */
+
+	/* Kernel verbs */
 	ibdev->query_device		= bnxt_re_query_device;
 	ibdev->modify_device		= bnxt_re_modify_device;
 
-- 
2.5.5

^ permalink raw reply related

* Re: Potential issues (security and otherwise) with the current cgroup-bpf API
From: Daniel Mack @ 2016-12-20 10:21 UTC (permalink / raw)
  To: Andy Lutomirski, Alexei Starovoitov
  Cc: Andy Lutomirski, Mickaël Salaün, Kees Cook, Jann Horn,
	Tejun Heo, David Ahern, David S. Miller, Thomas Graf,
	Michael Kerrisk, Peter Zijlstra, Linux API,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Network Development
In-Reply-To: <CALCETrXymvAo-9zhQe=amToz_fs9XGniK2KLZv5Fxc66qcUx6A-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

Hi,

On 12/20/2016 04:50 AM, Andy Lutomirski wrote:
> On Mon, Dec 19, 2016 at 7:18 PM, Alexei Starovoitov
> <alexei.starovoitov-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
>> On Mon, Dec 19, 2016 at 04:25:32PM -0800, Andy Lutomirski wrote:
>>> I think we're still talking past each other.  A big part of the point
>>> of changing it is that none of this is specific to bpf.  You could (in
>>
>> the hooks and context passed into the program is very much bpf specific.
>> That's what I've been trying to convey all along.
> 
> You mean BPF_CGROUP_RUN_PROG_INET_SOCK(sk)?  There is nothing bpf
> specfic about the hook except that the name of this macro has "BPF" in
> it.  There is nothing whatsoever that's bpf-specific about the context
> -- sk is not bpf-specific at all.
> 
> The only thing bpf-specific about it is that it currently only invokes
> bpf programs.  That could easily change.

I'm not sure if I follow. The code as it currently stands only supports
attaching bpf programs to cgroups which have been created using
BPF_PROG_LOAD. If cgroups would support other program types in the
future, then they would need to be stored in different data types
anyway, and the bpf syscall multiplexer would be the wrong entry point
to access them anyway.

Whether we add bpf-specific code to the cgroup file parsers or
cgroup-specific code to the bpf layer does not make much of a semantic
difference, does it? As a matter of fact, my very first implementation
of this patch set implemented a cgroup controller that would allow
writing strings like "ingress 5" to its control file, where 5 is the fd
number that came out of BPF_PROG_LOAD. The main reason we decided to
ditch that was that echoing fd numbers into a text file seemed way worse
than going through a proper syscall layer with it, and ioctls are
unavailable on pseudo-fs.

The idea was rather to allow attaching bpf programs to other things than
just cgroups as well, which is why we called the member of 'union
bpf_attr' 'target_fd', and a cgroup is just one type a target here.

>> i'm assuming 'baadf00d' is bpf program fd expressed a text string?
>> and kernel needs to parse above? will you allow capital and lower
>> case for 'bpf:' ? and mixed case too? spaces and tabs allowed or not?
>> can program fd expressed as decimal or hex or both?
>> how do you return the error? as a text string for user space
>> to parse?
> 
> No.  The kernel does not parse it because you cannot write this to the
> file.  You set a bpf filter with ioctl and pass an fd.

An ioctl on what file, exactly?

> If you *read*
> the file, you get the same bpf program hash that fdinfo on the bpf
> object would show -- this is for debugging and (eventually) CRIU.

We need a debugging facility at some point, I agree to that. As the code
currently stands, that would rather need to go into the bpf(2) syscall
though, as setting a program through bpf(2) and reading it through
cgroupfs is really nasty.

>> so you're proposing to add a bunch of hard coded logic to the kernel.
>> First to parse such text into some sort of syntax tree or list/set
>> and then have hard coded logic specifically for these two use cases?
>> While above two can be implemented as trivial bpf programs already?!
>> That goes 180% degree vs bpf philosophy. bpf is about moving
>> the specific code out of the kernel and keeping kernel generic that
>> it can solve as many use cases as possible by being programmable.
> 
> I'm not seriously proposing implementing these.  My point is that
> *bpf*, while wonderful, is not the be-all-and-end-all of kernel
> configurability, and other types of hooks might want to be hooked in
> here.

Sure, but nobody claimed it to be that be-all-and-end-all thing. It's
just one thing that a cgroup is now able to accommodate, and because
that new feature is specific to bpf, we decided to hook up the uapi to
the bpf syscall.

> So if I set up a cgroup that's monitored and call it /cgroup/a and
> enable delegation and if the program running there wants to do its own
> monitoring in /cgroup/a/b (via delegation), then you really want the
> outer monitor to silently drop events coming from /cgroup/a/b?

That's a fair point, and we've discussed it as well. The issue is, as
Alexei already pointed out, that we do not want to traverse the tree up
to the root for nested cgroups due to the runtime costs in the
networking fast-path. After all, we're running the bpf program for each
packet in flight. Hence, we opted for the approach to only look at the
leaf node for now, with the ability to open it up further in the future
using flags during attach etc.

> The current approach to bpf hooks will bite you down the road.  David
> Ahern is already proposing using it for something that is not tracing
> at all, and someone will want that in a container, and there will be a
> problem.

Hmm, I thought we've sorted out the concerns about that by making sure
that we

a) lock-down the API sufficiently so it doesn't cause any security
issues in its current form, and

b) make it possible to extend the functionality in the future by adding
flags to the command struct etc.

And I hoped we achieved that after discussing it for so long.

> How about slowing down a wee bit and trying to come up with cgroup
> hook semantics that work for all of these use cases?

I'm all for discussing things, but I don't this was done in a rush.

I do agree though that adding functionality to cgroups that is not
limited to resource control is a delicate thing to do, which is why I
cc'ed cgroups@ in my patches. I should have also added linux-api@ I
guess, sorry I missed that.

> I think my proposal is quite close to workable.

So let's talk about how to proceed. I've seen different bits of your
proposal in different mails, and I think a summary of it would help the
discussion.


Thanks,
Daniel

^ permalink raw reply

* Re: [PATCH net] netfilter: check duplicate config when initializing in ipt_CLUSTERIP
From: Xin Long @ 2016-12-20 11:14 UTC (permalink / raw)
  To: Pablo Neira Ayuso
  Cc: network dev, netfilter-devel, davem, Marcelo Ricardo Leitner
In-Reply-To: <20161220004803.GA14656@salvia>

On Tue, Dec 20, 2016 at 8:48 AM, Pablo Neira Ayuso <pablo@netfilter.org> wrote:
> On Thu, Dec 15, 2016 at 12:31:40PM +0800, Xin Long wrote:
>> @@ -185,6 +186,17 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
>>       atomic_set(&c->refcount, 1);
>>       atomic_set(&c->entries, 1);
>>
>> +     spin_lock_bh(&cn->lock);
>> +     if (__clusterip_config_find(net, ip)) {
>> +             spin_unlock_bh(&cn->lock);
>> +             kfree(c);
>> +
>> +             return NULL;
>> +     }
>
> This is going to result in ENOMEM error report to userspace on race,
> which can be confusing. Time for clusterip_config_init() to return
> PTR_ERR()?
will post v2 with PTR_ERR, thanks.

>
>> +
>> +     list_add_rcu(&c->list, &cn->configs);
>> +     spin_unlock_bh(&cn->lock);
>> +
>>  #ifdef CONFIG_PROC_FS
>>       {
>>               char buffer[16];

^ permalink raw reply

* [PATCHv2 net] netfilter: check duplicate config when initializing in ipt_CLUSTERIP
From: Xin Long @ 2016-12-20 11:14 UTC (permalink / raw)
  To: network dev, netfilter-devel; +Cc: davem, pablo, Marcelo Ricardo Leitner

Now when adding an ipt_CLUSTERIP rule, it only checks duplicate config in
clusterip_config_find_get(). But after that, there may be still another
thread to insert a config with the same ip, then it leaves proc_create_data
to do duplicate check.

It's more reasonable to check duplicate config by ipt_CLUSTERIP itself,
instead of checking it by proc fs duplicate file check. Before, when proc
fs allowed duplicate name files in a directory, It could even crash kernel
because of use-after-free.

This patch is to check duplicate config under the protection of clusterip
net lock when initializing a new config and correct the return err.

Note that it also moves proc file node creation after adding new config, as
proc_create_data may sleep, it couldn't be called under the clusterip_net
lock. clusterip_config_find_get returns NULL if c->pde is null to make sure
it can't be used until the proc file node creation is done.

v1->v2:
  correct the err clusterip_config_init returns.

Suggested-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
---
 net/ipv4/netfilter/ipt_CLUSTERIP.c | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 21db00d..a6b8c1a 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -144,7 +144,7 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
 	rcu_read_lock_bh();
 	c = __clusterip_config_find(net, clusterip);
 	if (c) {
-		if (unlikely(!atomic_inc_not_zero(&c->refcount)))
+		if (!c->pde || unlikely(!atomic_inc_not_zero(&c->refcount)))
 			c = NULL;
 		else if (entry)
 			atomic_inc(&c->entries);
@@ -166,14 +166,15 @@ clusterip_config_init_nodelist(struct clusterip_config *c,
 
 static struct clusterip_config *
 clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
-			struct net_device *dev)
+		      struct net_device *dev)
 {
+	struct net *net = dev_net(dev);
 	struct clusterip_config *c;
-	struct clusterip_net *cn = net_generic(dev_net(dev), clusterip_net_id);
+	struct clusterip_net *cn = net_generic(net, clusterip_net_id);
 
 	c = kzalloc(sizeof(*c), GFP_ATOMIC);
 	if (!c)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	c->dev = dev;
 	c->clusterip = ip;
@@ -185,6 +186,17 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
 	atomic_set(&c->refcount, 1);
 	atomic_set(&c->entries, 1);
 
+	spin_lock_bh(&cn->lock);
+	if (__clusterip_config_find(net, ip)) {
+		spin_unlock_bh(&cn->lock);
+		kfree(c);
+
+		return ERR_PTR(-EBUSY);
+	}
+
+	list_add_rcu(&c->list, &cn->configs);
+	spin_unlock_bh(&cn->lock);
+
 #ifdef CONFIG_PROC_FS
 	{
 		char buffer[16];
@@ -195,16 +207,16 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
 					  cn->procdir,
 					  &clusterip_proc_fops, c);
 		if (!c->pde) {
+			spin_lock_bh(&cn->lock);
+			list_del_rcu(&c->list);
+			spin_unlock_bh(&cn->lock);
 			kfree(c);
-			return NULL;
+
+			return ERR_PTR(-ENOMEM);
 		}
 	}
 #endif
 
-	spin_lock_bh(&cn->lock);
-	list_add_rcu(&c->list, &cn->configs);
-	spin_unlock_bh(&cn->lock);
-
 	return c;
 }
 
@@ -410,9 +422,9 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
 
 			config = clusterip_config_init(cipinfo,
 							e->ip.dst.s_addr, dev);
-			if (!config) {
+			if (IS_ERR(config)) {
 				dev_put(dev);
-				return -ENOMEM;
+				return PTR_ERR(config);
 			}
 			dev_mc_add(config->dev, config->clustermac);
 		}
-- 
2.1.0

^ permalink raw reply related

* [PATCH] stmmac: CSR clock configuration fix
From: Joao Pinto @ 2016-12-20 11:21 UTC (permalink / raw)
  To: peppe.cavallaro, davem
  Cc: hock.leong.kweh, niklas.cassel, pavel, linux-kernel, netdev,
	Joao Pinto

When testing stmmac with my QoS reference design I checked a problem in the
CSR clock configuration that was impossibilitating the phy discovery, since
every read operation returned 0x0000ffff. This patch fixes the issue.

Signed-off-by: Joao Pinto <jpinto@synopsys.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index 23322fd..fda01f7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -81,8 +81,8 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg)
 	value |= (phyaddr << priv->hw->mii.addr_shift)
 		& priv->hw->mii.addr_mask;
 	value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask;
-	value |= (priv->clk_csr & priv->hw->mii.clk_csr_mask)
-		<< priv->hw->mii.clk_csr_shift;
+	value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift)
+		& priv->hw->mii.clk_csr_mask;
 	if (priv->plat->has_gmac4)
 		value |= MII_GMAC4_READ;
 
@@ -122,8 +122,8 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
 		& priv->hw->mii.addr_mask;
 	value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask;
 
-	value |= ((priv->clk_csr & priv->hw->mii.clk_csr_mask)
-		<< priv->hw->mii.clk_csr_shift);
+	value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift)
+		& priv->hw->mii.clk_csr_mask;
 	if (priv->plat->has_gmac4)
 		value |= MII_GMAC4_WRITE;
 
-- 
2.9.3

^ permalink raw reply related

* Re: [PATCHv2 net 1/2] sctp: reduce indent level in sctp_copy_local_addr_list
From: Marcelo Ricardo Leitner @ 2016-12-20 11:29 UTC (permalink / raw)
  To: Xin Long; +Cc: network dev, linux-sctp, davem, Neil Horman
In-Reply-To: <b4d6428c565955e9e81e3b4623a6458e7567e593.1482212764.git.lucien.xin@gmail.com>

On Tue, Dec 20, 2016 at 01:49:49PM +0800, Xin Long wrote:
> This patch is to reduce indent level by using continue when the addr
> is not allowed, and also drop end_copy by using break.
> 
> Signed-off-by: Xin Long <lucien.xin@gmail.com>

Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>

> ---
>  net/sctp/protocol.c | 37 +++++++++++++++++++------------------
>  1 file changed, 19 insertions(+), 18 deletions(-)
> 
> diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
> index 7b523e3..da5d82b 100644
> --- a/net/sctp/protocol.c
> +++ b/net/sctp/protocol.c
> @@ -205,26 +205,27 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
>  	list_for_each_entry_rcu(addr, &net->sctp.local_addr_list, list) {
>  		if (!addr->valid)
>  			continue;
> -		if (sctp_in_scope(net, &addr->a, scope)) {
> -			/* Now that the address is in scope, check to see if
> -			 * the address type is really supported by the local
> -			 * sock as well as the remote peer.
> -			 */
> -			if ((((AF_INET == addr->a.sa.sa_family) &&
> -			      (copy_flags & SCTP_ADDR4_PEERSUPP))) ||
> -			    (((AF_INET6 == addr->a.sa.sa_family) &&
> -			      (copy_flags & SCTP_ADDR6_ALLOWED) &&
> -			      (copy_flags & SCTP_ADDR6_PEERSUPP)))) {
> -				error = sctp_add_bind_addr(bp, &addr->a,
> -						    sizeof(addr->a),
> -						    SCTP_ADDR_SRC, GFP_ATOMIC);
> -				if (error)
> -					goto end_copy;
> -			}
> -		}
> +		if (!sctp_in_scope(net, &addr->a, scope))
> +			continue;
> +
> +		/* Now that the address is in scope, check to see if
> +		 * the address type is really supported by the local
> +		 * sock as well as the remote peer.
> +		 */
> +		if (addr->a.sa.sa_family == AF_INET &&
> +		    !(copy_flags & SCTP_ADDR4_PEERSUPP))
> +			continue;
> +		if (addr->a.sa.sa_family == AF_INET6 &&
> +		    (!(copy_flags & SCTP_ADDR6_ALLOWED) ||
> +		     !(copy_flags & SCTP_ADDR6_PEERSUPP)))
> +			continue;
> +
> +		error = sctp_add_bind_addr(bp, &addr->a, sizeof(addr->a),
> +					   SCTP_ADDR_SRC, GFP_ATOMIC);
> +		if (error)
> +			break;
>  	}
>  
> -end_copy:
>  	rcu_read_unlock();
>  	return error;
>  }
> -- 
> 2.1.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* Re: [PATCHv2 net 2/2] sctp: not copying duplicate addrs to the assoc's bind address list
From: Marcelo Ricardo Leitner @ 2016-12-20 11:30 UTC (permalink / raw)
  To: Xin Long; +Cc: network dev, linux-sctp, davem, Neil Horman
In-Reply-To: <5a0037123617b525dfe456db1055770f39fb1193.1482212764.git.lucien.xin@gmail.com>

On Tue, Dec 20, 2016 at 01:49:50PM +0800, Xin Long wrote:
> sctp.local_addr_list is a global address list that is supposed to include
> all the local addresses. sctp updates this list according to NETDEV_UP/
> NETDEV_DOWN notifications.
> 
> However, if multiple NICs have the same address, the global list would
> have duplicate addresses. Even if for one NIC, promote secondaries in
> __inet_del_ifa can also lead to accumulating duplicate addresses.
> 
> When sctp binds address 'ANY' and creates a connection, it copies all
> the addresses from global list into asoc's bind addr list, which makes
> sctp pack the duplicate addresses into INIT/INIT_ACK packets.
> 
> This patch is to filter the duplicate addresses when copying the addrs
> from global list in sctp_copy_local_addr_list and unpacking addr_param
> from cookie in sctp_raw_to_bind_addrs to asoc's bind addr list.
> 
> Note that we can't filter the duplicate addrs when global address list
> gets updated, As NETDEV_DOWN event may remove an addr that still exists
> in another NIC.
> 
> Signed-off-by: Xin Long <lucien.xin@gmail.com>

Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>

> ---
>  net/sctp/bind_addr.c | 3 +++
>  net/sctp/protocol.c  | 3 +++
>  2 files changed, 6 insertions(+)
> 
> diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
> index 401c607..1ebc184 100644
> --- a/net/sctp/bind_addr.c
> +++ b/net/sctp/bind_addr.c
> @@ -292,6 +292,8 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
>  		}
>  
>  		af->from_addr_param(&addr, rawaddr, htons(port), 0);
> +		if (sctp_bind_addr_state(bp, &addr) != -1)
> +			goto next;
>  		retval = sctp_add_bind_addr(bp, &addr, sizeof(addr),
>  					    SCTP_ADDR_SRC, gfp);
>  		if (retval) {
> @@ -300,6 +302,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
>  			break;
>  		}
>  
> +next:
>  		len = ntohs(param->length);
>  		addrs_len -= len;
>  		raw_addr_list += len;
> diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
> index da5d82b..616a942 100644
> --- a/net/sctp/protocol.c
> +++ b/net/sctp/protocol.c
> @@ -220,6 +220,9 @@ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp,
>  		     !(copy_flags & SCTP_ADDR6_PEERSUPP)))
>  			continue;
>  
> +		if (sctp_bind_addr_state(bp, &addr->a) != -1)
> +			continue;
> +
>  		error = sctp_add_bind_addr(bp, &addr->a, sizeof(addr->a),
>  					   SCTP_ADDR_SRC, GFP_ATOMIC);
>  		if (error)
> -- 
> 2.1.0
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-sctp" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply

* [PATCH][V2] qed: fix memory leak of a qed_spq_entry on error failure paths
From: Colin King @ 2016-12-20 11:44 UTC (permalink / raw)
  To: Yuval Mintz, Ariel Elior, everest-linux-l2, netdev; +Cc: linux-kernel

From: Colin Ian King <colin.king@canonical.com>

A qed_spq_entry entry is allocated by qed_sp_init_request but is not
kfree'd if an error occurs, causing a memory leak. Fix this by
returning the previously allocated spq entry and also setting *pp_ent
to NULL to be safe.

Thanks to Yuval Mintz for suggestions on how to improve my original
fix.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 drivers/net/ethernet/qlogic/qed/qed_sp_commands.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index a39ef2e..d2034fa 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -55,8 +55,10 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 		break;
 
 	case QED_SPQ_MODE_BLOCK:
-		if (!p_data->p_comp_data)
-			return -EINVAL;
+		if (!p_data->p_comp_data) {
+			rc = -EINVAL;
+			goto err;
+		}
 
 		p_ent->comp_cb.cookie = p_data->p_comp_data->cookie;
 		break;
@@ -71,7 +73,8 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 	default:
 		DP_NOTICE(p_hwfn, "Unknown SPQE completion mode %d\n",
 			  p_ent->comp_mode);
-		return -EINVAL;
+		rc = -EINVAL;
+		goto err;
 	}
 
 	DP_VERBOSE(p_hwfn, QED_MSG_SPQ,
@@ -85,6 +88,10 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn,
 	memset(&p_ent->ramrod, 0, sizeof(p_ent->ramrod));
 
 	return 0;
+err:
+	qed_spq_return_entry(p_hwfn, *pp_ent);
+	*pp_ent = NULL;
+	return rc;
 }
 
 static enum tunnel_clss qed_tunn_get_clss_type(u8 type)
-- 
2.10.2

^ permalink raw reply related

* Re: Soft lockup in tc_classify
From: Daniel Borkmann @ 2016-12-20 11:47 UTC (permalink / raw)
  To: Shahar Klein, Cong Wang
  Cc: Or Gerlitz, Linux Netdev List, Roi Dayan, David Miller,
	Jiri Pirko, John Fastabend, Hadar Hen Zion
In-Reply-To: <5a985705-11e5-1575-a049-723accb97608@mellanox.com>

Hi Shahar,

On 12/20/2016 07:22 AM, Shahar Klein wrote:
> On 12/19/2016 7:58 PM, Cong Wang wrote:
>> On Mon, Dec 19, 2016 at 8:39 AM, Shahar Klein <shahark@mellanox.com> wrote:
>>> On 12/13/2016 12:51 AM, Cong Wang wrote:
>>>>
>>>> On Mon, Dec 12, 2016 at 1:18 PM, Or Gerlitz <gerlitz.or@gmail.com> wrote:
>>>>>
>>>>> On Mon, Dec 12, 2016 at 3:28 PM, Daniel Borkmann <daniel@iogearbox.net>
>>>>> wrote:
>>>>>
>>>>>> Note that there's still the RCU fix missing for the deletion race that
>>>>>> Cong will still send out, but you say that the only thing you do is to
>>>>>> add a single rule, but no other operation in involved during that test?
>>>>>
>>>>> What's missing to have the deletion race fixed? making a patch or
>>>>> testing to a patch which was sent?
>>>>
>>>> If you think it would help for this problem, here is my patch rebased
>>>> on the latest net-next.
>>>>
>>>> Again, I don't see how it could help this case yet, especially I don't
>>>> see how we could have a loop in this singly linked list.
>>>
>>> I've applied cong's patch and hit a different lockup(full log attached):
>>
>> Are you sure this is really different? For me, it is still inside the loop
>> in tc_classify(), with only a slightly different offset.
>>
>>>
>>> Daniel suggested I'll add a print:
>>>                 case RTM_DELTFILTER:
>>> -                   err = tp->ops->delete(tp, fh);
>>> +                 printk(KERN_ERR "DEBUGG:SK %s:%d\n", __func__, __LINE__);
>>> +                 err = tp->ops->delete(tp, fh, &last);
>>>                         if (err == 0) {
>>>
>>> and I couldn't see this print in the output.....
>>
>> Hmm, that is odd, if this never prints, then my patch should not make any
>> difference.
>>
>> There are still two other cases where we could change tp->next, so do you
>> mind to add two more printk's for debugging?
>>
>> Attached is the delta patch.
>>
>> Thanks!
>
> I've added a slightly different debug print:
> @@ -368,11 +375,12 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
>                  if (tp_created) {
>                          RCU_INIT_POINTER(tp->next, rtnl_dereference(*back));
>                          rcu_assign_pointer(*back, tp);
> +                 printk(KERN_ERR "DEBUGG:SK add/change filter by: %pf tp=%p tp->next=%p\n", tp->ops->get, tp, tp->next);
>                  }
>                  tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false);

I'm curious, could you be a bit more verbose why you didn't go with Cong's
debug patch?

In particular, why you removed the hunk from the condition 'n->nlmsg_type ==
RTM_DELTFILTER && t->tcm_handle == 0' where we delete the whole tp instance?

Is it because if you have that printk() there, then the issue doesn't trigger
for you anymore? Or any other reason?

How many CPUs does your test machine have, I suspect more than 1, right?

So iff RTM_DELTFILTER with tcm_handle of 0 really played a role in this, I'm
wondering whether there was a subtle deletion + add race where the newly added
filter on the other CPU still saw a stale pointer in the list. But just a wild
guess at this point.

Hmm, could you try this below to see whether the issue still appears?

Thanks,
Daniel

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 3fbba79..4eee1cb 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -317,7 +317,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n)
  		if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) {
  			struct tcf_proto *next = rtnl_dereference(tp->next);

-			RCU_INIT_POINTER(*back, next);
+			rcu_assign_pointer(*back, next);

  			tfilter_notify(net, skb, n, tp, fh,
  				       RTM_DELTFILTER, false);


> full output attached:
>
> [  283.290271] Mirror/redirect action on
> [  283.305031] DEBUGG:SK add/change filter by: fl_get [cls_flower] tp=ffff9432d704df60 tp->next=          (null)
> [  283.322563] DEBUGG:SK add/change filter by: fl_get [cls_flower] tp=ffff9436e718d240 tp->next=          (null)
> [  283.359997] GACT probability on
> [  283.365923] DEBUGG:SK add/change filter by: fl_get [cls_flower] tp=ffff9436e718d3c0 tp->next=ffff9436e718d240
> [  283.378725] DEBUGG:SK add/change filter by: fl_get [cls_flower] tp=ffff9436e718d3c0 tp->next=ffff9436e718d3c0
> [  283.391310] DEBUGG:SK add/change filter by: fl_get [cls_flower] tp=ffff9436e718d3c0 tp->next=ffff9436e718d3c0
> [  283.403923] DEBUGG:SK add/change filter by: fl_get [cls_flower] tp=ffff9436e718d3c0 tp->next=ffff9436e718d3c0
> [  283.416542] DEBUGG:SK add/change filter by: fl_get [cls_flower] tp=ffff9436e718d3c0 tp->next=ffff9436e718d3c0
> [  308.538571] NMI watchdog: BUG: soft lockup - CPU#0 stuck for 23s! [swapper/0:0]
>
> Thanks
> Shahar

^ permalink raw reply related

* Re: wl1251 & mac address & calibration data
From: Kalle Valo @ 2016-12-20 11:47 UTC (permalink / raw)
  To: Arend Van Spriel
  Cc: Pali Rohár, Daniel Wagner, Luis R. Rodriguez, Tom Gundersen,
	Johannes Berg, Ming Lei, Mimi Zohar, Bjorn Andersson,
	Rafał Miłecki, Sebastian Reichel, Pavel Machek,
	Michal Kazior, Ivaylo Dimitrov, Aaro Koskinen, Tony Lindgren,
	linux-wireless, Network Development, linux-kernel@vger.kernel.org
In-Reply-To: <fd180271-1cd4-9891-e3d5-ae9cd0fb088b@broadcom.com>

Arend Van Spriel <arend.vanspriel@broadcom.com> writes:

> On 18-12-2016 13:09, Pali Rohár wrote:
>
>> File wl1251-nvs.bin is provided by linux-firmware package and contains 
>> default data which should be overriden by model specific calibrated 
>> data.
>
> Ah. Someone thought it was a good idea to provide the "one ring to rule
> them all". Nice.

Yes, that was a bad idea. wl1251-nvs.bin in linux-firmware.git should be
renamed to wl1251-nvs.bin.example, or something like that, as it should
be only installed to a real system only if there's no real calibration
data available (only for developers to use, not real users).

>> But overwriting that one file is not possible as it next update of 
>> linux-firmware package will overwrite it back. It break any normal usage 
>> of package management.
>> 
>> Also it is ridiculously broken by design if some "boot" files needs to 
>> be overwritten to initialize hardware properly. To not break booting you 
>> need to overwrite that file before first boot. But without booting 
>> device you cannot read calibration data. So some hack with autoreboot 
>> after boot is needed.

Providing the calibration data via Device Tree is the proper way to
solve this. Yes yes, I know N900 doesn't support it but that's a
deficiency in N900, not Linux.

>> And how to detect that we have real overwritten calibration data and
>> not default one from linux-firmware? Any heuristic or checks will be
>> broken here. And no, nothing like you need to reboot your device now
>> (and similar concept) from windows world is not accepted.
>
> Well. After reading and creating calibration data you could just rebind
> the driver to the device to have it probed again.

Or load wl1251 as a module and make sure calibration data is installed
before the module is loaded. LEDE does that with ath10k:

https://git.lede-project.org/?p=source.git;a=blob;f=target/linux/ar71xx/base-files/etc/hotplug.d/firmware/11-ath10k-caldata;h=97875bd79a579a0010da3f60324b6ec966fe9c6a;hb=HEAD

> But yeah, the default one from linux-firmware should never have been
> there in the first place.

Agreed.

-- 
Kalle Valo

^ permalink raw reply

* Re: [PATCH net-next] ixgbevf: fix 'Etherleak' in ixgbevf
From: Weilong Chen @ 2016-12-20 11:50 UTC (permalink / raw)
  To: Alexander Duyck
  Cc: Jeff Kirsher, intel-wired-lan, Netdev,
	linux-kernel@vger.kernel.org, wangkefeng.wang
In-Reply-To: <CAKgT0UfA9VG4NQZLLxXvobtfp22yPpPssSM4Ge94KFa1RVnhyg@mail.gmail.com>

Hi,

Thanks for you reply.
We test you patch, but the problem is still there, it seems do not work.

I'm not sure why ixgbe use the limit 17. The kenel use ETH_ZLEN (60) 
with out FCS. A lot of drivers such as e1000 use it. Any explaination?

Thanks.

On 2016/12/16 0:13, Alexander Duyck wrote:
> On Thu, Dec 15, 2016 at 3:40 AM, Weilong Chen <chenweilong@huawei.com> wrote:
>> Nessus report the vf appears to leak memory in network packets.
>> Fix this by padding all small packets manually.
>>
>> And the CVE-2003-0001.
>> https://ofirarkin.files.wordpress.com/2008/11/atstake_etherleak_report.pdf
>>
>> Signed-off-by: Weilong Chen <chenweilong@huawei.com>
>> ---
>>  drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 7 +++++++
>>  1 file changed, 7 insertions(+)
>>
>> diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
>> index 6d4bef5..137a154 100644
>> --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
>> +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
>> @@ -3654,6 +3654,13 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
>>                 return NETDEV_TX_OK;
>>         }
>>
>> +       /* On PCI/PCI-X HW, if packet size is less than ETH_ZLEN,
>> +        * packets may get corrupted during padding by HW.
>> +        * To WA this issue, pad all small packets manually.
>> +        */
>> +       if (eth_skb_pad(skb))
>> +               return NETDEV_TX_OK;
>> +
>
> So the patch description for this probably isn't correct.  It looks
> like the problem isn't leaking data it is the fact that the frames
> aren't being padded to prevent malicious events.  The only issue is
> the patch is padding by a bit too much.  I would recommend replacing
> this with the following from ixgbe:
>
>         /*
>          * The minimum packet size for olinfo paylen is 17 so pad the skb
>          * in order to meet this minimum size requirement.
>          */
>         if (skb_put_padto(skb, 17))
>                 return NETDEV_TX_OK;
>
>
>>         tx_ring = adapter->tx_ring[skb->queue_mapping];
>>
>>         /* need: 1 descriptor per page * PAGE_SIZE/IXGBE_MAX_DATA_PER_TXD,
>> --
>> 1.7.12
>>
>
> .
>

^ permalink raw reply

* Re: mlx4: Bug in XDP_TX + 16 rx-queues
From: Tariq Toukan @ 2016-12-20 12:02 UTC (permalink / raw)
  To: Martin KaFai Lau, Tariq Toukan
  Cc: Saeed Mahameed, netdev@vger.kernel.org, Alexei Starovoitov
In-Reply-To: <20161219233709.GA29858@kafai-mba.local>

Thanks Martin, nice catch!


On 20/12/2016 1:37 AM, Martin KaFai Lau wrote:
> Hi Tariq,
>
> On Sat, Dec 17, 2016 at 02:18:03AM -0800, Martin KaFai Lau wrote:
>> Hi All,
>>
>> I have been debugging with XDP_TX and 16 rx-queues.
>>
>> 1) When 16 rx-queues is used and an XDP prog is doing XDP_TX,
>> it seems that the packet cannot be XDP_TX out if the pkt
>> is received from some particular CPUs (/rx-queues).
>>
>> 2) If 8 rx-queues is used, it does not have problem.
>>
>> 3) The 16 rx-queues problem also went away after reverting these
>> two patches:
>> 15fca2c8eb41 net/mlx4_en: Add ethtool statistics for XDP cases
>> 67f8b1dcb9ee net/mlx4_en: Refactor the XDP forwarding rings scheme
>>
> After taking a closer look at 67f8b1dcb9ee ("net/mlx4_en: Refactor the XDP forwarding rings scheme")
> and armed with the fact that '>8 rx-queues does not work', I have
> made the attached change that fixed the issue.
>
> Making change in mlx4_en_fill_qp_context() could be an easier fix
> but I think this change will be easier for discussion purpose.
>
> I don't want to lie that I know anything about how this variable
> works in CX3.  If this change makes sense, I can cook up a diff.
> Otherwise, can you shed some light on what could be happening
> and hopefully can lead to a diff?
>
> Thanks
> --Martin
>
>
> diff --git i/drivers/net/ethernet/mellanox/mlx4/en_netdev.c w/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> index bcd955339058..b3bfb987e493 100644
> --- i/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> +++ w/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
> @@ -1638,10 +1638,10 @@ int mlx4_en_start_port(struct net_device *dev)
>
>   	/* Configure tx cq's and rings */
>   	for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
> -		u8 num_tx_rings_p_up = t == TX ? priv->num_tx_rings_p_up : 1;
The bug lies in this line.
Number of rings per UP in case of TX_XDP should be 
priv->tx_ring_num[TX_XDP ], not 1.
Please try the following fix.
I can prepare and send it once the window opens again.

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c 
b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index bcd955339058..edbe200ac2fa 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -1638,7 +1638,8 @@ int mlx4_en_start_port(struct net_device *dev)

         /* Configure tx cq's and rings */
         for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
-               u8 num_tx_rings_p_up = t == TX ? priv->num_tx_rings_p_up 
: 1;
+               u8 num_tx_rings_p_up = t == TX ?
+                       priv->num_tx_rings_p_up : priv->tx_ring_num[t];

                 for (i = 0; i < priv->tx_ring_num[t]; i++) {
                         /* Configure cq */

> -
>   		for (i = 0; i < priv->tx_ring_num[t]; i++) {
>   			/* Configure cq */
> +			int user_prio;
> +
>   			cq = priv->tx_cq[t][i];
>   			err = mlx4_en_activate_cq(priv, cq, i);
>   			if (err) {
> @@ -1660,9 +1660,14 @@ int mlx4_en_start_port(struct net_device *dev)
>
>   			/* Configure ring */
>   			tx_ring = priv->tx_ring[t][i];
> +			if (t != TX_XDP)
> +				user_prio = i / priv->num_tx_rings_p_up;
> +			else
> +				user_prio = i & 0x07;
> +
>   			err = mlx4_en_activate_tx_ring(priv, tx_ring,
>   						       cq->mcq.cqn,
> -						       i / num_tx_rings_p_up);
> +						       user_prio);
>   			if (err) {
>   				en_err(priv, "Failed allocating Tx ring\n");
>   				mlx4_en_deactivate_cq(priv, cq);
Regards,
Tariq Toukan.

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox