Linux RDMA and InfiniBand development
 help / color / mirror / Atom feed
* [PATCH RFC v2 1/3] rdma_cm: add rdma_reject_msg() helper function
From: Steve Wise @ 2016-10-20 22:40 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	bart.vanassche-XdAiOPVOjttBDgjK7y7TUQ,
	linux-nvme-IAPFreCvJWM7uuMidbF8XUB+6BGkLq7r,
	sagig-NQWnxTmZq1alnMjI0IkVqw, hch-jcswGhMUV9g, axboe-b10kYP2dOMg
In-Reply-To: <cover.1477003235.git.swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>

rdma_reject_msg() returns a pointer to a string message associated with
the transport reject reason codes.

Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
---
 drivers/infiniband/core/cm.c   | 46 ++++++++++++++++++++++++++++++++++++++++++
 drivers/infiniband/core/cma.c  | 13 ++++++++++++
 drivers/infiniband/core/iwcm.c | 18 +++++++++++++++++
 include/rdma/ib_cm.h           |  6 ++++++
 include/rdma/iw_cm.h           |  6 ++++++
 include/rdma/rdma_cm.h         |  8 ++++++++
 6 files changed, 97 insertions(+)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c995255..0918c17 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -57,6 +57,52 @@ MODULE_AUTHOR("Sean Hefty");
 MODULE_DESCRIPTION("InfiniBand CM");
 MODULE_LICENSE("Dual BSD/GPL");
 
+static const char * const ib_rej_reason_strs[] = {
+	[IB_CM_REJ_NO_QP]			= "no qp",
+	[IB_CM_REJ_NO_EEC]			= "no eec",
+	[IB_CM_REJ_NO_RESOURCES]		= "no resources",
+	[IB_CM_REJ_TIMEOUT]			= "timeout",
+	[IB_CM_REJ_UNSUPPORTED]			= "unsupported",
+	[IB_CM_REJ_INVALID_COMM_ID]		= "invalid comm id",
+	[IB_CM_REJ_INVALID_COMM_INSTANCE]	= "invalid comm instance",
+	[IB_CM_REJ_INVALID_SERVICE_ID]		= "invalid service id",
+	[IB_CM_REJ_INVALID_TRANSPORT_TYPE]	= "invalid transport type",
+	[IB_CM_REJ_STALE_CONN]			= "stale conn",
+	[IB_CM_REJ_RDC_NOT_EXIST]		= "rdc not exist",
+	[IB_CM_REJ_INVALID_GID]			= "invalid gid",
+	[IB_CM_REJ_INVALID_LID]			= "invalid lid",
+	[IB_CM_REJ_INVALID_SL]			= "invalid sl",
+	[IB_CM_REJ_INVALID_TRAFFIC_CLASS]	= "invalid traffic class",
+	[IB_CM_REJ_INVALID_HOP_LIMIT]		= "invalid hop limit",
+	[IB_CM_REJ_INVALID_PACKET_RATE]		= "invalid packet rate",
+	[IB_CM_REJ_INVALID_ALT_GID]		= "invalid alt gid",
+	[IB_CM_REJ_INVALID_ALT_LID]		= "invalid alt lid",
+	[IB_CM_REJ_INVALID_ALT_SL]		= "invalid alt sl",
+	[IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS]	= "invalid alt traffic class",
+	[IB_CM_REJ_INVALID_ALT_HOP_LIMIT]	= "invalid alt hop limit",
+	[IB_CM_REJ_INVALID_ALT_PACKET_RATE]	= "invalid alt packet rate",
+	[IB_CM_REJ_PORT_CM_REDIRECT]		= "port cm redirect",
+	[IB_CM_REJ_PORT_REDIRECT]		= "port redirect",
+	[IB_CM_REJ_INVALID_MTU]			= "invalid mtu",
+	[IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES]	= "insufficient resp resources",
+	[IB_CM_REJ_CONSUMER_DEFINED]		= "consumer defined",
+	[IB_CM_REJ_INVALID_RNR_RETRY]		= "invalid rnr retry",
+	[IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID]	= "duplicate local comm id",
+	[IB_CM_REJ_INVALID_CLASS_VERSION]	= "invalid class version",
+	[IB_CM_REJ_INVALID_FLOW_LABEL]		= "invalid flow label",
+	[IB_CM_REJ_INVALID_ALT_FLOW_LABEL]	= "invalid alt flow label",
+};
+
+const char *__attribute_const__ ib_reject_msg(int reason)
+{
+	size_t index = reason;
+
+	return (index < ARRAY_SIZE(ib_rej_reason_strs) &&
+		ib_rej_reason_strs[index]) ?
+		ib_rej_reason_strs[index] : "unrecognized reason";
+}
+EXPORT_SYMBOL(ib_reject_msg);
+
 static void cm_add_one(struct ib_device *device);
 static void cm_remove_one(struct ib_device *device, void *client_data);
 
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 5f65a78..7cc7346 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -101,6 +101,19 @@ const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
 }
 EXPORT_SYMBOL(rdma_event_msg);
 
+const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
+						int reason)
+{
+	if (rdma_ib_or_roce(id->device, id->port_num))
+		return ib_reject_msg(reason);
+
+	if (rdma_protocol_iwarp(id->device, id->port_num))
+		return iw_reject_msg(reason);
+
+	return "unrecognized reason";
+}
+EXPORT_SYMBOL(rdma_reject_msg);
+
 static void cma_add_one(struct ib_device *device);
 static void cma_remove_one(struct ib_device *device, void *client_data);
 
diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c
index 357624f..588a31d 100644
--- a/drivers/infiniband/core/iwcm.c
+++ b/drivers/infiniband/core/iwcm.c
@@ -59,6 +59,24 @@ MODULE_AUTHOR("Tom Tucker");
 MODULE_DESCRIPTION("iWARP CM");
 MODULE_LICENSE("Dual BSD/GPL");
 
+static const char * const iw_rej_reason_strs[] = {
+	[ECONNRESET]			= "reset by remote host",
+	[ECONNREFUSED]			= "refused by remote application",
+	[ETIMEDOUT]			= "setup timeout",
+};
+
+const char *__attribute_const__ iw_reject_msg(int reason)
+{
+	/* iWARP uses negative errnos: negate once, as unsigned (no overflow) */
+	size_t index = -(size_t)reason;
+
+	/* positive or unknown reasons fail the bounds/NULL check below */
+	return (index < ARRAY_SIZE(iw_rej_reason_strs) &&
+		iw_rej_reason_strs[index]) ?
+		iw_rej_reason_strs[index] : "unrecognized reason";
+}
+EXPORT_SYMBOL(iw_reject_msg);
+
 static struct ibnl_client_cbs iwcm_nl_cb_table[] = {
 	[RDMA_NL_IWPM_REG_PID] = {.dump = iwpm_register_pid_cb},
 	[RDMA_NL_IWPM_ADD_MAPPING] = {.dump = iwpm_add_mapping_cb},
diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h
index 92a7d85..af193b7 100644
--- a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -603,4 +603,10 @@ struct ib_cm_sidr_rep_param {
 int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
 			struct ib_cm_sidr_rep_param *param);
 
+/**
+ * ib_reject_msg - return a pointer to a reject message string.
+ * @reason: Value returned in the REJECT event status field.
+ */
+const char *__attribute_const__ ib_reject_msg(int reason);
+
 #endif /* IB_CM_H */
diff --git a/include/rdma/iw_cm.h b/include/rdma/iw_cm.h
index 6d0065c..15b437e 100644
--- a/include/rdma/iw_cm.h
+++ b/include/rdma/iw_cm.h
@@ -253,4 +253,10 @@ int iw_cm_disconnect(struct iw_cm_id *cm_id, int abrupt);
 int iw_cm_init_qp_attr(struct iw_cm_id *cm_id, struct ib_qp_attr *qp_attr,
 		       int *qp_attr_mask);
 
+/**
+ * iw_reject_msg - return a pointer to a reject message string.
+ * @reason: Value returned in the REJECT event status field.
+ */
+const char *__attribute_const__ iw_reject_msg(int reason);
+
 #endif /* IW_CM_H */
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 81fb1d1..712a70c 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -388,4 +388,12 @@ int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
  */
 __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr);
 
+/**
+ * rdma_reject_msg - return a pointer to a reject message string.
+ * @id: Communication identifier that received the REJECT event
+ * @reason: Value returned in the REJECT event status field.
+ */
+const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
+						int reason);
+
 #endif /* RDMA_CM_H */
-- 
2.7.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* RE: [PATCH RFC] rdma_cm: add rdma_reject_msg() helper function
From: Steve Wise @ 2016-10-20 21:28 UTC (permalink / raw)
  To: 'Hefty, Sean', dledford-H+wXaHxf7aLQT0dZR+AlfA
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	bart.vanassche-XdAiOPVOjttBDgjK7y7TUQ
In-Reply-To: <1828884A29C6694DAF28B7E6B8A82373AB0A1D24-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>

> 
> > Hey Sean, I can do that.  I thought perhaps it was better to keep it
> > static in
> > cma.c, rather than having to extern them.   But I'll make that change.
> 
> You could make them static to the ib/iw cm and export reject_msg() helper
> routines from there.

Yea ok.  So rdma_reject_msg() calls ib_reject_msg() or iw_reject_msg()...

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* RE: [PATCH RFC] rdma_cm: add rdma_reject_msg() helper function
From: Hefty, Sean @ 2016-10-20 21:25 UTC (permalink / raw)
  To: Steve Wise, dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	bart.vanassche-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org
In-Reply-To: <018e01d22b18$31a40d10$94ec2730$@opengridcomputing.com>

> Hey Sean, I can do that.  I thought perhaps it was better to keep it
> static in
> cma.c, rather than having to extern them.   But I'll make that change.

You could make them static to the ib/iw cm and export reject_msg() helper routines from there.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* RE: [PATCH RFC] rdma_cm: add rdma_reject_msg() helper function
From: Steve Wise @ 2016-10-20 21:23 UTC (permalink / raw)
  To: 'Hefty, Sean', dledford-H+wXaHxf7aLQT0dZR+AlfA
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	bart.vanassche-XdAiOPVOjttBDgjK7y7TUQ
In-Reply-To: <1828884A29C6694DAF28B7E6B8A82373AB0A1CF9-P5GAC/sN6hkd3b2yrw5b5LfspsVTdybXVpNB7YpNyf8@public.gmane.org>

> 
> 
> > +static const char * const ib_rej_reason_strs[] = {
> > +	[IB_CM_REJ_NO_QP]			= "no qp",
> > +	[IB_CM_REJ_NO_EEC]			= "no eec",
> > +	[IB_CM_REJ_NO_RESOURCES]		= "no resources",
> > +	[IB_CM_REJ_TIMEOUT]			= "timeout",
> > +	[IB_CM_REJ_UNSUPPORTED]			= "unsupported",
> > +	[IB_CM_REJ_INVALID_COMM_ID]		= "invalid comm id",
> > +	[IB_CM_REJ_INVALID_COMM_INSTANCE]	= "invalid comm instance",
> > +	[IB_CM_REJ_INVALID_SERVICE_ID]		= "invalid service id",
> > +	[IB_CM_REJ_INVALID_TRANSPORT_TYPE]	= "invalid transport type",
> > +	[IB_CM_REJ_STALE_CONN]			= "stale conn",
> > +	[IB_CM_REJ_RDC_NOT_EXIST]		= "rdc not exist",
> > +	[IB_CM_REJ_INVALID_GID]			= "invalid gid",
> > +	[IB_CM_REJ_INVALID_LID]			= "invalid lid",
> > +	[IB_CM_REJ_INVALID_SL]			= "invalid sl",
> > +	[IB_CM_REJ_INVALID_TRAFFIC_CLASS]	= "invalid traffic class",
> > +	[IB_CM_REJ_INVALID_HOP_LIMIT]		= "invalid hop limit",
> > +	[IB_CM_REJ_INVALID_PACKET_RATE]		= "invalid packet rate",
> > +	[IB_CM_REJ_INVALID_ALT_GID]		= "invalid alt gid",
> > +	[IB_CM_REJ_INVALID_ALT_LID]		= "invalid alt lid",
> > +	[IB_CM_REJ_INVALID_ALT_SL]		= "invalid alt sl",
> > +	[IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS]	= "invalid alt traffic class",
> > +	[IB_CM_REJ_INVALID_ALT_HOP_LIMIT]	= "invalid alt hop limit",
> > +	[IB_CM_REJ_INVALID_ALT_PACKET_RATE]	= "invalid alt packet rate",
> > +	[IB_CM_REJ_PORT_CM_REDIRECT]		= "port cm redirect",
> > +	[IB_CM_REJ_PORT_REDIRECT]		= "port redirect",
> > +	[IB_CM_REJ_INVALID_MTU]			= "invalid mtu",
> > +	[IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES]	= "insufficient resp resources",
> > +	[IB_CM_REJ_CONSUMER_DEFINED]		= "consumer defined",
> > +	[IB_CM_REJ_INVALID_RNR_RETRY]		= "invalid rnr retry",
> > +	[IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID]	= "duplicate local comm id",
> > +	[IB_CM_REJ_INVALID_CLASS_VERSION]	= "invalid class version",
> > +	[IB_CM_REJ_INVALID_FLOW_LABEL]		= "invalid flow label",
> > +	[IB_CM_REJ_INVALID_ALT_FLOW_LABEL]	= "invalid alt flow label",
> > +};
> 
> This would be better placed as part of the ib_cm.
> 
> > +
> > +static const char * const iw_rej_reason_strs[] = {
> > +	[ECONNRESET]			= "reset by remote host",
> > +	[ECONNREFUSED]			= "refused by remote application",
> > +	[ETIMEDOUT]			= "setup timeout",
> > +};
> 
> Same with iw_cm.

Hey Sean, I can do that.  I thought perhaps it was better to keep it static in
cma.c, rather than having to extern them.   But I'll make that change.


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* RE: [PATCH RFC] rdma_cm: add rdma_reject_msg() helper function
From: Hefty, Sean @ 2016-10-20 21:21 UTC (permalink / raw)
  To: Steve Wise, dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	bart.vanassche-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org
In-Reply-To: <20161020211652.35902E0C53-/5N3P9jjx0xzbRFIqnYvSA@public.gmane.org>

>  drivers/infiniband/core/cma.c | 64 +++++++++++++++++++++++++++++++++++++++++++
>  include/rdma/rdma_cm.h        |  8 ++++++
>  2 files changed, 72 insertions(+)


> +static const char * const ib_rej_reason_strs[] = {
> +	[IB_CM_REJ_NO_QP]			= "no qp",
> +	[IB_CM_REJ_NO_EEC]			= "no eec",
> +	[IB_CM_REJ_NO_RESOURCES]		= "no resources",
> +	[IB_CM_REJ_TIMEOUT]			= "timeout",
> +	[IB_CM_REJ_UNSUPPORTED]			= "unsupported",
> +	[IB_CM_REJ_INVALID_COMM_ID]		= "invalid comm id",
> +	[IB_CM_REJ_INVALID_COMM_INSTANCE]	= "invalid comm instance",
> +	[IB_CM_REJ_INVALID_SERVICE_ID]		= "invalid service id",
> +	[IB_CM_REJ_INVALID_TRANSPORT_TYPE]	= "invalid transport type",
> +	[IB_CM_REJ_STALE_CONN]			= "stale conn",
> +	[IB_CM_REJ_RDC_NOT_EXIST]		= "rdc not exist",
> +	[IB_CM_REJ_INVALID_GID]			= "invalid gid",
> +	[IB_CM_REJ_INVALID_LID]			= "invalid lid",
> +	[IB_CM_REJ_INVALID_SL]			= "invalid sl",
> +	[IB_CM_REJ_INVALID_TRAFFIC_CLASS]	= "invalid traffic class",
> +	[IB_CM_REJ_INVALID_HOP_LIMIT]		= "invalid hop limit",
> +	[IB_CM_REJ_INVALID_PACKET_RATE]		= "invalid packet rate",
> +	[IB_CM_REJ_INVALID_ALT_GID]		= "invalid alt gid",
> +	[IB_CM_REJ_INVALID_ALT_LID]		= "invalid alt lid",
> +	[IB_CM_REJ_INVALID_ALT_SL]		= "invalid alt sl",
> +	[IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS]	= "invalid alt traffic class",
> +	[IB_CM_REJ_INVALID_ALT_HOP_LIMIT]	= "invalid alt hop limit",
> +	[IB_CM_REJ_INVALID_ALT_PACKET_RATE]	= "invalid alt packet rate",
> +	[IB_CM_REJ_PORT_CM_REDIRECT]		= "port cm redirect",
> +	[IB_CM_REJ_PORT_REDIRECT]		= "port redirect",
> +	[IB_CM_REJ_INVALID_MTU]			= "invalid mtu",
> +	[IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES]	= "insufficient resp resources",
> +	[IB_CM_REJ_CONSUMER_DEFINED]		= "consumer defined",
> +	[IB_CM_REJ_INVALID_RNR_RETRY]		= "invalid rnr retry",
> +	[IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID]	= "duplicate local comm id",
> +	[IB_CM_REJ_INVALID_CLASS_VERSION]	= "invalid class version",
> +	[IB_CM_REJ_INVALID_FLOW_LABEL]		= "invalid flow label",
> +	[IB_CM_REJ_INVALID_ALT_FLOW_LABEL]	= "invalid alt flow label",
> +};

This would be better placed as part of the ib_cm.

> +
> +static const char * const iw_rej_reason_strs[] = {
> +	[ECONNRESET]			= "reset by remote host",
> +	[ECONNREFUSED]			= "refused by remote application",
> +	[ETIMEDOUT]			= "setup timeout",
> +};

Same with iw_cm.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH RFC] rdma_cm: add rdma_reject_msg() helper function
From: Steve Wise @ 2016-10-20 21:12 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	sean.hefty-ral2JQCrhuEAvxtiuMwx3w
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	bart.vanassche-XdAiOPVOjttBDgjK7y7TUQ

rdma_reject_msg() returns a pointer to a string message associated with
the transport reject reason codes.

Signed-off-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>

---

Hey Bart, if folks like this, then perhaps you can make use of it in your
nvme series.

Steve.

---
 drivers/infiniband/core/cma.c | 64 +++++++++++++++++++++++++++++++++++++++++++
 include/rdma/rdma_cm.h        |  8 ++++++
 2 files changed, 72 insertions(+)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 5f65a78..fd11821 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -101,6 +101,70 @@ const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event)
 }
 EXPORT_SYMBOL(rdma_event_msg);
 
+static const char * const ib_rej_reason_strs[] = {
+	[IB_CM_REJ_NO_QP]			= "no qp",
+	[IB_CM_REJ_NO_EEC]			= "no eec",
+	[IB_CM_REJ_NO_RESOURCES]		= "no resources",
+	[IB_CM_REJ_TIMEOUT]			= "timeout",
+	[IB_CM_REJ_UNSUPPORTED]			= "unsupported",
+	[IB_CM_REJ_INVALID_COMM_ID]		= "invalid comm id",
+	[IB_CM_REJ_INVALID_COMM_INSTANCE]	= "invalid comm instance",
+	[IB_CM_REJ_INVALID_SERVICE_ID]		= "invalid service id",
+	[IB_CM_REJ_INVALID_TRANSPORT_TYPE]	= "invalid transport type",
+	[IB_CM_REJ_STALE_CONN]			= "stale conn",
+	[IB_CM_REJ_RDC_NOT_EXIST]		= "rdc not exist",
+	[IB_CM_REJ_INVALID_GID]			= "invalid gid",
+	[IB_CM_REJ_INVALID_LID]			= "invalid lid",
+	[IB_CM_REJ_INVALID_SL]			= "invalid sl",
+	[IB_CM_REJ_INVALID_TRAFFIC_CLASS]	= "invalid traffic class",
+	[IB_CM_REJ_INVALID_HOP_LIMIT]		= "invalid hop limit",
+	[IB_CM_REJ_INVALID_PACKET_RATE]		= "invalid packet rate",
+	[IB_CM_REJ_INVALID_ALT_GID]		= "invalid alt gid",
+	[IB_CM_REJ_INVALID_ALT_LID]		= "invalid alt lid",
+	[IB_CM_REJ_INVALID_ALT_SL]		= "invalid alt sl",
+	[IB_CM_REJ_INVALID_ALT_TRAFFIC_CLASS]	= "invalid alt traffic class",
+	[IB_CM_REJ_INVALID_ALT_HOP_LIMIT]	= "invalid alt hop limit",
+	[IB_CM_REJ_INVALID_ALT_PACKET_RATE]	= "invalid alt packet rate",
+	[IB_CM_REJ_PORT_CM_REDIRECT]		= "port cm redirect",
+	[IB_CM_REJ_PORT_REDIRECT]		= "port redirect",
+	[IB_CM_REJ_INVALID_MTU]			= "invalid mtu",
+	[IB_CM_REJ_INSUFFICIENT_RESP_RESOURCES]	= "insufficient resp resources",
+	[IB_CM_REJ_CONSUMER_DEFINED]		= "consumer defined",
+	[IB_CM_REJ_INVALID_RNR_RETRY]		= "invalid rnr retry",
+	[IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID]	= "duplicate local comm id",
+	[IB_CM_REJ_INVALID_CLASS_VERSION]	= "invalid class version",
+	[IB_CM_REJ_INVALID_FLOW_LABEL]		= "invalid flow label",
+	[IB_CM_REJ_INVALID_ALT_FLOW_LABEL]	= "invalid alt flow label",
+};
+
+static const char * const iw_rej_reason_strs[] = {
+	[ECONNRESET]			= "reset by remote host",
+	[ECONNREFUSED]			= "refused by remote application",
+	[ETIMEDOUT]			= "setup timeout",
+};
+
+const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
+						int reason)
+{
+	size_t index = reason;
+
+	if (rdma_ib_or_roce(id->device, id->port_num))
+		return (index < ARRAY_SIZE(ib_rej_reason_strs) &&
+			ib_rej_reason_strs[index]) ?
+			ib_rej_reason_strs[index] : "unrecognized reason";
+
+	if (rdma_protocol_iwarp(id->device, id->port_num)) {
+
+		/* iWARP uses negative errnos */
+		index = -index;
+		return (index < ARRAY_SIZE(iw_rej_reason_strs) &&
+			iw_rej_reason_strs[index]) ?
+			iw_rej_reason_strs[index] : "unrecognized reason";
+	}
+	return "unrecognized reason";
+}
+EXPORT_SYMBOL(rdma_reject_msg);
+
 static void cma_add_one(struct ib_device *device);
 static void cma_remove_one(struct ib_device *device, void *client_data);
 
diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h
index 81fb1d1..712a70c 100644
--- a/include/rdma/rdma_cm.h
+++ b/include/rdma/rdma_cm.h
@@ -388,4 +388,12 @@ int rdma_set_afonly(struct rdma_cm_id *id, int afonly);
  */
 __be64 rdma_get_service_id(struct rdma_cm_id *id, struct sockaddr *addr);
 
+/**
+ * rdma_reject_msg - return a pointer to a reject message string.
+ * @id: Communication identifier that received the REJECT event
+ * @reason: Value returned in the REJECT event status field.
+ */
+const char *__attribute_const__ rdma_reject_msg(struct rdma_cm_id *id,
+						int reason);
+
 #endif /* RDMA_CM_H */
-- 
2.7.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: [PATCH 00/10] mm: adjust get_user_pages* functions to explicitly pass FOLL_* flags
From: Michal Hocko @ 2016-10-20 19:26 UTC (permalink / raw)
  To: Dave Hansen
  Cc: Lorenzo Stoakes, linux-mm, Linus Torvalds, Jan Kara, Hugh Dickins,
	Rik van Riel, Mel Gorman, Andrew Morton, adi-buildroot-devel,
	ceph-devel, dri-devel, intel-gfx, kvm, linux-alpha,
	linux-arm-kernel, linux-cris-kernel, linux-fbdev, linux-fsdevel,
	linux-ia64, linux-kernel, linux-media, linux-mips, linux-rdma,
	linux-s390, linux-samsung-soc
In-Reply-To: <5807AC2B.4090208@linux.intel.com>

On Wed 19-10-16 10:23:55, Dave Hansen wrote:
> On 10/19/2016 10:01 AM, Michal Hocko wrote:
> > The question I had earlier was whether this has to be an explicit FOLL
> > flag used by g-u-p users or we can just use it internally when mm !=
> > current->mm
> 
> The reason I chose not to do that was that deferred work gets run under
> a basically random 'current'.  If we just use 'mm != current->mm', then
> the deferred work will sometimes have pkeys enforced and sometimes not,
> basically randomly.

OK, I see (async_pf_execute and ksm ). It makes more sense to me. Thanks
for the clarification.

-- 
Michal Hocko
SUSE Labs

^ permalink raw reply

* [PATCH net-next v2 7/9] net: use core MTU range checking in misc drivers
From: Jarod Wilson @ 2016-10-20 17:55 UTC (permalink / raw)
  To: linux-kernel
  Cc: Jarod Wilson, netdev, linux-rdma, Stefan Richter, Faisal Latif,
	Cliff Whickman, Robin Holt, Jes Sorensen, Marek Lindner,
	Simon Wunderlich, Antonio Quartulli, Sathya Prakash, Chaitra P B,
	Suganath Prabu Subramani, MPT-FusionLinux.pdl, Sebastian Reichel,
	Felipe Balbi, Arvid Brodin, Remi Denis-Courmont
In-Reply-To: <20161020175524.6184-1-jarod@redhat.com>

firewire-net:
- set min/max_mtu
- remove fwnet_change_mtu

nes:
- set max_mtu
- clean up nes_netdev_change_mtu

xpnet:
- set min/max_mtu
- remove xpnet_dev_change_mtu

hippi:
- set min/max_mtu
- remove hippi_change_mtu

batman-adv:
- set max_mtu
- remove batadv_interface_change_mtu
- initialization is a little async, not 100% certain that max_mtu is set
  in the optimal place, don't have hardware to test with

rionet:
- set min/max_mtu
- remove rionet_change_mtu

slip:
- set min/max_mtu
- streamline sl_change_mtu

um/net_kern:
- remove pointless ndo_change_mtu

hsi/clients/ssi_protocol:
- use core MTU range checking
- remove now redundant ssip_pn_set_mtu

ipoib:
- set a default max MTU value
- Note: ipoib's actual max MTU can vary, depending on if the device is in
  connected mode or not, so we'll just set the max_mtu value to the max
  possible, and let the ndo_change_mtu function continue to validate any new
  MTU change requests with checks for CM or not. Note that ipoib has no
  min_mtu set, and thus, the network core's mtu > 0 check is the only lower
  bounds here.

mptlan:
- use net core MTU range checking
- remove now redundant mpt_lan_change_mtu

fddi:
- min_mtu = 21, max_mtu = 4470
- remove now redundant fddi_change_mtu (including export)

fjes:
- min_mtu = 8192, max_mtu = 65536
- The max_mtu value is actually one over IP_MAX_MTU here, but the idea is to
  get past the core net MTU range checks so fjes_change_mtu can validate a
  new MTU against what it supports (see fjes_support_mtu in fjes_hw.c)

hsr:
- min_mtu = 0 (calls ether_setup, max_mtu is 1500)

f_phonet:
- min_mtu = 6, max_mtu = 65541

u_ether:
- min_mtu = 14, max_mtu = 15412

phonet/pep-gprs:
- min_mtu = 576, max_mtu = 65530
- remove redundant gprs_set_mtu

CC: netdev@vger.kernel.org
CC: linux-rdma@vger.kernel.org
CC: Stefan Richter <stefanr@s5r6.in-berlin.de>
CC: Faisal Latif <faisal.latif@intel.com>
CC: linux-rdma@vger.kernel.org
CC: Cliff Whickman <cpw@sgi.com>
CC: Robin Holt <robinmholt@gmail.com>
CC: Jes Sorensen <jes@trained-monkey.org>
CC: Marek Lindner <mareklindner@neomailbox.ch>
CC: Simon Wunderlich <sw@simonwunderlich.de>
CC: Antonio Quartulli <a@unstable.cc>
CC: Sathya Prakash <sathya.prakash@broadcom.com>
CC: Chaitra P B <chaitra.basappa@broadcom.com>
CC: Suganath Prabu Subramani <suganath-prabu.subramani@broadcom.com>
CC: MPT-FusionLinux.pdl@broadcom.com
CC: Sebastian Reichel <sre@kernel.org>
CC: Felipe Balbi <balbi@kernel.org>
CC: Arvid Brodin <arvid.brodin@alten.se>
CC: Remi Denis-Courmont <courmisch@gmail.com>
Signed-off-by: Jarod Wilson <jarod@redhat.com>
---
 arch/um/drivers/net_kern.c                |  8 --------
 drivers/firewire/net.c                    | 18 ++++--------------
 drivers/hsi/clients/ssi_protocol.c        | 14 ++++----------
 drivers/infiniband/hw/nes/nes.c           |  1 -
 drivers/infiniband/hw/nes/nes.h           |  4 ++--
 drivers/infiniband/hw/nes/nes_nic.c       | 10 +++-------
 drivers/infiniband/ulp/ipoib/ipoib_main.c |  1 +
 drivers/message/fusion/mptlan.c           | 15 ++++-----------
 drivers/misc/sgi-xp/xpnet.c               | 21 ++++-----------------
 drivers/net/fddi/skfp/skfddi.c            |  1 -
 drivers/net/fjes/fjes_main.c              |  2 ++
 drivers/net/hippi/rrunner.c               |  1 -
 drivers/net/rionet.c                      | 15 +++------------
 drivers/net/slip/slip.c                   | 11 +++++------
 drivers/usb/gadget/function/f_phonet.c    | 11 ++---------
 drivers/usb/gadget/function/u_ether.c     | 14 ++++----------
 include/linux/fddidevice.h                |  1 -
 include/linux/hippidevice.h               |  1 -
 net/802/fddi.c                            | 11 ++---------
 net/802/hippi.c                           | 14 ++------------
 net/batman-adv/soft-interface.c           | 13 +------------
 net/hsr/hsr_device.c                      |  1 +
 net/phonet/pep-gprs.c                     | 12 ++----------
 23 files changed, 46 insertions(+), 154 deletions(-)

diff --git a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
index 2cd5b68..1669240 100644
--- a/arch/um/drivers/net_kern.c
+++ b/arch/um/drivers/net_kern.c
@@ -256,13 +256,6 @@ static void uml_net_tx_timeout(struct net_device *dev)
 	netif_wake_queue(dev);
 }
 
-static int uml_net_change_mtu(struct net_device *dev, int new_mtu)
-{
-	dev->mtu = new_mtu;
-
-	return 0;
-}
-
 #ifdef CONFIG_NET_POLL_CONTROLLER
 static void uml_net_poll_controller(struct net_device *dev)
 {
@@ -374,7 +367,6 @@ static const struct net_device_ops uml_netdev_ops = {
 	.ndo_set_rx_mode	= uml_net_set_multicast_list,
 	.ndo_tx_timeout 	= uml_net_tx_timeout,
 	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_change_mtu 	= uml_net_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = uml_net_poll_controller,
diff --git a/drivers/firewire/net.c b/drivers/firewire/net.c
index 309311b..8430222 100644
--- a/drivers/firewire/net.c
+++ b/drivers/firewire/net.c
@@ -1349,15 +1349,6 @@ static netdev_tx_t fwnet_tx(struct sk_buff *skb, struct net_device *net)
 	return NETDEV_TX_OK;
 }
 
-static int fwnet_change_mtu(struct net_device *net, int new_mtu)
-{
-	if (new_mtu < 68)
-		return -EINVAL;
-
-	net->mtu = new_mtu;
-	return 0;
-}
-
 static const struct ethtool_ops fwnet_ethtool_ops = {
 	.get_link	= ethtool_op_get_link,
 };
@@ -1366,7 +1357,6 @@ static const struct net_device_ops fwnet_netdev_ops = {
 	.ndo_open       = fwnet_open,
 	.ndo_stop	= fwnet_stop,
 	.ndo_start_xmit = fwnet_tx,
-	.ndo_change_mtu = fwnet_change_mtu,
 };
 
 static void fwnet_init_dev(struct net_device *net)
@@ -1435,7 +1425,6 @@ static int fwnet_probe(struct fw_unit *unit,
 	struct net_device *net;
 	bool allocated_netdev = false;
 	struct fwnet_device *dev;
-	unsigned max_mtu;
 	int ret;
 	union fwnet_hwaddr *ha;
 
@@ -1478,9 +1467,10 @@ static int fwnet_probe(struct fw_unit *unit,
 	 * Use the RFC 2734 default 1500 octets or the maximum payload
 	 * as initial MTU
 	 */
-	max_mtu = (1 << (card->max_receive + 1))
-		  - sizeof(struct rfc2734_header) - IEEE1394_GASP_HDR_SIZE;
-	net->mtu = min(1500U, max_mtu);
+	net->max_mtu = (1 << (card->max_receive + 1))
+		       - sizeof(struct rfc2734_header) - IEEE1394_GASP_HDR_SIZE;
+	net->mtu = min(1500U, net->max_mtu);
+	net->min_mtu = ETH_MIN_MTU;
 
 	/* Set our hardware address while we're at it */
 	ha = (union fwnet_hwaddr *)net->dev_addr;
diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c
index 6031cd1..7ef8196 100644
--- a/drivers/hsi/clients/ssi_protocol.c
+++ b/drivers/hsi/clients/ssi_protocol.c
@@ -960,15 +960,6 @@ static int ssip_pn_stop(struct net_device *dev)
 	return 0;
 }
 
-static int ssip_pn_set_mtu(struct net_device *dev, int new_mtu)
-{
-	if (new_mtu > SSIP_MAX_MTU || new_mtu < PHONET_MIN_MTU)
-		return -EINVAL;
-	dev->mtu = new_mtu;
-
-	return 0;
-}
-
 static void ssip_xmit_work(struct work_struct *work)
 {
 	struct ssi_protocol *ssi =
@@ -1060,7 +1051,6 @@ static const struct net_device_ops ssip_pn_ops = {
 	.ndo_open	= ssip_pn_open,
 	.ndo_stop	= ssip_pn_stop,
 	.ndo_start_xmit	= ssip_pn_xmit,
-	.ndo_change_mtu	= ssip_pn_set_mtu,
 };
 
 static void ssip_pn_setup(struct net_device *dev)
@@ -1136,6 +1126,10 @@ static int ssi_protocol_probe(struct device *dev)
 		goto out1;
 	}
 
+	/* MTU range: 6 - 65535 */
+	ssi->netdev->min_mtu = PHONET_MIN_MTU;
+	ssi->netdev->max_mtu = SSIP_MAX_MTU;
+
 	SET_NETDEV_DEV(ssi->netdev, dev);
 	netif_carrier_off(ssi->netdev);
 	err = register_netdev(ssi->netdev);
diff --git a/drivers/infiniband/hw/nes/nes.c b/drivers/infiniband/hw/nes/nes.c
index 35cbb17..2baa45a 100644
--- a/drivers/infiniband/hw/nes/nes.c
+++ b/drivers/infiniband/hw/nes/nes.c
@@ -65,7 +65,6 @@ MODULE_DESCRIPTION("NetEffect RNIC Low-level iWARP Driver");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRV_VERSION);
 
-int max_mtu = 9000;
 int interrupt_mod_interval = 0;
 
 /* Interoperability */
diff --git a/drivers/infiniband/hw/nes/nes.h b/drivers/infiniband/hw/nes/nes.h
index e7430c9..85acd08 100644
--- a/drivers/infiniband/hw/nes/nes.h
+++ b/drivers/infiniband/hw/nes/nes.h
@@ -83,6 +83,8 @@
 #define NES_FIRST_QPN           64
 #define NES_SW_CONTEXT_ALIGN    1024
 
+#define NES_MAX_MTU		9000
+
 #define NES_NIC_MAX_NICS        16
 #define NES_MAX_ARP_TABLE_SIZE  4096
 
@@ -169,8 +171,6 @@ do { \
 #include "nes_cm.h"
 #include "nes_mgt.h"
 
-extern int max_mtu;
-#define max_frame_len (max_mtu+ETH_HLEN)
 extern int interrupt_mod_interval;
 extern int nes_if_count;
 extern int mpa_version;
diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 2b27d13..7f8597d 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -981,20 +981,16 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct nes_vnic	*nesvnic = netdev_priv(netdev);
 	struct nes_device *nesdev = nesvnic->nesdev;
-	int ret = 0;
 	u8 jumbomode = 0;
 	u32 nic_active;
 	u32 nic_active_bit;
 	u32 uc_all_active;
 	u32 mc_all_active;
 
-	if ((new_mtu < ETH_ZLEN) || (new_mtu > max_mtu))
-		return -EINVAL;
-
 	netdev->mtu = new_mtu;
 	nesvnic->max_frame_size	= new_mtu + VLAN_ETH_HLEN;
 
-	if (netdev->mtu	> 1500)	{
+	if (netdev->mtu	> ETH_DATA_LEN)	{
 		jumbomode=1;
 	}
 	nes_nic_init_timer_defaults(nesdev, jumbomode);
@@ -1020,7 +1016,7 @@ static int nes_netdev_change_mtu(struct net_device *netdev, int new_mtu)
 		nes_write_indexed(nesdev, NES_IDX_NIC_UNICAST_ALL, nic_active);
 	}
 
-	return ret;
+	return 0;
 }
 
 
@@ -1658,7 +1654,7 @@ struct net_device *nes_netdev_init(struct nes_device *nesdev,
 
 	netdev->watchdog_timeo = NES_TX_TIMEOUT;
 	netdev->irq = nesdev->pcidev->irq;
-	netdev->mtu = ETH_DATA_LEN;
+	netdev->max_mtu = NES_MAX_MTU;
 	netdev->hard_header_len = ETH_HLEN;
 	netdev->addr_len = ETH_ALEN;
 	netdev->type = ARPHRD_ETHER;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index cc05921..ae5d7cd 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -2017,6 +2017,7 @@ static struct net_device *ipoib_add_port(const char *format,
 	/* MTU will be reset when mcast join happens */
 	priv->dev->mtu  = IPOIB_UD_MTU(priv->max_ib_mtu);
 	priv->mcast_mtu  = priv->admin_mtu = priv->dev->mtu;
+	priv->dev->max_mtu = IPOIB_CM_MTU;
 
 	priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh);
 
diff --git a/drivers/message/fusion/mptlan.c b/drivers/message/fusion/mptlan.c
index 6955c9e..55dd71b 100644
--- a/drivers/message/fusion/mptlan.c
+++ b/drivers/message/fusion/mptlan.c
@@ -549,16 +549,6 @@ mpt_lan_close(struct net_device *dev)
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
-static int
-mpt_lan_change_mtu(struct net_device *dev, int new_mtu)
-{
-	if ((new_mtu < MPT_LAN_MIN_MTU) || (new_mtu > MPT_LAN_MAX_MTU))
-		return -EINVAL;
-	dev->mtu = new_mtu;
-	return 0;
-}
-
-/*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /* Tx timeout handler. */
 static void
 mpt_lan_tx_timeout(struct net_device *dev)
@@ -1304,7 +1294,6 @@ static const struct net_device_ops mpt_netdev_ops = {
 	.ndo_open       = mpt_lan_open,
 	.ndo_stop       = mpt_lan_close,
 	.ndo_start_xmit = mpt_lan_sdu_send,
-	.ndo_change_mtu = mpt_lan_change_mtu,
 	.ndo_tx_timeout = mpt_lan_tx_timeout,
 };
 
@@ -1375,6 +1364,10 @@ mpt_register_lan_device (MPT_ADAPTER *mpt_dev, int pnum)
 	dev->netdev_ops = &mpt_netdev_ops;
 	dev->watchdog_timeo = MPT_LAN_TX_TIMEOUT;
 
+	/* MTU range: 96 - 65280 */
+	dev->min_mtu = MPT_LAN_MIN_MTU;
+	dev->max_mtu = MPT_LAN_MAX_MTU;
+
 	dlprintk((KERN_INFO MYNAM ": Finished registering dev "
 		"and setting initial values\n"));
 
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 557f978..0c26eaf 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -118,6 +118,8 @@ static DEFINE_SPINLOCK(xpnet_broadcast_lock);
  * now, the default is 64KB.
  */
 #define XPNET_MAX_MTU (0x800000UL - L1_CACHE_BYTES)
+/* 68 comes from min TCP+IP+MAC header */
+#define XPNET_MIN_MTU 68
 /* 32KB has been determined to be the ideal */
 #define XPNET_DEF_MTU (0x8000UL)
 
@@ -330,22 +332,6 @@ xpnet_dev_stop(struct net_device *dev)
 	return 0;
 }
 
-static int
-xpnet_dev_change_mtu(struct net_device *dev, int new_mtu)
-{
-	/* 68 comes from min TCP+IP+MAC header */
-	if ((new_mtu < 68) || (new_mtu > XPNET_MAX_MTU)) {
-		dev_err(xpnet, "ifconfig %s mtu %d failed; value must be "
-			"between 68 and %ld\n", dev->name, new_mtu,
-			XPNET_MAX_MTU);
-		return -EINVAL;
-	}
-
-	dev->mtu = new_mtu;
-	dev_dbg(xpnet, "ifconfig %s mtu set to %d\n", dev->name, new_mtu);
-	return 0;
-}
-
 /*
  * Notification that the other end has received the message and
  * DMA'd the skb information.  At this point, they are done with
@@ -519,7 +505,6 @@ static const struct net_device_ops xpnet_netdev_ops = {
 	.ndo_open		= xpnet_dev_open,
 	.ndo_stop		= xpnet_dev_stop,
 	.ndo_start_xmit		= xpnet_dev_hard_start_xmit,
-	.ndo_change_mtu		= xpnet_dev_change_mtu,
 	.ndo_tx_timeout		= xpnet_dev_tx_timeout,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
@@ -555,6 +540,8 @@ xpnet_init(void)
 
 	xpnet_device->netdev_ops = &xpnet_netdev_ops;
 	xpnet_device->mtu = XPNET_DEF_MTU;
+	xpnet_device->min_mtu = XPNET_MIN_MTU;
+	xpnet_device->max_mtu = XPNET_MAX_MTU;
 
 	/*
 	 * Multicast assumes the LSB of the first octet is set for multicast
diff --git a/drivers/net/fddi/skfp/skfddi.c b/drivers/net/fddi/skfp/skfddi.c
index 51acc6d..3a63918 100644
--- a/drivers/net/fddi/skfp/skfddi.c
+++ b/drivers/net/fddi/skfp/skfddi.c
@@ -166,7 +166,6 @@ static const struct net_device_ops skfp_netdev_ops = {
 	.ndo_stop		= skfp_close,
 	.ndo_start_xmit		= skfp_send_pkt,
 	.ndo_get_stats		= skfp_ctl_get_stats,
-	.ndo_change_mtu		= fddi_change_mtu,
 	.ndo_set_rx_mode	= skfp_ctl_set_multicast_list,
 	.ndo_set_mac_address	= skfp_ctl_set_mac_address,
 	.ndo_do_ioctl		= skfp_ioctl,
diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c
index f36eb4a..b77e4ecf 100644
--- a/drivers/net/fjes/fjes_main.c
+++ b/drivers/net/fjes/fjes_main.c
@@ -1316,6 +1316,8 @@ static void fjes_netdev_setup(struct net_device *netdev)
 	netdev->netdev_ops = &fjes_netdev_ops;
 	fjes_set_ethtool_ops(netdev);
 	netdev->mtu = fjes_support_mtu[3];
+	netdev->min_mtu = fjes_support_mtu[0];
+	netdev->max_mtu = fjes_support_mtu[3];
 	netdev->flags |= IFF_BROADCAST;
 	netdev->features |= NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_FILTER;
 }
diff --git a/drivers/net/hippi/rrunner.c b/drivers/net/hippi/rrunner.c
index 95c0b45..f5a9728 100644
--- a/drivers/net/hippi/rrunner.c
+++ b/drivers/net/hippi/rrunner.c
@@ -68,7 +68,6 @@ static const struct net_device_ops rr_netdev_ops = {
 	.ndo_stop		= rr_close,
 	.ndo_do_ioctl		= rr_ioctl,
 	.ndo_start_xmit		= rr_start_xmit,
-	.ndo_change_mtu		= hippi_change_mtu,
 	.ndo_set_mac_address	= hippi_mac_addr,
 };
 
diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index a31f461..300bb14 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -466,17 +466,6 @@ static void rionet_set_msglevel(struct net_device *ndev, u32 value)
 	rnet->msg_enable = value;
 }
 
-static int rionet_change_mtu(struct net_device *ndev, int new_mtu)
-{
-	if ((new_mtu < 68) || (new_mtu > RIONET_MAX_MTU)) {
-		printk(KERN_ERR "%s: Invalid MTU size %d\n",
-		       ndev->name, new_mtu);
-		return -EINVAL;
-	}
-	ndev->mtu = new_mtu;
-	return 0;
-}
-
 static const struct ethtool_ops rionet_ethtool_ops = {
 	.get_drvinfo = rionet_get_drvinfo,
 	.get_msglevel = rionet_get_msglevel,
@@ -488,7 +477,6 @@ static const struct net_device_ops rionet_netdev_ops = {
 	.ndo_open		= rionet_open,
 	.ndo_stop		= rionet_close,
 	.ndo_start_xmit		= rionet_start_xmit,
-	.ndo_change_mtu		= rionet_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 };
@@ -525,6 +513,9 @@ static int rionet_setup_netdev(struct rio_mport *mport, struct net_device *ndev)
 
 	ndev->netdev_ops = &rionet_netdev_ops;
 	ndev->mtu = RIONET_MAX_MTU;
+	/* MTU range: 68 - 4082 */
+	ndev->min_mtu = ETH_MIN_MTU;
+	ndev->max_mtu = RIONET_MAX_MTU;
 	ndev->features = NETIF_F_LLTX;
 	SET_NETDEV_DEV(ndev, &mport->dev);
 	ndev->ethtool_ops = &rionet_ethtool_ops;
diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c
index 9ed6d1c..7e933d8 100644
--- a/drivers/net/slip/slip.c
+++ b/drivers/net/slip/slip.c
@@ -561,12 +561,7 @@ static int sl_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct slip *sl = netdev_priv(dev);
 
-	if (new_mtu < 68 || new_mtu > 65534)
-		return -EINVAL;
-
-	if (new_mtu != dev->mtu)
-		return sl_realloc_bufs(sl, new_mtu);
-	return 0;
+	return sl_realloc_bufs(sl, new_mtu);
 }
 
 /* Netdevice get statistics request */
@@ -663,6 +658,10 @@ static void sl_setup(struct net_device *dev)
 	dev->addr_len		= 0;
 	dev->tx_queue_len	= 10;
 
+	/* MTU range: 68 - 65534 */
+	dev->min_mtu = 68;
+	dev->max_mtu = 65534;
+
 	/* New-style flags. */
 	dev->flags		= IFF_NOARP|IFF_POINTOPOINT|IFF_MULTICAST;
 }
diff --git a/drivers/usb/gadget/function/f_phonet.c b/drivers/usb/gadget/function/f_phonet.c
index 0473d61..b4058f0 100644
--- a/drivers/usb/gadget/function/f_phonet.c
+++ b/drivers/usb/gadget/function/f_phonet.c
@@ -261,19 +261,10 @@ static int pn_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-static int pn_net_mtu(struct net_device *dev, int new_mtu)
-{
-	if ((new_mtu < PHONET_MIN_MTU) || (new_mtu > PHONET_MAX_MTU))
-		return -EINVAL;
-	dev->mtu = new_mtu;
-	return 0;
-}
-
 static const struct net_device_ops pn_netdev_ops = {
 	.ndo_open	= pn_net_open,
 	.ndo_stop	= pn_net_close,
 	.ndo_start_xmit	= pn_net_xmit,
-	.ndo_change_mtu	= pn_net_mtu,
 };
 
 static void pn_net_setup(struct net_device *dev)
@@ -282,6 +273,8 @@ static void pn_net_setup(struct net_device *dev)
 	dev->type		= ARPHRD_PHONET;
 	dev->flags		= IFF_POINTOPOINT | IFF_NOARP;
 	dev->mtu		= PHONET_DEV_MTU;
+	dev->min_mtu		= PHONET_MIN_MTU;
+	dev->max_mtu		= PHONET_MAX_MTU;
 	dev->hard_header_len	= 1;
 	dev->dev_addr[0]	= PN_MEDIA_USB;
 	dev->addr_len		= 1;
diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index 9c8c9ed..39a6df1 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -142,15 +142,6 @@ static inline int qlen(struct usb_gadget *gadget, unsigned qmult)
 
 /* NETWORK DRIVER HOOKUP (to the layer above this driver) */
 
-static int ueth_change_mtu(struct net_device *net, int new_mtu)
-{
-	if (new_mtu <= ETH_HLEN || new_mtu > GETHER_MAX_ETH_FRAME_LEN)
-		return -ERANGE;
-	net->mtu = new_mtu;
-
-	return 0;
-}
-
 static void eth_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *p)
 {
 	struct eth_dev *dev = netdev_priv(net);
@@ -736,7 +727,6 @@ static const struct net_device_ops eth_netdev_ops = {
 	.ndo_open		= eth_open,
 	.ndo_stop		= eth_stop,
 	.ndo_start_xmit		= eth_start_xmit,
-	.ndo_change_mtu		= ueth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
@@ -799,6 +789,10 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g,
 
 	net->ethtool_ops = &ops;
 
+	/* MTU range: 14 - 15412 */
+	net->min_mtu = ETH_HLEN;
+	net->max_mtu = GETHER_MAX_ETH_FRAME_LEN;
+
 	dev->gadget = g;
 	SET_NETDEV_DEV(net, &g->dev);
 	SET_NETDEV_DEVTYPE(net, &gadget_type);
diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h
index 9a79f01..32c22cf 100644
--- a/include/linux/fddidevice.h
+++ b/include/linux/fddidevice.h
@@ -26,7 +26,6 @@
 
 #ifdef __KERNEL__
 __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev);
-int fddi_change_mtu(struct net_device *dev, int new_mtu);
 struct net_device *alloc_fddidev(int sizeof_priv);
 #endif
 
diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h
index 8ec23fb..402f99e 100644
--- a/include/linux/hippidevice.h
+++ b/include/linux/hippidevice.h
@@ -32,7 +32,6 @@ struct hippi_cb {
 };
 
 __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev);
-int hippi_change_mtu(struct net_device *dev, int new_mtu);
 int hippi_mac_addr(struct net_device *dev, void *p);
 int hippi_neigh_setup_dev(struct net_device *dev, struct neigh_parms *p);
 struct net_device *alloc_hippi_dev(int sizeof_priv);
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 7d3a0af..6356623 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -141,15 +141,6 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev)
 
 EXPORT_SYMBOL(fddi_type_trans);
 
-int fddi_change_mtu(struct net_device *dev, int new_mtu)
-{
-	if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN))
-		return -EINVAL;
-	dev->mtu = new_mtu;
-	return 0;
-}
-EXPORT_SYMBOL(fddi_change_mtu);
-
 static const struct header_ops fddi_header_ops = {
 	.create		= fddi_header,
 };
@@ -161,6 +152,8 @@ static void fddi_setup(struct net_device *dev)
 	dev->type		= ARPHRD_FDDI;
 	dev->hard_header_len	= FDDI_K_SNAP_HLEN+3;	/* Assume 802.2 SNAP hdr len + 3 pad bytes */
 	dev->mtu		= FDDI_K_SNAP_DLEN;	/* Assume max payload of 802.2 SNAP frame */
+	dev->min_mtu		= FDDI_K_SNAP_HLEN;
+	dev->max_mtu		= FDDI_K_SNAP_DLEN;
 	dev->addr_len		= FDDI_K_ALEN;
 	dev->tx_queue_len	= 100;			/* Long queues on FDDI */
 	dev->flags		= IFF_BROADCAST | IFF_MULTICAST;
diff --git a/net/802/hippi.c b/net/802/hippi.c
index ade1a52..5e4427b 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -116,18 +116,6 @@ __be16 hippi_type_trans(struct sk_buff *skb, struct net_device *dev)
 
 EXPORT_SYMBOL(hippi_type_trans);
 
-int hippi_change_mtu(struct net_device *dev, int new_mtu)
-{
-	/*
-	 * HIPPI's got these nice large MTUs.
-	 */
-	if ((new_mtu < 68) || (new_mtu > 65280))
-		return -EINVAL;
-	dev->mtu = new_mtu;
-	return 0;
-}
-EXPORT_SYMBOL(hippi_change_mtu);
-
 /*
  * For HIPPI we will actually use the lower 4 bytes of the hardware
  * address as the I-FIELD rather than the actual hardware address.
@@ -174,6 +162,8 @@ static void hippi_setup(struct net_device *dev)
 	dev->type		= ARPHRD_HIPPI;
 	dev->hard_header_len 	= HIPPI_HLEN;
 	dev->mtu		= 65280;
+	dev->min_mtu		= 68;
+	dev->max_mtu		= 65280;
 	dev->addr_len		= HIPPI_ALEN;
 	dev->tx_queue_len	= 25 /* 5 */;
 	memset(dev->broadcast, 0xFF, HIPPI_ALEN);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 49e16b6..112679d 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -158,17 +158,6 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p)
 	return 0;
 }
 
-static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu)
-{
-	/* check ranges */
-	if ((new_mtu < 68) || (new_mtu > batadv_hardif_min_mtu(dev)))
-		return -EINVAL;
-
-	dev->mtu = new_mtu;
-
-	return 0;
-}
-
 /**
  * batadv_interface_set_rx_mode - set the rx mode of a device
  * @dev: registered network device to modify
@@ -920,7 +909,6 @@ static const struct net_device_ops batadv_netdev_ops = {
 	.ndo_vlan_rx_add_vid = batadv_interface_add_vid,
 	.ndo_vlan_rx_kill_vid = batadv_interface_kill_vid,
 	.ndo_set_mac_address = batadv_interface_set_mac_addr,
-	.ndo_change_mtu = batadv_interface_change_mtu,
 	.ndo_set_rx_mode = batadv_interface_set_rx_mode,
 	.ndo_start_xmit = batadv_interface_tx,
 	.ndo_validate_addr = eth_validate_addr,
@@ -987,6 +975,7 @@ struct net_device *batadv_softif_create(struct net *net, const char *name)
 	dev_net_set(soft_iface, net);
 
 	soft_iface->rtnl_link_ops = &batadv_link_ops;
+	soft_iface->max_mtu = batadv_hardif_min_mtu(soft_iface);
 
 	ret = register_netdevice(soft_iface);
 	if (ret < 0) {
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 16737cd..fc65b14 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -398,6 +398,7 @@ void hsr_dev_setup(struct net_device *dev)
 	random_ether_addr(dev->dev_addr);
 
 	ether_setup(dev);
+	dev->min_mtu = 0;
 	dev->header_ops = &hsr_header_ops;
 	dev->netdev_ops = &hsr_device_ops;
 	SET_NETDEV_DEVTYPE(dev, &hsr_type);
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index fa8237f..21c28b5 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -217,20 +217,10 @@ static netdev_tx_t gprs_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-static int gprs_set_mtu(struct net_device *dev, int new_mtu)
-{
-	if ((new_mtu < 576) || (new_mtu > (PHONET_MAX_MTU - 11)))
-		return -EINVAL;
-
-	dev->mtu = new_mtu;
-	return 0;
-}
-
 static const struct net_device_ops gprs_netdev_ops = {
 	.ndo_open	= gprs_open,
 	.ndo_stop	= gprs_close,
 	.ndo_start_xmit	= gprs_xmit,
-	.ndo_change_mtu	= gprs_set_mtu,
 };
 
 static void gprs_setup(struct net_device *dev)
@@ -239,6 +229,8 @@ static void gprs_setup(struct net_device *dev)
 	dev->type		= ARPHRD_PHONET_PIPE;
 	dev->flags		= IFF_POINTOPOINT | IFF_NOARP;
 	dev->mtu		= GPRS_DEFAULT_MTU;
+	dev->min_mtu		= 576;
+	dev->max_mtu		= (PHONET_MAX_MTU - 11);
 	dev->hard_header_len	= 0;
 	dev->addr_len		= 0;
 	dev->tx_queue_len	= 10;
-- 
2.10.0

^ permalink raw reply related

* Re: [PATCH rdma-core 4/6] libqedr: main
From: Jason Gunthorpe @ 2016-10-20 17:08 UTC (permalink / raw)
  To: Ram Amrani
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	Ariel.Elior-YGCgFSpz5w/QT0dZR+AlfA,
	Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA
In-Reply-To: <1476956952-17388-5-git-send-email-Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>

On Thu, Oct 20, 2016 at 12:49:10PM +0300, Ram Amrani wrote:
> +struct {
> +	unsigned int vendor;
> +	unsigned int device;
> +} hca_table[] = {

needs static const, please check all your stuff for static and const..

> +int qelr_modify_qp(struct ibv_qp *, struct ibv_qp_attr *,
> +		   int ibv_qp_attr_mask);
> +int qelr_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask,
> +		  struct ibv_qp_init_attr *init_attr);

It would be nice to be consistent, I prefer the argument name to be in
the prototype.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: New providers in rdma-core
From: Jason Gunthorpe @ 2016-10-20 16:35 UTC (permalink / raw)
  To: Amrani, Ram
  Cc: Adit Ranadive, Christoph Hellwig, Leon Romanovsky, Lijun Ou,
	Knut Omang, Doug Ledford, linux-rdma
In-Reply-To: <SN1PR07MB2207D6CF5ADA67FCFCAA3A2EF8D50-mikhvbZlbf8TSoR2DauN2+FPX92sqiQdvxpqHgZTriW3zl9H0oFU5g@public.gmane.org>

On Thu, Oct 20, 2016 at 09:24:30AM +0000, Amrani, Ram wrote:

> The idea looks good to me.
> But still, I'm missing something - in order for libraries and the kernel to use the 
> same headers they should reside in the same repository.
> Is that the long-term goal? And wouldn't that be mixing user/kernel spaces?

See the discussion here:

http://www.spinics.net/lists/linux-rdma/msg42067.html

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [Test fail]//Re: some test question//Re: [For help] configure crossbar build tool in CMakelist.txt
From: Jason Gunthorpe @ 2016-10-20 16:29 UTC (permalink / raw)
  To: oulijun; +Cc: linux-rdma, Linuxarm
In-Reply-To: <5808772F.6050305-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

On Thu, Oct 20, 2016 at 03:50:07PM +0800, oulijun wrote:
> 1. when set the VERBS_PROVIDER_DIR to empty, the
> sizeof(VERBS_PROVIDER_DIR) should be 0 and the branch should not be
> run

This is a mistake: sizeof("") is 1, so the if should be (> 1). I will
fix it.

> 2. the value of so_name should be /libhisi-rdmav2.so in @1 and
> libhisi-rdmav2.so in @2.  in fact, the value of so_name is /libhisi
> and libhisi
> 
> the test print log as follows:
> 
> -rdmav2.soer, 218] so_name: /libhisi
> [load_driver, 219] so_name: -rdmav2.so
> -rdmav2.soer, 229] so_name: libhisi
> [load_driver, 230] so_name: -rdmav2.so

If you notice the -rdmav2.so has been placed at the start of the line,
this suggests you have a spurious '\r' character. This would come from
the .driver file.

Since you did not use the cmake install process you must have written
the .driver file yourself and used a DOS text editor. UNIX line
endings are required.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: systemd unit file location...
From: Jason Gunthorpe @ 2016-10-20 16:26 UTC (permalink / raw)
  To: Weiny, Ira; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <2807E5FD2F6FDA4886F6618EAC48510E24F0E61D-8k97q/ur5Z2krb+BlOpmy7fspsVTdybXVpNB7YpNyf8@public.gmane.org>

On Thu, Oct 20, 2016 at 05:04:35AM +0000, Weiny, Ira wrote:
> Not defined as:
> 
> ./build/CMakeCache.txt:CMAKE_INSTALL_SYSTEMD_SERVICEDIR:PATH=/usr/local/lib/systemd/system

It is a mistake.

> And you add the "system" in the Debian rules?
> 
> ./debian/iwpmd.install:lib/systemd/system/iwpmd.service
> ./debian/rules:                 -DCMAKE_INSTALL_SYSTEMD_SERVICEDIR:PATH=/lib/systemd/system \

because systemd requires /lib/systemd when installed and
/usr/local/lib/systemd when in 'local' mode.

It could be wrapped in some kind of "if CMAKE_INSTALL_PREFIX ==
/usr/", but at least RH wants this set from a RPM macro, so I decided
to do the same with Debian.

>From 33c80ab9f08814b7614c111aadaaa612cceb6234 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
Date: Thu, 20 Oct 2016 10:10:43 -0600
Subject: [PATCH] Fix default path for systemd unit files

Missed /system

Reported-by: Ira Weiny <ira.weiny-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
Signed-off-by: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index a23aa860e6d3..c4fe705b2234 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -55,7 +55,7 @@ set(BUILD_LIB ${CMAKE_BINARY_DIR}/lib)
 set(CONFIG_DIR "${CMAKE_INSTALL_FULL_SYSCONFDIR}/libibverbs.d")
 set(CMAKE_INSTALL_INITDDIR "${CMAKE_INSTALL_SYSCONFDIR}/init.d"
   CACHE PATH "Location for init.d files")
-set(CMAKE_INSTALL_SYSTEMD_SERVICEDIR "${CMAKE_INSTALL_PREFIX}/lib/systemd"
+set(CMAKE_INSTALL_SYSTEMD_SERVICEDIR "${CMAKE_INSTALL_PREFIX}/lib/systemd/system"
   CACHE PATH "Location for systemd service files")
 
 set(ACM_PROVIDER_DIR "${CMAKE_INSTALL_FULL_LIBDIR}/ibacm"
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* Re: [PATCH v3 0/11] Fix race conditions related to stopping block layer queues
From: Bart Van Assche @ 2016-10-20 15:35 UTC (permalink / raw)
  To: Keith Busch
  Cc: Jens Axboe, Christoph Hellwig, James Bottomley,
	Martin K. Petersen, Mike Snitzer, Doug Ledford, Ming Lin,
	Laurence Oberman, linux-block@vger.kernel.org,
	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org,
	linux-nvme@lists.infradead.org
In-Reply-To: <20161020145224.GA2771@localhost.localdomain>

On 10/20/2016 07:52 AM, Keith Busch wrote:
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index ccd9cc5..078530c 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -201,7 +201,7 @@ static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
>
>  void nvme_requeue_req(struct request *req)
>  {
> -	blk_mq_requeue_request(req, true);
> +	blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
>  }
>  EXPORT_SYMBOL_GPL(nvme_requeue_req);

Hello Keith,

What I had missed while I was preparing my patch series is that the NVMe 
driver, unlike the dm driver, can call blk_mq_requeue_request() on a 
stopped queue. So the above patch is needed to keep the current 
semantics of the NVMe code. I will merge this patch in my patch series.

Thanks,

Bart.

^ permalink raw reply

* [PATCH rdma-core v2 4/4] redhat/spec: build split rpm packages
From: Jarod Wilson @ 2016-10-20 15:33 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Jarod Wilson
In-Reply-To: <20161020153357.27286-1-jarod-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

We're thinking that the upgrade path for our users will be simpler and
less surprising if we take rdma-core and split out the end result into the
same packages we currently ship in our distributions, save the libibverbs
providers, which are all merged into libibverbs. End result with this
spec:

Wrote: /home/jwilson/rpmbuild/RPMS/x86_64/rdma-core-11-1.el7.x86_64.rpm
Wrote: /home/jwilson/rpmbuild/RPMS/x86_64/rdma-core-devel-11-1.el7.x86_64.rpm
Wrote: /home/jwilson/rpmbuild/RPMS/x86_64/libibverbs-11-1.el7.x86_64.rpm
Wrote: /home/jwilson/rpmbuild/RPMS/x86_64/libibverbs-utils-11-1.el7.x86_64.rpm
Wrote: /home/jwilson/rpmbuild/RPMS/x86_64/ibacm-11-1.el7.x86_64.rpm
Wrote: /home/jwilson/rpmbuild/RPMS/x86_64/iwpmd-11-1.el7.x86_64.rpm
Wrote: /home/jwilson/rpmbuild/RPMS/x86_64/libibcm-11-1.el7.x86_64.rpm
Wrote: /home/jwilson/rpmbuild/RPMS/x86_64/libibumad-11-1.el7.x86_64.rpm
Wrote: /home/jwilson/rpmbuild/RPMS/x86_64/librdmacm-11-1.el7.x86_64.rpm
Wrote: /home/jwilson/rpmbuild/RPMS/x86_64/librdmacm-utils-11-1.el7.x86_64.rpm
Wrote: /home/jwilson/rpmbuild/RPMS/x86_64/srp_daemon-11-1.el7.x86_64.rpm
Wrote: /home/jwilson/rpmbuild/RPMS/x86_64/rdma-core-debuginfo-11-1.el7.x86_64.rpm

Signed-off-by: Jarod Wilson <jarod-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
---
 redhat/rdma-core.spec | 360 +++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 326 insertions(+), 34 deletions(-)

diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec
index 47b7d87..6fc35b5 100644
--- a/redhat/rdma-core.spec
+++ b/redhat/rdma-core.spec
@@ -7,10 +7,11 @@ Summary: RDMA core userspace libraries and daemons
 #  providers/ipathverbs/ Dual licensed using a BSD license with an extra patent clause
 #  providers/rxe/ Incorporates code from ipathverbs and contains the patent clause
 #  providers/hfi1verbs Uses the 3 Clause BSD license
-License: (GPLv2 or BSD) and (GPLv2 or PathScale-BSD)
+License: GPLv2 or BSD
 Url: http://openfabrics.org/
 Source: rdma-core-%{version}.tgz
-BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
+# https://github.com/linux-rdma/rdma-core
+BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
 
 BuildRequires: binutils
 BuildRequires: cmake >= 2.8.11
@@ -19,20 +20,15 @@ BuildRequires: pkgconfig
 BuildRequires: pkgconfig(libnl-3.0)
 BuildRequires: pkgconfig(libnl-route-3.0)
 BuildRequires: valgrind-devel
+BuildRequires: libnl3-devel
+
+# Red Hat/Fedora previously shipped redhat/ as a stand-alone
+# package called 'rdma', which we're supplanting here.
+Provides: rdma = %{version}-%{release}
+Obsoletes: rdma < %{version}-%{release}
 
 # Since we recommend developers use Ninja, so should packagers, for consistency.
 %define CMAKE_FLAGS %{nil}
-%if 0%{?suse_version}
-# SuSE releases have it, and sometime around cmake 3.3.2-1.2 the macros learned to use it.
-BuildRequires: ninja,make
-%define __builder ninja
-# cmake_install,make_jobs is specified by opensuse
-
-# Tumbleweed's cmake RPM macro adds -Wl,--no-undefined to the module flags
-# which is totally inappropriate and breaks building 'ENABLE_EXPORTS' style
-# module libraries (eg ibacmp).
-%define CMAKE_FLAGS -DCMAKE_MODULE_LINKER_FLAGS=""
-%else
 %if 0%{?fedora} >= 23
 # Ninja was introduced in FC23
 BuildRequires: ninja-build
@@ -45,12 +41,161 @@ BuildRequires: make
 %define make_jobs make -v %{?_smp_mflags}
 %define cmake_install DESTDIR=%{buildroot} make install
 %endif
+
+%define systemd_dep systemd-units
+%if 0%{?fedora} >= 18
+%define systemd_dep systemd
 %endif
 
 %description
-Temporary packaging
+RDMA core userspace infrastructure and documentation.
+
+%package devel
+Summary: RDMA core development libraries and headers
+Provides: libibverbs-devel = %{version}-%{release}
+Obsoletes: libibverbs-devel < %{version}-%{release}
+Provides: libibcm-devel = %{version}-%{release}
+Obsoletes: libibcm-devel < %{version}-%{release}
+Provides: libibumad-devel = %{version}-%{release}
+Obsoletes: libibumad-devel < %{version}-%{release}
+Provides: librdmacm-devel = %{version}-%{release}
+Obsoletes: librdmacm-devel < %{version}-%{release}
+Provides: ibacm-devel = %{version}-%{release}
+Obsoletes: ibacm-devel < %{version}-%{release}
+
+%description devel
+RDMA core development libraries and headers.
+
+%package -n libibverbs
+Summary: A library and drivers for direct userspace use of RDMA (InfiniBand/iWARP) hardware
+Requires(post): /sbin/ldconfig
+Requires(postun): /sbin/ldconfig
+Requires: rdma-core
+Provides: libcxgb3 = %{version}-%{release}
+Obsoletes: libcxgb3 < %{version}-%{release}
+Provides: libcxgb4 = %{version}-%{release}
+Obsoletes: libcxgb4 < %{version}-%{release}
+Provides: libhfi1 = %{version}-%{release}
+Obsoletes: libhfi1 < %{version}-%{release}
+Provides: libi40iw = %{version}-%{release}
+Obsoletes: libi40iw < %{version}-%{release}
+Provides: libipathverbs = %{version}-%{release}
+Obsoletes: libipathverbs < %{version}-%{release}
+Provides: libmlx4 = %{version}-%{release}
+Obsoletes: libmlx4 < %{version}-%{release}
+Provides: libmlx5 = %{version}-%{release}
+Obsoletes: libmlx5 < %{version}-%{release}
+Provides: libmthca = %{version}-%{release}
+Obsoletes: libmthca < %{version}-%{release}
+Provides: libnes = %{version}-%{release}
+Obsoletes: libnes < %{version}-%{release}
+Provides: libocrdma = %{version}-%{release}
+Obsoletes: libocrdma < %{version}-%{release}
+Provides: librxe = %{version}-%{release}
+Obsoletes: librxe < %{version}-%{release}
+
+%description -n libibverbs
+libibverbs is a library that allows userspace processes to use RDMA
+"verbs" as described in the InfiniBand Architecture Specification and
+the RDMA Protocol Verbs Specification.  This includes direct hardware
+access from userspace to InfiniBand/iWARP adapters (kernel bypass) for
+fast path operations.
+
+Device-specific plug-in ibverbs userspace drivers are included:
+
+- libcxgb3: Chelsio T3 iWARP HCA
+- libcxgb4: Chelsio T4 iWARP HCA
+- libhfi1: Intel Omni-Path HFI
+- libi40iw: Intel Ethernet Connection X722 RDMA
+- libipathverbs: QLogic InfiniPath HCA
+- libmlx4: Mellanox ConnectX-3 InfiniBand HCA
+- libmlx5: Mellanox Connect-IB/X-4+ InfiniBand HCA
+- libmthca: Mellanox InfiniBand HCA
+- libnes: NetEffect RNIC
+- libocrdma: Emulex OneConnect RDMA/RoCE Device
+- librxe: A software implementation of the RoCE protocol
+
+%package -n libibverbs-utils
+Summary: Examples for the libibverbs library
+Requires: libibverbs%{?_isa} = %{version}-%{release}
+
+%description -n libibverbs-utils
+Useful libibverbs example programs such as ibv_devinfo, which
+displays information about RDMA devices.
+
+%package -n ibacm
+Summary: InfiniBand Communication Manager Assistant
+Requires(post): %{systemd_dep}
+Requires(preun): %{systemd_dep}
+Requires(postun): %{systemd_dep}
+Requires: rdma-core
+
+%description -n ibacm
+The ibacm daemon helps reduce the load of managing path record lookups on
+large InfiniBand fabrics by providing a user space implementation of what
+is functionally similar to an ARP cache.  The use of ibacm, when properly
+configured, can reduce the SA packet load of a large IB cluster from O(n^2)
+to O(n).  The ibacm daemon is started and normally runs in the background,
+user applications need not know about this daemon as long as their app
+uses librdmacm to handle connection bring up/tear down.  The librdmacm
+library knows how to talk directly to the ibacm daemon to retrieve data.
+
+%package -n iwpmd
+Summary: iWarp Port Mapper userspace daemon
+Requires(post): %{systemd_dep}
+Requires(preun): %{systemd_dep}
+Requires(postun): %{systemd_dep}
+Requires: rdma-core
+
+%description -n iwpmd
+iwpmd provides a userspace service for iWarp drivers to claim
+tcp ports through the standard socket interface.
+
+%package -n libibcm
+Summary: Userspace InfiniBand Connection Manager
+ExcludeArch: s390 s390x
+Requires: rdma-core
+
+%description -n libibcm
+libibcm provides a userspace library that handles the majority of the low
+level work required to open an RDMA connection between two machines.
+
+%package -n libibumad
+Summary: OpenFabrics Alliance InfiniBand umad (userspace management datagram) library
+Requires: rdma-core
+
+%description -n libibumad
+libibumad provides the userspace management datagram (umad) library
+functions, which sit on top of the umad modules in the kernel. These
+are used by the IB diagnostic and management tools, including OpenSM.
+
+%package -n librdmacm
+Summary: Userspace RDMA Connection Manager
+Requires: rdma-core
+
+%description -n librdmacm
+librdmacm provides a userspace RDMA Communication Management API.
+
+%package -n librdmacm-utils
+Summary: Examples for the librdmacm library
+Requires: librdmacm%{?_isa} = %{version}-%{release}
+
+%description -n librdmacm-utils
+Example test programs for the librdmacm library.
 
-This is a simple example without the split sub packages to get things started.
+%package -n srp_daemon
+Summary: Tools for using the InfiniBand SRP protocol devices
+Obsoletes: srptools <= 1.0.3
+Provides: srptools = %{version}-%{release}
+Obsoletes: openib-srptools <= 0.0.6
+Requires(post): %{systemd_dep}
+Requires(preun): %{systemd_dep}
+Requires(postun): %{systemd_dep}
+Requires: rdma-core
+
+%description -n srp_daemon
+In conjunction with the kernel ib_srp driver, srptools allows you to
+discover and use SCSI devices via the SCSI RDMA Protocol over InfiniBand.
 
 %prep
 %setup
@@ -92,6 +237,43 @@ This is a simple example without the split sub packages to get things started.
 %install
 %cmake_install
 
+mkdir -p %{buildroot}/%{_sysconfdir}/rdma
+
+# Red Hat specific glue
+%global dracutlibdir %{_prefix}/lib/dracut
+%global sysmodprobedir %{_prefix}/lib/modprobe.d
+mkdir -p %{buildroot}/%{_sysconfdir}/sysconfig/network-scripts
+mkdir -p %{buildroot}%{_sysconfdir}/udev/rules.d
+mkdir -p %{buildroot}%{_libexecdir}
+mkdir -p %{buildroot}%{_udevrulesdir}
+mkdir -p %{buildroot}%{dracutlibdir}/modules.d/05rdma
+mkdir -p %{buildroot}%{sysmodprobedir}
+install -D -m0644 redhat/rdma.conf %{buildroot}/%{_sysconfdir}/rdma/rdma.conf
+install -D -m0644 redhat/rdma.sriov-vfs %{buildroot}/%{_sysconfdir}/rdma/sriov-vfs
+install -D -m0644 redhat/rdma.mlx4.conf %{buildroot}/%{_sysconfdir}/rdma/mlx4.conf
+install -D -m0755 redhat/rdma.ifup-ib %{buildroot}/%{_sysconfdir}/sysconfig/network-scripts/ifup-ib
+install -D -m0755 redhat/rdma.ifdown-ib %{buildroot}/%{_sysconfdir}/sysconfig/network-scripts/ifdown-ib
+install -D -m0644 redhat/rdma.service %{buildroot}%{_unitdir}/rdma.service
+install -D -m0644 redhat/rdma.udev-ipoib-naming.rules %{buildroot}%{_sysconfdir}/udev/rules.d/70-persistent-ipoib.rules
+install -D -m0644 redhat/rdma.mlx4.user.modprobe %{buildroot}%{_sysconfdir}/modprobe.d/mlx4.conf
+install -D -m0755 redhat/rdma.modules-setup.sh %{buildroot}%{dracutlibdir}/modules.d/05rdma/module-setup.sh
+install -D -m0644 redhat/rdma.udev-rules %{buildroot}%{_udevrulesdir}/98-rdma.rules
+install -D -m0644 redhat/rdma.mlx4.sys.modprobe %{buildroot}%{sysmodprobedir}/libmlx4.conf
+install -D -m0644 redhat/rdma.cxgb3.sys.modprobe %{buildroot}%{sysmodprobedir}/cxgb3.conf
+install -D -m0644 redhat/rdma.cxgb4.sys.modprobe %{buildroot}%{sysmodprobedir}/cxgb4.conf
+install -D -m0755 redhat/rdma.kernel-init %{buildroot}%{_libexecdir}/rdma-init-kernel
+install -D -m0755 redhat/rdma.sriov-init %{buildroot}%{_libexecdir}/rdma-set-sriov-vf
+install -D -m0644 redhat/rdma.fixup-mtrr.awk %{buildroot}%{_libexecdir}/rdma-fixup-mtrr.awk
+install -D -m0755 redhat/rdma.mlx4-setup.sh %{buildroot}%{_libexecdir}/mlx4-setup.sh
+
+# ibacm
+%{buildroot}/%{_bindir}/ib_acme -D . -O
+install -D -m0644 ibacm_opts.cfg %{buildroot}%{_sysconfdir}/rdma/
+install -D -m0644 redhat/ibacm.service %{buildroot}%{_unitdir}/
+
+# srp_daemon
+install -D -m0644 redhat/srp_daemon.service %{buildroot}%{_unitdir}/
+
 %if 0%{?_unitdir:1}
 rm -rf %{buildroot}/%{_initrddir}/
 %else
@@ -101,25 +283,135 @@ rm -rf %{buildroot}/%{my_unitdir}/
 %post -p /sbin/ldconfig
 %postun -p /sbin/ldconfig
 
+%post -n ibacm
+%systemd_post ibacm.service
+
+%preun -n ibacm
+%systemd_preun ibacm.service
+
+%postun -n ibacm
+%systemd_postun_with_restart ibacm.service
+
+%post -n libibcm -p /sbin/ldconfig
+%postun -n libibcm -p /sbin/ldconfig
+
 %files
-%doc %{_mandir}/man*/*
-%{_bindir}/*
+%dir %{_sysconfdir}/rdma
+%doc %{_docdir}/%{name}-%{version}/README.md
+%config(noreplace) %{_sysconfdir}/rdma/*
+%config(noreplace) %{_sysconfdir}/udev/rules.d/*
+%config(noreplace) %{_sysconfdir}/modprobe.d/mlx4.conf
+%config(noreplace) %{_sysconfdir}/modprobe.d/truescale.conf
+%{_sysconfdir}/sysconfig/network-scripts/*
+%{_unitdir}/rdma.service
+%dir %{dracutlibdir}/modules.d/05rdma
+%{dracutlibdir}/modules.d/05rdma/module-setup.sh
+%{_udevrulesdir}/*
+%{sysmodprobedir}/libmlx4.conf
+%{sysmodprobedir}/cxgb3.conf
+%{sysmodprobedir}/cxgb4.conf
+%{_libexecdir}/rdma-init-kernel
+%{_libexecdir}/rdma-set-sriov-vf
+%{_libexecdir}/rdma-fixup-mtrr.awk
+%{_libexecdir}/mlx4-setup.sh
+%{_libexecdir}/truescale-serdes.cmds
+%license COPYING.*
+
+%files devel
+%doc %{_docdir}/%{name}-%{version}/MAINTAINERS
 %{_includedir}/*
-%{_libdir}/lib*.so*
-%{_libdir}/libibverbs/*
+%{_libdir}/lib*.so
+%{_libdir}/rsocket/*.so
+%{_mandir}/man3/ibv_*
+%{_mandir}/man3/rdma*
+%{_mandir}/man3/umad*
+%{_mandir}/man3/*_to_ibv_rate.*
+%{_mandir}/man7/rdma_cm.*
+%{_mandir}/man7/rsocket.*
+
+%files -n libibverbs
+%dir %{_sysconfdir}/libibverbs.d
+%dir %{_libdir}/libibverbs
+%{_libdir}/libibverbs*.so.*
+%{_libdir}/libibverbs/*.so
+%config(noreplace) %{_sysconfdir}/libibverbs.d/*.driver
+%doc %{_docdir}/%{name}-%{version}/libibverbs.md
+%doc %{_docdir}/%{name}-%{version}/rxe.md
+%{_bindir}/rxe_cfg
+%{_mandir}/man7/rxe*
+%{_mandir}/man8/rxe*
+
+%files -n libibverbs-utils
+%{_bindir}/ibv_*
+%{_mandir}/man1/ibv_*
+
+%files -n ibacm
+%config(noreplace) %{_sysconfdir}/rdma/ibacm_opts.cfg
+%{_bindir}/ib_acme
+%{_sbindir}/ibacm
+%{_mandir}/man1/ibacm.*
+%{_mandir}/man1/ib_acme.*
+%{_mandir}/man7/ibacm.*
+%{_mandir}/man7/ibacm_prov.*
+%{_unitdir}/ibacm.service
+%dir %{_libdir}/ibacm
 %{_libdir}/ibacm/*
-%{_libdir}/rsocket/*
-%{_sbindir}/*
-%{_libexecdir}/*
-%{_docdir}/%{name}-%{version}/*
-%if 0%{?_unitdir:1}
-%{_unitdir}/*
-%else
-%config %{_initrddir}/*
-%endif
-%config %{_sysconfdir}/iwpmd.conf
-%config %{_sysconfdir}/srp_daemon.conf
-%config %{_sysconfdir}/libibverbs.d/*
-%config %{_sysconfdir}/logrotate.d/srp_daemon
-%{_sysconfdir}/modprobe.d/*
-%config %{_sysconfdir}/rsyslog.d/srp_daemon.conf
+%doc %{_docdir}/%{name}-%{version}/ibacm.md
+
+%files -n iwpmd
+%{_bindir}/iwpmd
+%{_unitdir}/iwpmd.service
+%config(noreplace) %{_sysconfdir}/iwpmd.conf
+%{_mandir}/man1/iwpmd.*
+%{_mandir}/man5/iwpmd.*
+
+%files -n libibcm
+%{_libdir}/libibcm*.so.*
+%doc %{_docdir}/%{name}-%{version}/libibcm.md
+
+%files -n libibumad
+%{_libdir}/libibumad*.so.*
+
+%files -n librdmacm
+%{_libdir}/librdmacm*.so.*
+%{_libdir}/rsocket/*.so.*
+%doc %{_docdir}/%{name}-%{version}/librdmacm.md
+
+%files -n librdmacm-utils
+%{_bindir}/cmtime
+%{_bindir}/mckey
+%{_bindir}/rcopy
+%{_bindir}/rdma_client
+%{_bindir}/rdma_server
+%{_bindir}/rdma_xclient
+%{_bindir}/rdma_xserver
+%{_bindir}/riostream
+%{_bindir}/rping
+%{_bindir}/rstream
+%{_bindir}/ucmatose
+%{_bindir}/udaddy
+%{_bindir}/udpong
+%{_mandir}/man1/mckey.*
+%{_mandir}/man1/rcopy.*
+%{_mandir}/man1/rdma_client.*
+%{_mandir}/man1/rdma_server.*
+%{_mandir}/man1/rdma_xclient.*
+%{_mandir}/man1/rdma_xserver.*
+%{_mandir}/man1/riostream.*
+%{_mandir}/man1/rping.*
+%{_mandir}/man1/rstream.*
+%{_mandir}/man1/ucmatose.*
+%{_mandir}/man1/udaddy.*
+
+%files -n srp_daemon
+%config(noreplace) %{_sysconfdir}/srp_daemon.conf
+%config(noreplace) %{_sysconfdir}/logrotate.d/srp_daemon
+%config(noreplace) %{_sysconfdir}/rsyslog.d/srp_daemon.conf
+%{_unitdir}/srp_daemon.service
+%{_sbindir}/ibsrpdm
+%{_sbindir}/srp_daemon
+%{_sbindir}/srp_daemon.sh
+%{_sbindir}/run_srp_daemon
+%{_mandir}/man1/ibsrpdm.1*
+%{_mandir}/man1/srp_daemon.1*
+%doc %{_docdir}/%{name}-%{version}/ibsrpdm.md
-- 
2.10.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH rdma-core v2 3/4] redhat: copy stock spec for RH customization
From: Jarod Wilson @ 2016-10-20 15:33 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Jarod Wilson
In-Reply-To: <20161020153357.27286-1-jarod-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

We're going to need to do a bit of majorly invasive surgery, and we're not
sure everyone else is going to want to package things this way, so we'll
make a copy of our own spec here, starting from the one in the tree.

Signed-off-by: Jarod Wilson <jarod-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
---
 redhat/rdma-core.spec | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 125 insertions(+)
 create mode 100644 redhat/rdma-core.spec

diff --git a/redhat/rdma-core.spec b/redhat/rdma-core.spec
new file mode 100644
index 0000000..47b7d87
--- /dev/null
+++ b/redhat/rdma-core.spec
@@ -0,0 +1,125 @@
+Name: rdma-core
+Version: 11
+Release: 1%{?dist}
+Summary: RDMA core userspace libraries and daemons
+
+# Almost everything is licensed under the OFA dual GPLv2, 2 Clause BSD license
+#  providers/ipathverbs/ Dual licensed using a BSD license with an extra patent clause
+#  providers/rxe/ Incorporates code from ipathverbs and contains the patent clause
+#  providers/hfi1verbs Uses the 3 Clause BSD license
+License: (GPLv2 or BSD) and (GPLv2 or PathScale-BSD)
+Url: http://openfabrics.org/
+Source: rdma-core-%{version}.tgz
+BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root
+
+BuildRequires: binutils
+BuildRequires: cmake >= 2.8.11
+BuildRequires: gcc
+BuildRequires: pkgconfig
+BuildRequires: pkgconfig(libnl-3.0)
+BuildRequires: pkgconfig(libnl-route-3.0)
+BuildRequires: valgrind-devel
+
+# Since we recommend developers use Ninja, so should packagers, for consistency.
+%define CMAKE_FLAGS %{nil}
+%if 0%{?suse_version}
+# SuSE releases have it, and sometime around cmake 3.3.2-1.2 the macros learned to use it.
+BuildRequires: ninja,make
+%define __builder ninja
+# cmake_install,make_jobs is specified by opensuse
+
+# Tumbleweed's cmake RPM macro adds -Wl,--no-undefined to the module flags
+# which is totally inappropriate and breaks building 'ENABLE_EXPORTS' style
+# module libraries (eg ibacmp).
+%define CMAKE_FLAGS -DCMAKE_MODULE_LINKER_FLAGS=""
+%else
+%if 0%{?fedora} >= 23
+# Ninja was introduced in FC23; build and install with the same ninja-build binary
+BuildRequires: ninja-build
+%define CMAKE_FLAGS -GNinja
+%define make_jobs ninja-build -v %{?_smp_mflags}
+%define cmake_install DESTDIR=%{buildroot} ninja-build install
+%else
+# Fallback to make otherwise; VERBOSE=1 echoes commands ("make -v" only prints the version)
+BuildRequires: make
+%define make_jobs make VERBOSE=1 %{?_smp_mflags}
+%define cmake_install DESTDIR=%{buildroot} make install
+%endif
+%endif
+
+%description
+Temporary packaging
+
+This is a simple example without the split sub packages to get things started.
+
+%prep
+%setup
+
+%build
+
+# Detect if systemd is supported on this system
+%if 0%{?_unitdir:1}
+%define my_unitdir %{_unitdir}
+%else
+%define my_unitdir /tmp/
+%endif
+
+# New RPM defines _rundir, usually as /run
+%if 0%{?_rundir:1}
+%else
+%define _rundir /var/run
+%endif
+
+# Pass all of the rpm paths directly to GNUInstallDirs and our other defines.
+%cmake %{CMAKE_FLAGS} \
+         -DCMAKE_BUILD_TYPE=Release \
+         -DCMAKE_INSTALL_BINDIR:PATH=%{_bindir} \
+         -DCMAKE_INSTALL_SBINDIR:PATH=%{_sbindir} \
+         -DCMAKE_INSTALL_LIBDIR:PATH=%{_libdir} \
+         -DCMAKE_INSTALL_LIBEXECDIR:PATH=%{_libexecdir} \
+         -DCMAKE_INSTALL_LOCALSTATEDIR:PATH=%{_localstatedir} \
+         -DCMAKE_INSTALL_SHAREDSTATEDIR:PATH=%{_sharedstatedir} \
+         -DCMAKE_INSTALL_INCLUDEDIR:PATH=%{_includedir} \
+         -DCMAKE_INSTALL_INFODIR:PATH=%{_infodir} \
+         -DCMAKE_INSTALL_MANDIR:PATH=%{_mandir} \
+         -DCMAKE_INSTALL_SYSCONFDIR:PATH=%{_sysconfdir} \
+	 -DCMAKE_INSTALL_SYSTEMD_SERVICEDIR:PATH=%{my_unitdir} \
+	 -DCMAKE_INSTALL_INITDDIR:PATH=%{_initrddir} \
+	 -DCMAKE_INSTALL_RUNDIR:PATH=%{_rundir} \
+	 -DCMAKE_INSTALL_DOCDIR:PATH=%{_docdir}/%{name}-%{version}
+%make_jobs
+
+%install
+%cmake_install
+
+%if 0%{?_unitdir:1}
+rm -rf %{buildroot}/%{_initrddir}/
+%else
+rm -rf %{buildroot}/%{my_unitdir}/
+%endif
+
+%post -p /sbin/ldconfig
+%postun -p /sbin/ldconfig
+
+%files
+%doc %{_mandir}/man*/*
+%{_bindir}/*
+%{_includedir}/*
+%{_libdir}/lib*.so*
+%{_libdir}/libibverbs/*
+%{_libdir}/ibacm/*
+%{_libdir}/rsocket/*
+%{_sbindir}/*
+%{_libexecdir}/*
+%{_docdir}/%{name}-%{version}/*
+%if 0%{?_unitdir:1}
+%{_unitdir}/*
+%else
+%config %{_initrddir}/*
+%endif
+%config %{_sysconfdir}/iwpmd.conf
+%config %{_sysconfdir}/srp_daemon.conf
+%config %{_sysconfdir}/libibverbs.d/*
+%config %{_sysconfdir}/logrotate.d/srp_daemon
+%{_sysconfdir}/modprobe.d/*
+%config %{_sysconfdir}/rsyslog.d/srp_daemon.conf
-- 
2.10.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH rdma-core v2 2/4] redhat: add udev/systemd/etc infrastructure bits
From: Jarod Wilson @ 2016-10-20 15:33 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Jarod Wilson, Doug Ledford
In-Reply-To: <20161020153357.27286-1-jarod-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

Red Hat has been shipping an "rdma" package, as well as its own systemd
unit files for some daemons for a while now, in both Fedora and Red Hat
Enterprise Linux. Some of these are fairly RH-specific, but might be of
use to others, so we'd like to move them into the upstream source tree.

Most of these were authored by Doug Ledford, though I'm currently the one
that maintains (most of) them in RHEL.

CC: Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Signed-off-by: Jarod Wilson <jarod-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
---
 redhat/ibacm.service                |  12 ++
 redhat/rdma.conf                    |  25 +++
 redhat/rdma.cxgb3.sys.modprobe      |   1 +
 redhat/rdma.cxgb4.sys.modprobe      |   1 +
 redhat/rdma.fixup-mtrr.awk          | 160 +++++++++++++++++++
 redhat/rdma.ifdown-ib               | 183 +++++++++++++++++++++
 redhat/rdma.ifup-ib                 | 308 ++++++++++++++++++++++++++++++++++++
 redhat/rdma.kernel-init             | 262 ++++++++++++++++++++++++++++++
 redhat/rdma.mlx4-setup.sh           |  91 +++++++++++
 redhat/rdma.mlx4.conf               |  27 ++++
 redhat/rdma.mlx4.sys.modprobe       |   5 +
 redhat/rdma.mlx4.user.modprobe      |  21 +++
 redhat/rdma.modules-setup.sh        |  30 ++++
 redhat/rdma.service                 |  15 ++
 redhat/rdma.sriov-init              | 137 ++++++++++++++++
 redhat/rdma.sriov-vfs               |  41 +++++
 redhat/rdma.udev-ipoib-naming.rules |  13 ++
 redhat/rdma.udev-rules              |  18 +++
 redhat/srp_daemon.service           |  17 ++
 19 files changed, 1367 insertions(+)
 create mode 100644 redhat/ibacm.service
 create mode 100644 redhat/rdma.conf
 create mode 100644 redhat/rdma.cxgb3.sys.modprobe
 create mode 100644 redhat/rdma.cxgb4.sys.modprobe
 create mode 100644 redhat/rdma.fixup-mtrr.awk
 create mode 100644 redhat/rdma.ifdown-ib
 create mode 100644 redhat/rdma.ifup-ib
 create mode 100644 redhat/rdma.kernel-init
 create mode 100644 redhat/rdma.mlx4-setup.sh
 create mode 100644 redhat/rdma.mlx4.conf
 create mode 100644 redhat/rdma.mlx4.sys.modprobe
 create mode 100644 redhat/rdma.mlx4.user.modprobe
 create mode 100644 redhat/rdma.modules-setup.sh
 create mode 100644 redhat/rdma.service
 create mode 100644 redhat/rdma.sriov-init
 create mode 100644 redhat/rdma.sriov-vfs
 create mode 100644 redhat/rdma.udev-ipoib-naming.rules
 create mode 100644 redhat/rdma.udev-rules
 create mode 100644 redhat/srp_daemon.service

diff --git a/redhat/ibacm.service b/redhat/ibacm.service
new file mode 100644
index 0000000..1cd031a
--- /dev/null
+++ b/redhat/ibacm.service
@@ -0,0 +1,12 @@
+[Unit]
+Description=Starts the InfiniBand Address Cache Manager daemon
+Documentation=man:ibacm
+Requires=rdma.service
+After=rdma.service opensm.service
+
+[Service]
+Type=forking
+ExecStart=/usr/sbin/ibacm
+
+[Install]
+WantedBy=network.target
diff --git a/redhat/rdma.conf b/redhat/rdma.conf
new file mode 100644
index 0000000..9446564
--- /dev/null
+++ b/redhat/rdma.conf
@@ -0,0 +1,25 @@
+# Load IPoIB
+IPOIB_LOAD=yes
+# Load SRP (SCSI RDMA Protocol initiator support) module
+SRP_LOAD=yes
+# Load SRPT (SCSI RDMA Protocol target support) module
+SRPT_LOAD=yes
+# Load iSER (iSCSI over RDMA initiator support) module
+ISER_LOAD=yes
+# Load iSERT (iSCSI over RDMA target support) module
+ISERT_LOAD=yes
+# Load RDS (Reliable Datagram Sockets) network protocol
+RDS_LOAD=no
+# Load NFSoRDMA client transport module
+XPRTRDMA_LOAD=yes
+# Load NFSoRDMA server transport module
+SVCRDMA_LOAD=no
+# Load Tech Preview device driver modules
+TECH_PREVIEW_LOAD=no
+# Should we modify the system mtrr registers?  We may need to do this if you
+# get messages from the ib_ipath driver saying that it couldn't enable
+# write combining for the PIO buffs on the card.
+#
+# Note: recent kernels should do this for us, but in case they don't, we'll
+# leave this option
+FIXUP_MTRR_REGS=no
diff --git a/redhat/rdma.cxgb3.sys.modprobe b/redhat/rdma.cxgb3.sys.modprobe
new file mode 100644
index 0000000..d5925a7
--- /dev/null
+++ b/redhat/rdma.cxgb3.sys.modprobe
@@ -0,0 +1 @@
+install cxgb3 /sbin/modprobe --ignore-install cxgb3 $CMDLINE_OPTS && /sbin/modprobe iw_cxgb3
diff --git a/redhat/rdma.cxgb4.sys.modprobe b/redhat/rdma.cxgb4.sys.modprobe
new file mode 100644
index 0000000..44163ab
--- /dev/null
+++ b/redhat/rdma.cxgb4.sys.modprobe
@@ -0,0 +1 @@
+install cxgb4 /sbin/modprobe --ignore-install cxgb4 $CMDLINE_OPTS && /sbin/modprobe iw_cxgb4
diff --git a/redhat/rdma.fixup-mtrr.awk b/redhat/rdma.fixup-mtrr.awk
new file mode 100644
index 0000000..a57ca76
--- /dev/null
+++ b/redhat/rdma.fixup-mtrr.awk
@@ -0,0 +1,160 @@
+# This is a simple script that checks the contents of /proc/mtrr to see if
+# the BIOS maker for the computer took the easy way out in terms of
+# specifying memory regions when there is a hole below 4GB for PCI access
+# and the machine has 4GB or more of RAM.  When the contents of /proc/mtrr
+# show a 4GB mapping of write-back cached RAM, minus punch out hole(s) of
+# uncacheable regions (the area reserved for PCI access), then it becomes
+# impossible for the ib_ipath driver to set write_combining on its PIO
+# buffers.  To correct the problem, remap the lower memory region in various
+# chunks up to the start of the punch out hole(s), then delete the punch out
+# hole(s) entirely as they aren't needed any more.  That way, ib_ipath will
+# be able to set write_combining on its PIO memory access region.
+
+BEGIN {
+	regs = 0
+}
+
+function check_base(mem)
+{
+	printf "Base memory data: base=0x%08x, size=0x%x\n", base[mem], size[mem] > "/dev/stderr"
+	if (size[mem] < (512 * 1024 * 1024))
+		return 0
+	if (type[mem] != "write-back")
+		return 0
+	if (base[mem] >= (4 * 1024 * 1024 * 1024))
+		return 0
+	return 1
+}
+
+function check_hole(hole)
+{
+	printf "Hole data: base=0x%08x, size=0x%x\n", base[hole], size[hole] > "/dev/stderr"
+	if (size[hole] > (1 * 1024 * 1024 * 1024))
+		return 0
+	if (type[hole] != "uncachable")
+		return 0
+	if ((base[hole] + size[hole]) > (4 * 1024 * 1024 * 1024))
+		return 0
+	return 1
+}
+
+function build_entries(start, end,     new_base, new_size, tmp_base)
+{
+	# mtrr registers require alignment of blocks, so a 256MB chunk must
+	# be 256MB aligned.  Additionally, all blocks must be a power of 2
+	# in size.  So, do the largest power of two size that we can and
+	# still have start + block <= end, rinse and repeat.
+	tmp_base = start
+	do {
+		new_base = tmp_base
+		new_size = 4096
+		while (((new_base + new_size) < end) &&
+		       ((new_base % new_size) == 0))
+			new_size = lshift(new_size, 1)
+		if (((new_base + new_size) > end) ||
+		    ((new_base % new_size) != 0))
+			new_size = rshift(new_size, 1)
+		printf "base=0x%x size=0x%x type=%s\n",
+			new_base, new_size, type[mem] > "/dev/stderr"
+		printf "base=0x%x size=0x%x type=%s\n",
+			new_base, new_size, type[mem] > "/proc/mtrr"
+		fflush("")
+		tmp_base = new_base + new_size
+	} while (tmp_base < end)
+}
+
+{
+	gsub("^reg", "")
+	gsub(": base=", " ")
+	gsub(" [(].*), size=", " ")
+	gsub(": ", " ")
+	gsub(", count=.*$", "")
+	register[regs] = strtonum($1)
+	base[regs] = strtonum($2)
+	size[regs] = strtonum($3)
+	human_size[regs] = size[regs]
+	if (match($3, "MB")) { size[regs] *= 1024*1024; mult[regs] = "MB" }
+	else { size[regs] *= 1024; mult[regs] = "KB" }
+	type[regs] = $4
+	enabled[regs] = 1
+	end[regs] = base[regs] + size[regs]
+	regs++
+}
+
+END {
+	# First we need to find our base memory region.  We only care about
+	# the memory register that starts at base 0.  This is the only one
+	# that we can reliably know is our global memory region, and the
+	# only one that we can reliably check against overlaps.  It's entirely
+	# possible that any memory region not starting at 0 and having an
+	# overlap with another memory region is in fact intentional and we
+	# shouldn't touch it.
+	for(i=0; i<regs; i++)
+		if (base[i] == 0)
+			break
+	# Did we get a valid base register?
+	if (i == regs)
+		exit 1
+	mem = i
+	if (!check_base(mem))
+		exit 1
+
+	cur_hole = 0
+	for(i=0; i<regs; i++) {
+		if (i == mem)
+			continue
+		if (base[i] < end[mem] && check_hole(i))
+			holes[cur_hole++] = i
+	}
+	if (cur_hole == 0) {
+		print "Nothing to do" > "/dev/stderr"
+		exit 1
+	}
+	printf "Found %d punch-out holes\n", cur_hole > "/dev/stderr"
+
+	# We need to sort the holes according to base address
+	for(j = 0; j < cur_hole - 1; j++) {
+		for(i = cur_hole - 1; i > j; i--) {
+			if(base[holes[i]] < base[holes[i-1]]) {
+				tmp = holes[i]
+				holes[i] = holes[i-1]
+				holes[i-1] = tmp
+			}
+		}
+	}
+	# OK, the common case would be that the BIOS is mapping holes out
+	# of the 4GB memory range, and that our hole(s) are consecutive and
+	# that our holes and our memory region end at the same place.  However,
+	# things like machines with 8GB of RAM or more can foul up these
+	# common traits.
+	#
+	# So, our modus operandi is to disable all of the memory/hole regions
+	# to start, then build new base memory zones that in the end add
+	# up to the same as our original zone minus the holes.  We know that
+	# we will never have a hole listed here that belongs to a valid
+	# hole punched in a write-combining memory region because you can't
+	# overlay write-combining on top of write-back and we know our base
+	# memory region is write-back, so in order for this hole to overlap
+	# our base memory region it can't be also overlapping a write-combining
+	# region.
+	printf "disable=%d\n", register[mem] > "/dev/stderr"
+	printf "disable=%d\n", register[mem] > "/proc/mtrr"
+	fflush("")
+	enabled[mem] = 0
+	for(i=0; i < cur_hole; i++) {
+		printf "disable=%d\n", register[holes[i]] > "/dev/stderr"
+		printf "disable=%d\n", register[holes[i]] > "/proc/mtrr"
+		fflush("")
+		enabled[holes[i]] = 0
+	}
+	build_entries(base[mem], base[holes[0]])
+	for(i=0; i < cur_hole - 1; i++)
+		if (base[holes[i+1]] > end[holes[i]])
+			build_entries(end[holes[i]], base[holes[i+1]])
+	if (end[mem] > end[holes[i]])
+		build_entries(end[holes[i]], end[mem])
+	# We changed up the mtrr regs, so signal to the rdma script to
+	# reload modules that need the mtrr regs to be right.
+	exit 0
+}
+
diff --git a/redhat/rdma.ifdown-ib b/redhat/rdma.ifdown-ib
new file mode 100644
index 0000000..1cb284d
--- /dev/null
+++ b/redhat/rdma.ifdown-ib
@@ -0,0 +1,183 @@
+#!/bin/bash
+# Network Interface Configuration System
+# Copyright (c) 1996-2013 Red Hat, Inc. all rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License, version 2,
+# as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+. /etc/init.d/functions
+
+cd /etc/sysconfig/network-scripts
+. ./network-functions
+
+[ -f ../network ] && . ../network
+
+CONFIG=${1}
+
+source_config
+
+# Allow the user to override the detection of our physical device by passing
+# it in.  No checking is done, if the user gives us a bogus dev, it's
+# their problem.
+[ -n "${PHYSDEV}" ] && REALDEVICE="$PHYSDEV"
+
+. /etc/sysconfig/network
+
+# Check to make sure the device is actually up
+check_device_down ${DEVICE} && exit 0
+
+# If we are a P_Key device, we need to munge a few things
+if [ "${PKEY}" = yes ]; then
+	[ -z "${PKEY_ID}" ] && {
+	        net_log $"InfiniBand IPoIB device: PKEY=yes requires a PKEY_ID"
+	        exit 1
+	}
+	[ -z "${PHYSDEV}" ] && {
+	        net_log $"InfiniBand IPoIB device: PKEY=yes requires a PHYSDEV"
+	        exit 1
+	}
+	# Normalize our PKEY_ID to have the high bit set
+	NEW_PKEY_ID=`printf "0x%04x" $(( 0x8000 | ${PKEY_ID} ))`
+	NEW_PKEY_NAME=`printf "%04x" ${NEW_PKEY_ID}`
+	[ "${DEVICE}" != "${PHYSDEV}.${NEW_PKEY_NAME}" ] && {
+                net_log $"Configured DEVICE name does not match what new device name would be.  This
+is most likely because once the PKEY_ID was normalized, it no longer
+resulted in the expected device naming, and so the DEVICE entry in the
+config file needs to be updated to match. This can also be caused by
+giving PKEY_ID as a hex number but without using the mandatory 0x prefix.
+	Configured DEVICE=$DEVICE
+        Configured PHYSDEV=$PHYSDEV
+        Configured PKEY_ID=$PKEY_ID
+        Calculated PKEY_ID=$NEW_PKEY_ID
+        Calculated name=${PHYSDEV}.${NEW_PKEY_NAME}"
+                exit 1
+        }
+        [ -d "/sys/class/net/${DEVICE}" ] || exit 0
+        # When we get to downing the IP address, we need REALDEVICE to
+        # point to our PKEY device
+        REALDEVICE="${DEVICE}"
+fi
+
+
+if [ "${SLAVE}" != "yes" -o -z "${MASTER}" ]; then
+if [ -n "${HWADDR}" -a -z "${MACADDR}" ]; then
+    HWADDR=$(echo $HWADDR | tail -c 24)
+    FOUNDMACADDR=$(get_hwaddr ${REALDEVICE} | tail -c 24)
+    if [ -n "${FOUNDMACADDR}" -a "${FOUNDMACADDR}" != "${HWADDR}" ]; then
+        NEWCONFIG=$(get_config_by_hwaddr ${FOUNDMACADDR})
+	if [ -n "${NEWCONFIG}" ]; then
+	   eval $(LANG=C grep -F "DEVICE=" $NEWCONFIG)
+	else
+	   net_log $"Device ${DEVICE} has MAC address ${FOUNDMACADDR}, instead of configured address ${HWADDR}. Ignoring."
+	   exit 1
+	fi
+	if [ -n "${NEWCONFIG}" -a "${NEWCONFIG##*/}" != "${CONFIG##*/}" -a "${DEVICE}" = "${REALDEVICE}" ]; then
+	   exec /sbin/ifdown ${NEWCONFIG}
+	else
+	   net_log $"Device ${DEVICE} has MAC address ${FOUNDMACADDR}, instead of configured address ${HWADDR}. Ignoring."
+	   exit 1
+	fi
+    fi
+fi
+fi
+
+if is_bonding_device ${DEVICE} ; then
+    for device in $(LANG=C grep -l "^[[:space:]]*MASTER=\"\?${DEVICE}\"\?\([[:space:]#]\|$\)" /etc/sysconfig/network-scripts/ifcfg-*) ; do
+	is_ignored_file "$device" && continue
+	/sbin/ifdown ${device##*/}
+    done
+    for arg in $BONDING_OPTS ; do
+	key=${arg%%=*};
+	[[ "${key}" != "arp_ip_target" ]] && continue
+	value=${arg##*=};
+	if [ "${value:0:1}" != "" ]; then
+            OLDIFS=$IFS;
+            IFS=',';
+            for arp_ip in $value; do
+		if grep -q $arp_ip /sys/class/net/${DEVICE}/bonding/arp_ip_target; then
+                    echo "-$arp_ip" > /sys/class/net/${DEVICE}/bonding/arp_ip_target
+		fi
+            done
+            IFS=$OLDIFS;
+	else
+	    value=${value#+};
+	    if grep -q $value /sys/class/net/${DEVICE}/bonding/arp_ip_target; then
+                echo "-$value" > /sys/class/net/${DEVICE}/bonding/arp_ip_target
+	    fi
+	fi
+    done
+fi
+
+/etc/sysconfig/network-scripts/ifdown-ipv6 ${CONFIG}
+
+retcode=0
+[ -n "$(pidof -x dhclient)" ] && {
+   for VER in "" 6 ; do
+	if [ -f "/var/run/dhclient$VER-${DEVICE}.pid" ]; then
+		dhcpid=$(cat /var/run/dhclient$VER-${DEVICE}.pid)
+		generate_lease_file_name $VER
+		if [[ "$DHCPRELEASE" = [yY1]* ]];  then
+			/sbin/dhclient -r -lf ${LEASEFILE} -pf /var/run/dhclient$VER-${DEVICE}.pid ${DEVICE} >/dev/null 2>&1
+			retcode=$?
+		else
+			kill $dhcpid >/dev/null 2>&1
+			retcode=$?
+			reason=STOP$VER interface=${DEVICE} /sbin/dhclient-script
+		fi
+		if [ -f "/var/run/dhclient$VER-${DEVICE}.pid" ]; then
+			rm -f /var/run/dhclient$VER-${DEVICE}.pid
+			kill $dhcpid >/dev/null 2>&1
+		fi
+	fi
+    done
+}
+# we can't just delete the configured address because that address
+# may have been changed in the config file since the device was
+# brought up.  Flush all addresses associated with this
+# instance instead.
+if [ -d "/sys/class/net/${REALDEVICE}" ]; then
+	if [ "${REALDEVICE}" = "${DEVICE}" ]; then
+		ip addr flush dev ${REALDEVICE} scope global 2>/dev/null
+	else
+		ip addr flush dev ${REALDEVICE} label ${DEVICE} scope global 2>/dev/null
+	fi
+
+	if [ "${SLAVE}" = "yes" -a -n "${MASTER}" ]; then
+		echo "-${DEVICE}" > /sys/class/net/${MASTER}/bonding/slaves 2>/dev/null
+	fi
+
+	if [ "${REALDEVICE}" = "${DEVICE}" ]; then
+		ip link set dev ${DEVICE} down 2>/dev/null
+	fi
+fi
+[ "$retcode" = "0" ] && retcode=$?
+
+# wait up to 5 seconds for device to actually come down...
+waited=0
+while ! check_device_down ${DEVICE} && [ "$waited" -lt 50 ] ; do
+    usleep 10000
+    waited=$(($waited+1))
+done
+
+if [ "$retcode" = 0 ] ; then
+    /etc/sysconfig/network-scripts/ifdown-post $CONFIG
+    # do NOT use $? because ifdown should return whether or not
+    # the interface went down.
+fi
+
+if [ "${PKEY}" = yes ]; then
+    # Remove the P_Key child interface; NEW_PKEY_ID is only computed when PKEY=yes
+    echo "$NEW_PKEY_ID" > /sys/class/net/${PHYSDEV}/delete_child
+fi
+
+exit $retcode
diff --git a/redhat/rdma.ifup-ib b/redhat/rdma.ifup-ib
new file mode 100644
index 0000000..bb4d4f7
--- /dev/null
+++ b/redhat/rdma.ifup-ib
@@ -0,0 +1,308 @@
+#!/bin/bash
+# Network Interface Configuration System
+# Copyright (c) 1996-2013 Red Hat, Inc. all rights reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License, version 2,
+# as published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+. /etc/init.d/functions
+
+cd /etc/sysconfig/network-scripts
+. ./network-functions
+
+[ -f ../network ] && . ../network
+
+CONFIG="${1}"
+
+need_config "${CONFIG}"
+
+source_config
+
+# Allow the user to override the detection of our physical device by passing
+# it in.  No checking is done, if the user gives us a bogus dev, it's
+# their problem.
+[ -n "${PHYSDEV}" ] && REALDEVICE="$PHYSDEV"
+
+if [ "${BOOTPROTO}" = "dhcp" ]; then
+    DYNCONFIG=true
+fi
+
+# load the module associated with that device
+# /sbin/modprobe ${REALDEVICE}
+is_available_wait ${REALDEVICE} ${DEVTIMEOUT}
+
+# bail out, if the MAC does not fit
+if [ -n "${HWADDR}" ]; then
+	FOUNDMACADDR=$(get_hwaddr ${REALDEVICE} | tail -c 24)
+	HWADDR=$(echo $HWADDR | tail -c 24)
+	if [ "${FOUNDMACADDR}" != "${HWADDR}" ]; then
+		net_log $"Device ${DEVICE} has different MAC address than expected, ignoring."
+		exit 1
+	fi
+fi
+
+# now check the real state
+is_available ${REALDEVICE} || {
+      if [ -n "$alias" ]; then
+         net_log $"$alias device ${DEVICE} does not seem to be present, delaying initialization."
+      else
+         net_log $"Device ${DEVICE} does not seem to be present, delaying initialization."
+      fi
+      exit 1
+}
+
+# if we are a P_Key device, create the device if needed
+if [ "${PKEY}" = yes ]; then
+	[ -z "${PKEY_ID}" ] && {
+		net_log $"InfiniBand IPoIB device: PKEY=yes requires a PKEY_ID"
+		exit 1
+	}
+	[ -z "${PHYSDEV}" ] && {
+		net_log $"InfiniBand IPoIB device: PKEY=yes requires a PHYSDEV"
+		exit 1
+	}
+	# Normalize our PKEY_ID to have the high bit set
+	NEW_PKEY_ID=`printf "0x%04x" $(( 0x8000 | ${PKEY_ID} ))`
+	NEW_PKEY_NAME=`printf "%04x" ${NEW_PKEY_ID}`
+	[ "${DEVICE}" != "${PHYSDEV}.${NEW_PKEY_NAME}" ] && {
+		net_log $"Configured DEVICE name does not match what new device name would be.  This
+is most likely because once the PKEY_ID was normalized, it no longer
+resulted in the expected device naming, and so the DEVICE entry in the
+config file needs to be updated to match. This can also be caused by
+giving PKEY_ID as a hex number but without using the mandatory 0x prefix.
+	Configured DEVICE=$DEVICE
+	Configured PHYSDEV=$PHYSDEV
+	Configured PKEY_ID=$PKEY_ID
+	Calculated PKEY_ID=$NEW_PKEY_ID
+	Calculated name=${PHYSDEV}.${NEW_PKEY_NAME}"
+		exit 1
+	}
+	[ -d "/sys/class/net/${DEVICE}" ] ||
+		echo "${NEW_PKEY_ID}" > "/sys/class/net/${PHYSDEV}/create_child"
+	[ -d "/sys/class/net/${DEVICE}" ] || {
+		echo "Failed to create child device $NEW_PKEY_ID of $PHYSDEV"
+		exit 1
+	}
+	# When we get to setting up the IP address, we need REALDEVICE to
+	# point to our new PKEY device
+	REALDEVICE="${DEVICE}"
+fi
+
+
+if [ -n "${MACADDR}" ]; then
+    net_log $"IPoIB devices do not support setting the MAC address of the interface"
+    # ip link set dev ${DEVICE} address ${MACADDR}
+fi
+
+# First, do we even support setting connected mode?
+if [ -e /sys/class/net/${DEVICE}/mode ]; then
+    # OK, set the mode in all cases, that way it gets reset on a down/up
+    # cycle, allowing people to change the mode without rebooting
+    if [ "${CONNECTED_MODE}" = yes ]; then
+        echo connected > /sys/class/net/${DEVICE}/mode
+	# cap the MTU where we should based upon mode
+	[ -z "$MTU" ] && MTU=65520
+	[ "$MTU" -gt 65520 ] && MTU=65520
+    else
+        echo datagram > /sys/class/net/${DEVICE}/mode
+	# cap the MTU where we should based upon mode
+	[ -z "$MTU" ] && MTU=2044
+	[ "$MTU" -gt 2044 ] && MTU=2044
+    fi
+fi
+
+if [ -n "${MTU}" ]; then
+    ip link set dev ${DEVICE} mtu ${MTU}
+fi
+
+# slave device?
+if [ "${SLAVE}" = yes -a "${ISALIAS}" = no -a "${MASTER}" != "" ]; then
+    install_bonding_driver ${MASTER}
+    grep -wq "${DEVICE}" /sys/class/net/${MASTER}/bonding/slaves 2>/dev/null || {
+	/sbin/ip link set dev ${DEVICE} down
+	echo "+${DEVICE}" > /sys/class/net/${MASTER}/bonding/slaves 2>/dev/null
+    }
+    ethtool_set
+
+    exit 0
+fi
+
+# Bonding initialization. For DHCP, we need to enslave the devices early,
+# so it can actually get an IP.
+if [ "$ISALIAS" = no ] && is_bonding_device ${DEVICE} ; then
+    install_bonding_driver ${DEVICE}
+    /sbin/ip link set dev ${DEVICE} up
+    for device in $(LANG=C grep -l "^[[:space:]]*MASTER=\"\?${DEVICE}\"\?\([[:space:]#]\|$\)" /etc/sysconfig/network-scripts/ifcfg-*) ; do
+	    is_ignored_file "$device" && continue
+	    /sbin/ifup ${device##*/}
+    done
+
+    [ -n "${LINKDELAY}" ] && /bin/sleep ${LINKDELAY}
+
+    # add the bits to setup the needed post enslavement parameters
+    for arg in $BONDING_OPTS ; do
+        key=${arg%%=*};
+        value=${arg##*=};
+	if [ "${key}" = "primary" ]; then
+            echo $value > /sys/class/net/${DEVICE}/bonding/$key
+	fi
+    done
+fi
+
+
+if [ -n "${DYNCONFIG}" ] && [ -x /sbin/dhclient ]; then
+    if [[ "${PERSISTENT_DHCLIENT}" =  [yY1]* ]]; then
+       ONESHOT="";
+    else
+       ONESHOT="-1";
+    fi;
+    generate_config_file_name
+    generate_lease_file_name
+    DHCLIENTARGS="${DHCLIENTARGS} -H ${DHCP_HOSTNAME:-${HOSTNAME%%.*}} ${ONESHOT} -q ${DHCLIENTCONF} -lf ${LEASEFILE} -pf /var/run/dhclient-${DEVICE}.pid"
+    echo
+    echo -n $"Determining IP information for ${DEVICE}..."
+    if [[ "${PERSISTENT_DHCLIENT}" !=  [yY1]* ]] && check_link_down ${DEVICE}; then
+	echo $" failed; no link present.  Check cable?"
+	exit 1
+    fi
+
+    ethtool_set
+
+    if /sbin/dhclient ${DHCLIENTARGS} ${DEVICE} ; then
+	echo $" done."
+	dhcpipv4="good"
+    else
+	echo $" failed."
+	if [[ "${IPV4_FAILURE_FATAL}"  = [Yy1]* ]] ; then
+	    exit 1
+	fi
+	if [[ "$IPV6INIT" != [yY1]* && "$DHCPV6C" != [yY1]* ]] ; then
+	    exit 1
+	fi
+	net_log "Unable to obtain IPv4 DHCP address ${DEVICE}." warning
+    fi
+# end dynamic device configuration
+else
+    if [ -z "${IPADDR}" -a -z "${IPADDR0}" -a -z "${IPADDR1}" -a -z "${IPADDR2}" ]; then
+         # enable device without IP, useful for e.g. PPPoE
+	 ip link set dev ${REALDEVICE} up
+	 ethtool_set
+	 [ -n "${LINKDELAY}" ] && /bin/sleep ${LINKDELAY}
+    else
+
+    expand_config
+
+    [ -n "${ARP}" ] && \
+	ip link set dev ${REALDEVICE} $(toggle_value arp $ARP)
+
+    if ! ip link set dev ${REALDEVICE} up ; then
+	net_log $"Failed to bring up ${DEVICE}."
+	exit 1
+    fi
+
+    ethtool_set
+
+    [ -n "${LINKDELAY}" ] && /bin/sleep ${LINKDELAY}
+
+    if [ "${DEVICE}" = "lo" ]; then
+        SCOPE="scope host"
+    else
+        SCOPE=${SCOPE:-}
+    fi
+
+    if [ -n "$SRCADDR" ]; then
+       SRC="src $SRCADDR"
+    else
+       SRC=
+    fi
+
+    # set IP address(es): walk the ipaddr[] array until the first empty slot
+    for idx in {0..256} ; do
+	if [ -z "${ipaddr[$idx]}" ]; then
+	    break
+	fi
+
+	if ! LC_ALL=C ip addr ls ${REALDEVICE} | LC_ALL=C grep -q "${ipaddr[$idx]}/${prefix[$idx]}" ; then
+	    # Only an arping probe that really ran and exited 1 vetoes the address.
+	    if [ "${REALDEVICE}" != "lo" ] && [ "${arpcheck[$idx]}" != "no" ] && \
+	       { /sbin/arping -q -c 2 -w 3 -D -I ${REALDEVICE} ${ipaddr[$idx]}; [ $? = 1 ]; }; then
+		net_log $"Error, some other host already uses address ${ipaddr[$idx]}."
+		exit 1
+	    fi
+
+	    if ! ip addr add ${ipaddr[$idx]}/${prefix[$idx]} \
+		brd ${broadcast[$idx]:-+} dev ${REALDEVICE} ${SCOPE} label ${DEVICE}; then
+		net_log $"Error adding address ${ipaddr[$idx]} for ${DEVICE}."
+	    fi
+	fi
+
+	if [ -n "$SRCADDR" ]; then
+           sysctl -w "net.ipv4.conf.${REALDEVICE}.arp_filter=1" >/dev/null 2>&1
+	fi
+
+	# update ARP cache of neighboring computers (second announcement is backgrounded)
+	if [ "${REALDEVICE}" != "lo" ]; then
+	    /sbin/arping -q -A -c 1 -I ${REALDEVICE} ${ipaddr[$idx]}
+	    ( sleep 2;
+	      /sbin/arping -q -U -c 1 -I ${REALDEVICE} ${ipaddr[$idx]} ) > /dev/null 2>&1 < /dev/null &
+	fi
+    done
+
+    # Set a default route unless DEFROUTE=no or GATEWAYDEV names another device.
+    if [ "${DEFROUTE}" != "no" ] && [ -z "${GATEWAYDEV}" -o "${GATEWAYDEV}" = "${REALDEVICE}" ]; then
+	# set up default gateway (only if it lies in our NETWORK). replace if one already exists
+	if [ -n "${GATEWAY}" ] && [ "$(ipcalc --network ${GATEWAY} ${netmask[0]} 2>/dev/null)" = "NETWORK=${NETWORK}" ]; then
+	    ip route replace default ${METRIC:+metric $METRIC} \
+		via ${GATEWAY} ${WINDOW:+window $WINDOW} ${SRC} \
+		${GATEWAYDEV:+dev $GATEWAYDEV} ||
+			net_log $"Error adding default gateway ${GATEWAY} for ${DEVICE}."
+	elif [ "${GATEWAYDEV}" = "${DEVICE}" ]; then	# no usable GATEWAY: device-only default route
+	    ip route replace default ${METRIC:+metric $METRIC} \
+		${SRC} ${WINDOW:+window $WINDOW} dev ${REALDEVICE} ||
+			net_log $"Error adding default gateway for ${REALDEVICE}."
+	fi
+    fi
+    fi
+fi
+
+# Add Zeroconf route.
+if [ -z "${NOZEROCONF}" -a "${ISALIAS}" = "no" -a "${REALDEVICE}" != "lo" ]; then
+    ip route add 169.254.0.0/16 dev ${REALDEVICE} metric $((1000 + $(cat /sys/class/net/${REALDEVICE}/ifindex))) scope link
+fi
+
+# Inform firewall which network zone (empty means default) this interface belongs to
+if [ -x /usr/bin/firewall-cmd -a "${REALDEVICE}" != "lo" ]; then
+    /usr/bin/firewall-cmd --zone="${ZONE}" --change-interface="${DEVICE}" > /dev/null 2>&1
+fi
+
+# IPv6 initialisation?
+/etc/sysconfig/network-scripts/ifup-ipv6 ${CONFIG}
+if [[ "${DHCPV6C}"  = [Yy1]* ]] && [ -x /sbin/dhclient ]; then
+    generate_config_file_name 6
+    generate_lease_file_name 6
+    echo
+    echo -n $"Determining IPv6 information for ${DEVICE}..."
+    if /sbin/dhclient -6 -1 ${DHCPV6C_OPTIONS} ${DHCLIENTCONF} -lf ${LEASEFILE} -pf /var/run/dhclient6-${DEVICE}.pid -H ${DHCP_HOSTNAME:-${HOSTNAME%%.*}} ${DEVICE} ; then
+        echo $" done."
+    else
+        echo $" failed."
+        if [ "${dhcpipv4}" = "good" -o -n "${IPADDR}" ]; then
+            net_log "Unable to obtain IPv6 DHCP address ${DEVICE}." warning
+        else
+            exit 1
+        fi
+    fi
+fi
+
+exec /etc/sysconfig/network-scripts/ifup-post ${CONFIG} ${2}
+
diff --git a/redhat/rdma.kernel-init b/redhat/rdma.kernel-init
new file mode 100644
index 0000000..6cb4732
--- /dev/null
+++ b/redhat/rdma.kernel-init
@@ -0,0 +1,262 @@
+#!/bin/bash
+#
+# Bring up the kernel RDMA stack
+#
+# This is usually run automatically by systemd after a hardware activation
+# event in udev has triggered a start of the rdma.service unit
+#
+
+shopt -s nullglob
+
+CONFIG=/etc/rdma/rdma.conf
+MTRR_SCRIPT=/usr/libexec/rdma-fixup-mtrr.awk
+
+LOAD_ULP_MODULES=""
+LOAD_CORE_USER_MODULES="ib_umad ib_uverbs ib_ucm rdma_ucm"
+LOAD_CORE_CM_MODULES="iw_cm ib_cm rdma_cm"
+LOAD_CORE_MODULES="ib_core ib_mad ib_sa ib_addr"
+LOAD_TECH_PREVIEW_DRIVERS="no"
+
+if [ -f $CONFIG ]; then
+    . $CONFIG
+
+    if [ "${RDS_LOAD}" == "yes" ]; then
+        IPOIB_LOAD=yes
+    fi
+
+    if [ "${IPOIB_LOAD}" == "yes" ]; then
+	LOAD_ULP_MODULES="ib_ipoib"
+    fi
+
+    if [ "${RDS_LOAD}" == "yes" -a -f /lib/modules/`uname -r`/kernel/net/rds/rds.ko ]; then
+	LOAD_ULP_MODULES="$LOAD_ULP_MODULES rds"
+	if [ -f /lib/modules/`uname -r`/kernel/net/rds/rds_tcp.ko ]; then
+	    LOAD_ULP_MODULES="$LOAD_ULP_MODULES rds_tcp"
+	fi
+	if [ -f /lib/modules/`uname -r`/kernel/net/rds/rds_rdma.ko ]; then
+	    LOAD_ULP_MODULES="$LOAD_ULP_MODULES rds_rdma"
+	fi
+    fi
+
+    if [ "${SRP_LOAD}" == "yes" ]; then
+	LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_srp"
+    fi
+
+    if [ "${SRPT_LOAD}" == "yes" ]; then
+	LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_srpt"
+    fi
+
+    if [ "${ISER_LOAD}" == "yes" ]; then
+	LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_iser"
+    fi
+
+    if [ "${ISERT_LOAD}" == "yes" ]; then
+	LOAD_ULP_MODULES="$LOAD_ULP_MODULES ib_isert"
+    fi
+
+    if [ "${XPRTRDMA_LOAD}" == "yes" ]; then
+	LOAD_ULP_MODULES="$LOAD_ULP_MODULES xprtrdma"
+    fi
+
+    if [ "${SVCRDMA_LOAD}" == "yes" ]; then
+	LOAD_ULP_MODULES="$LOAD_ULP_MODULES svcrdma"
+    fi
+    if [ "${TECH_PREVIEW_LOAD}" == "yes" ]; then
+        LOAD_TECH_PREVIEW_DRIVERS="$TECH_PREVIEW_LOAD"
+    fi
+else
+    LOAD_ULP_MODULES="ib_ipoib"
+fi
+
+# Status 0 when module $1 appears as a whole word in lsmod output, non-zero otherwise
+is_loaded()
+{
+    # grep's exit status is the pipeline's status and therefore the function's
+    /sbin/lsmod | grep -qw "$1" 2> /dev/null
+}
+
+load_modules()
+{
+    local RC=0 module res
+    # modprobe every named module not yet loaded; return the sum of failures
+    for module in "$@"; do
+	if ! /sbin/modinfo "$module" > /dev/null 2>&1; then
+	    # do not attempt to load modules which do not exist
+	    continue
+	fi
+	if ! is_loaded "$module"; then
+	    /sbin/modprobe "$module"
+	    res=$?
+	    RC=$(( RC + res ))
+	    if [ $res -ne 0 ]; then
+		echo
+		echo "Failed to load module $module"
+	    fi
+	fi
+    done
+    return $RC
+}
+
+# This function is a horrible hack to work around BIOS authors that should
+# be shot.  Specifically, certain BIOSes will map the entire 4GB address
+# space as write-back cacheable when the machine has 4GB or more of RAM, and
+# then they will exclude the reserved PCI I/O addresses from that 4GB
+# cacheable mapping by making an overlapping uncacheable mapping.  However,
+# once you do that, it is then impossible to set *any* of the PCI I/O
+# address space as write-combining.  This is an absolute death-knell to
+# certain IB hardware.  So, we unroll this mapping here.  Instead of
+# punching a hole in a single 4GB mapping, we redo the base 4GB mapping as
+# a series of discrete mappings that effectively are the same as the 4GB
+# mapping minus the hole, and then we delete the uncacheable mappings that
+# are used to punch the hole.  This then leaves the PCI I/O address space
+# unregistered (which defaults it to uncacheable), but available for
+# write-combining mappings where needed.
+check_mtrr_registers()
+{
+    # Runs $MTRR_SCRIPT over /proc/mtrr when both are regular files.  A
+    # successful awk exit is taken to mean the mtrr registers were changed,
+    # in which case ib_ipath is unloaded if it is currently loaded.  The
+    # udevadm trigger in load_hardware_modules is expected to reload it.
+    [ -f /proc/mtrr -a -f $MTRR_SCRIPT ] &&
+	awk -f $MTRR_SCRIPT /proc/mtrr 2>/dev/null &&
+	if is_loaded ib_ipath; then
+		/sbin/rmmod ib_ipath
+	fi
+}
+
+load_hardware_modules()
+{
+    local -i RC=0
+    # Returns the accumulated exit statuses of the load_modules calls below.
+    [ "$FIXUP_MTRR_REGS" = "yes" ] && check_mtrr_registers
+    # We match both class NETWORK and class INFINIBAND devices since our
+    # iWARP hardware is listed under class NETWORK.  The side effect of
+    # this is that we might cause a non-iWARP network driver to be loaded.
+    udevadm trigger --subsystem-match=pci --attr-nomatch=driver --attr-match=class=0x020000 --attr-match=class=0x0c0600
+    udevadm settle
+    if [ -r /proc/device-tree ]; then
+	if [ -n "`ls /proc/device-tree | grep lhca`" ]; then
+	    if ! is_loaded ib_ehca; then
+		load_modules ib_ehca
+		RC+=$?
+	    fi
+	fi
+    fi
+    if is_loaded mlx4_core && ! is_loaded mlx4_ib; then
+        load_modules mlx4_ib
+	RC+=$?
+    fi
+    if is_loaded mlx4_core && ! is_loaded mlx4_en; then
+        load_modules mlx4_en
+	RC+=$?
+    fi
+    if is_loaded mlx5_core && ! is_loaded mlx5_ib; then
+	load_modules mlx5_ib
+	RC+=$?
+    fi
+    if is_loaded cxgb3 && ! is_loaded iw_cxgb3; then
+	load_modules iw_cxgb3
+	RC+=$?
+    fi
+    if is_loaded cxgb4 && ! is_loaded iw_cxgb4; then
+	load_modules iw_cxgb4
+	RC+=$?
+    fi
+    if is_loaded be2net && ! is_loaded ocrdma; then
+	load_modules ocrdma
+	RC+=$?
+    fi
+    if is_loaded enic && ! is_loaded usnic_verbs; then
+	load_modules usnic_verbs
+	RC+=$?
+    fi
+    if [ "${LOAD_TECH_PREVIEW_DRIVERS}" == "yes" ]; then
+        if is_loaded i40e && ! is_loaded i40iw; then
+	    load_modules i40iw
+	    RC+=$?
+        fi
+    fi
+    return $RC
+}
+
+errata_58()
+{
+    # AMD-8131 Errata #58 workaround: needs both lspci and setpci
+    [ -x /sbin/lspci ] && [ -x /sbin/setpci ] || return 0
+    # Only relevant when all three of these PCI IDs are present
+    /sbin/lspci -nd 1022:1100 | grep -q "1100" || return 0
+    /sbin/lspci -nd 1022:7450 | grep -q "7450" || return 0
+    /sbin/lspci -nd 15b3:5a46 | grep -q "5a46" || return 0
+
+    # Walk the values setpci reports for register 69 of 1022:1100; each one
+    # that is not c0 triggers a write of c0, stopping at the first success.
+    CURVAL=$(/sbin/setpci -d 1022:1100 69)
+    for val in $CURVAL; do
+	[ "${val}" = "c0" ] && continue
+	if /sbin/setpci -d 1022:1100 69=c0; then
+	    break
+	fi
+	# the write failed: report it and carry on with the next value
+	echo "Failed to apply AMD-8131 Errata #58 workaround"
+    done
+    return 0
+}
+
+errata_56()
+{
+    # Apply the AMD-8131 Errata #56 workaround; needs both lspci and setpci
+    if test -x /sbin/lspci && test -x /sbin/setpci; then
+	if ( /sbin/lspci -nd 1022:1100 | grep "1100" > /dev/null ) &&
+	   ( /sbin/lspci -nd 1022:7450 | grep "7450" > /dev/null ) &&
+	   ( /sbin/lspci -nd 15b3:5a46 | grep "5a46" > /dev/null ); then
+	    bus=""
+	    # Look for AMD-8131 devices (matched by PCI ID 1022:7450)
+	    for dev in `/sbin/setpci -v -f -d 1022:7450 19 | cut -d':' -f1,2`
+	    do
+		bus=`/sbin/setpci -s $dev 19`
+		rev=`/sbin/setpci -s $dev 8`
+		# Look for Tavor devices (15b3:5a46) attached to the secondary bus of this device
+		for device in `/sbin/setpci -f -s $bus: -d 15b3:5a46 19`
+		do
+		    if [ $rev -lt 13 ]; then	# NOTE(review): numeric test on raw setpci output - confirm it is never a hex value with letters
+			/sbin/setpci -d 15b3:5a44 72=14	# NOTE(review): writes to 15b3:5a44 although the scan matched 15b3:5a46 - confirm
+			if [ $? -eq 0 ]; then
+			    break
+			else
+			    echo
+			    echo "Failed to apply AMD-8131 Errata #56 workaround"
+			fi
+		    else
+			continue
+		    fi
+		    # If more than one device is on the bus then issue a warning.
+		    # NOTE(review): only reached after a failed setpci write above - confirm intent
+		    num=`/sbin/setpci -f -s $bus: 0 | wc -l |  sed 's/\ *//g'`
+		    if [ $num -gt 1 ]; then
+			echo "Warning: your current PCI-X configuration might be incorrect."
+			echo "see AMD-8131 Errata 56 for more details."
+		    fi
+		done
+	    done
+	fi
+    fi
+}
+
+RC=0	# start from a known value; do not inherit RC from the environment or rdma.conf
+load_hardware_modules
+RC=$(( RC + $? ))
+load_modules $LOAD_CORE_MODULES
+RC=$(( RC + $? ))
+load_modules $LOAD_CORE_CM_MODULES
+RC=$(( RC + $? ))
+load_modules $LOAD_CORE_USER_MODULES
+RC=$(( RC + $? ))
+load_modules $LOAD_ULP_MODULES
+RC=$(( RC + $? ))
+# The chipset workarounds and SR-IOV setup below do not contribute to RC
+errata_58
+errata_56
+
+/usr/libexec/rdma-set-sriov-vf
+
+exit $RC
diff --git a/redhat/rdma.mlx4-setup.sh b/redhat/rdma.mlx4-setup.sh
new file mode 100644
index 0000000..5e71ade
--- /dev/null
+++ b/redhat/rdma.mlx4-setup.sh
@@ -0,0 +1,91 @@
+#!/bin/bash
+dir="/sys/bus/pci/drivers/mlx4_core"
+[ ! -d $dir ] && exit 1
+pushd $dir >/dev/null
+
+function set_dual_port() {
+	# Set ports 1 and 2 of mlx4 device directory $1 to port types $2 and $3.
+	local device="$1" port1="$2" port2="$3"
+	local cur_p1 cur_p2 tries
+	pushd "$device" >/dev/null
+	cur_p1=$(cat mlx4_port1)
+	cur_p2=$(cat mlx4_port2)
+
+	# special case the "eth eth" mode as we need port2 to
+	# actually switch to eth before the driver will let us
+	# switch port1 to eth as well
+	if [ "$port1" == "eth" ]; then
+		if [ "$port2" != "eth" ]; then
+			echo "In order for port1 to be eth, port2 must also be eth"
+			popd >/dev/null
+			return
+		fi
+		if [ "$cur_p2" != "eth" -a "$cur_p2" != "auto (eth)" ]; then
+			tries=0
+			echo "$port2" > mlx4_port2 2>/dev/null
+			sleep .25
+			cur_p2=$(cat mlx4_port2)
+			while [ "$cur_p2" != "eth" -a "$cur_p2" != "auto (eth)" -a $tries -lt 10 ]; do
+				sleep .25
+				tries=$((tries + 1))
+				cur_p2=$(cat mlx4_port2)
+			done
+			if [ "$cur_p2" != "eth" -a "$cur_p2" != "auto (eth)" ]; then
+				echo "Failed to set port2 to eth mode"
+				popd >/dev/null
+				return
+			fi
+		fi
+		if [ "$cur_p1" != "eth" -a "$cur_p1" != "auto (eth)" ]; then
+			tries=0
+			echo "$port1" > mlx4_port1 2>/dev/null
+			sleep .25
+			cur_p1=$(cat mlx4_port1)
+			while [ "$cur_p1" != "eth" -a "$cur_p1" != "auto (eth)" -a $tries -lt 10 ]; do
+				sleep .25
+				tries=$((tries + 1))
+				cur_p1=$(cat mlx4_port1)
+			done
+			if [ "$cur_p1" != "eth" -a "$cur_p1" != "auto (eth)" ]; then
+				echo "Failed to set port1 to eth mode"
+			fi
+		fi
+		popd >/dev/null
+		return
+	fi
+
+	# our mode is not eth <anything> as that is covered above
+	# so we should be able to successfully set the ports in
+	# port1 then port2 order
+	if [ "$cur_p1" != "$port1" -o "$cur_p2" != "$port2" ]; then
+		# Try setting the ports in order first
+		echo "$port1" > mlx4_port1 2>/dev/null ; sleep .1
+		echo "$port2" > mlx4_port2 2>/dev/null ; sleep .1
+		cur_p1=$(cat mlx4_port1)
+		cur_p2=$(cat mlx4_port2)
+	fi
+
+	if [ "$cur_p1" != "$port1" -o "$cur_p2" != "$port2" ]; then
+		# Try reverse order this time
+		echo "$port2" > mlx4_port2 2>/dev/null ; sleep .1
+		echo "$port1" > mlx4_port1 2>/dev/null ; sleep .1
+		cur_p1=$(cat mlx4_port1)
+		cur_p2=$(cat mlx4_port2)
+	fi
+
+	if [ "$cur_p1" != "$port1" -o "$cur_p2" != "$port2" ]; then
+		echo "Error setting port type on mlx4 device $device"
+	fi
+
+	popd >/dev/null
+	return
+}
+
+
+while read device port1 port2 ; do
+	[ -d "$device" ] || continue	# first field must be a device directory; comment and blank lines fail this too
+	[ -z "$port1" ] && continue
+	[ -f "$device/mlx4_port2" -a -z "$port2" ] && continue	# dual-port card needs both types
+	if [ -f "$device/mlx4_port2" ]; then set_dual_port "$device" "$port1" "$port2"; else echo "$port1" > "$device/mlx4_port1"; fi
+done
+popd >/dev/null 2>&1
diff --git a/redhat/rdma.mlx4.conf b/redhat/rdma.mlx4.conf
new file mode 100644
index 0000000..71207cc
--- /dev/null
+++ b/redhat/rdma.mlx4.conf
@@ -0,0 +1,27 @@
+# Config file for mlx4 hardware port settings
+# This file is read when the mlx4_core module is loaded and used to
+# set the port types for any hardware found.  If a card is not listed
+# in this file, then its port types are left alone.
+#
+# Format:
+# <pci_device_of_card> <port1_type> [port2_type]
+#
+# @port1 and @port2:
+#   One of auto, ib, or eth.  No checking is performed to make sure that
+#   combinations are valid.  Invalid inputs will result in the driver
+#   not setting the port to the type requested.  port1 is required at
+#   all times, port2 is required for dual port cards.
+#
+# Example:
+# 0000:0b:00.0 eth eth
+#
+# You can find the right pci device to use for any given card by loading
+# the mlx4_core module, then going to /sys/bus/pci/drivers/mlx4_core and
+# seeing what possible PCI devices are listed there.  The possible values
+# for ports are: ib, eth, and auto.  However, not all cards support all
+# types, so if you get messages from the kernel that your selected port
+# type isn't supported, there's nothing this script can do about it.  Also,
+# some cards don't support using different types on the two ports (aka,
+# both ports must be either eth or ib).  Again, we can't set what the kernel
+# or hardware won't support.
+#
diff --git a/redhat/rdma.mlx4.sys.modprobe b/redhat/rdma.mlx4.sys.modprobe
new file mode 100644
index 0000000..781562c
--- /dev/null
+++ b/redhat/rdma.mlx4.sys.modprobe
@@ -0,0 +1,5 @@
+# WARNING! - This file is overwritten any time the rdma rpm package is
+# updated.  Please do not make any changes to this file.  Instead, make
+# changes to the mlx4.conf file.  Its contents are preserved if they
+# have been changed from the default values.
+install mlx4_core /sbin/modprobe --ignore-install mlx4_core $CMDLINE_OPTS && (if [ -f /usr/libexec/mlx4-setup.sh -a -f /etc/rdma/mlx4.conf ]; then /usr/libexec/mlx4-setup.sh < /etc/rdma/mlx4.conf; fi; /sbin/modprobe mlx4_en; if /sbin/modinfo mlx4_ib > /dev/null 2>&1; then /sbin/modprobe mlx4_ib; fi)
diff --git a/redhat/rdma.mlx4.user.modprobe b/redhat/rdma.mlx4.user.modprobe
new file mode 100644
index 0000000..c8b4cce
--- /dev/null
+++ b/redhat/rdma.mlx4.user.modprobe
@@ -0,0 +1,21 @@
+# This file is intended for users to select the various module options
+# they need for the mlx4 driver.  On upgrade of the rdma package,
+# any user made changes to this file are preserved.  Any changes made
+# to the libmlx4.conf file in this directory are overwritten on
+# package upgrade.
+#
+# Some sample options and what they would do
+# Enable debugging output, device managed flow control, and disable SRIOV
+#options mlx4_core debug_level=1 log_num_mgm_entry_size=-1 probe_vf=0 num_vfs=0
+#
+# Enable debugging output and create SRIOV devices, but don't attach any of
+# the child devices to the host, only the parent device
+#options mlx4_core debug_level=1 probe_vf=0 num_vfs=7
+#
+# Enable debugging output, SRIOV, and attach one of the SRIOV child devices
+# in addition to the parent device to the host
+#options mlx4_core debug_level=1 probe_vf=1 num_vfs=7
+#
+# Enable per priority flow control for send and receive, setting both priority
+# 1 and 2 as no drop priorities
+#options mlx4_en pfctx=3 pfcrx=3
diff --git a/redhat/rdma.modules-setup.sh b/redhat/rdma.modules-setup.sh
new file mode 100644
index 0000000..19a182f
--- /dev/null
+++ b/redhat/rdma.modules-setup.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+check() {	# 0 = include this module, 255 = leave it out
+	[ -n "$hostonly" -a -c /sys/class/infiniband_verbs/uverbs0 ] && return 0	# NOTE(review): -c needs a character device; sysfs class entries are normally directories - confirm -e/-d was not intended
+	[ -n "$hostonly" ] && return 255	# $hostonly set but the uverbs0 test failed
+	return 0	# $hostonly empty: always include
+}
+
+depends() {
+	return 0	# prints nothing and reports success
+}
+
+install() {	# copies the rdma config files, helper scripts, tools and udev rules via the inst* helpers
+	inst /etc/rdma/rdma.conf
+	inst /etc/rdma/mlx4.conf
+	inst /etc/rdma/sriov-vfs
+	inst /usr/libexec/rdma-init-kernel
+	inst /usr/libexec/rdma-fixup-mtrr.awk
+	inst /usr/libexec/mlx4-setup.sh
+	inst /usr/libexec/rdma-set-sriov-vf
+	inst /usr/lib/modprobe.d/libmlx4.conf
+	inst_multiple lspci setpci awk sleep	# external commands the rdma scripts call
+	inst_multiple -o /etc/modprobe.d/mlx4.conf	# presumably -o marks the file optional - verify against the helper's documentation
+	inst_rules 98-rdma.rules 70-persistent-ipoib.rules
+}
+
+installkernel() {	# each instmods call below runs with hostonly set to the empty string
+	hostonly='' instmods =drivers/infiniband =drivers/net/ethernet/mellanox =drivers/net/ethernet/chelsio =drivers/net/ethernet/cisco =drivers/net/ethernet/emulex =drivers/target
+	hostonly='' instmods crc-t10dif crct10dif_common
+}
diff --git a/redhat/rdma.service b/redhat/rdma.service
new file mode 100644
index 0000000..514ef58
--- /dev/null
+++ b/redhat/rdma.service
@@ -0,0 +1,15 @@
+[Unit]
+Description=Initialize the iWARP/InfiniBand/RDMA stack in the kernel
+Documentation=file:/etc/rdma/rdma.conf
+RefuseManualStop=true
+DefaultDependencies=false
+Conflicts=emergency.target emergency.service
+Before=network.target remote-fs-pre.target
+
+[Service]
+Type=oneshot
+RemainAfterExit=yes
+ExecStart=/usr/libexec/rdma-init-kernel
+
+[Install]
+WantedBy=sysinit.target
diff --git a/redhat/rdma.sriov-init b/redhat/rdma.sriov-init
new file mode 100644
index 0000000..0d7cbc6
--- /dev/null
+++ b/redhat/rdma.sriov-init
@@ -0,0 +1,137 @@
+#!/bin/bash
+#
+# Initialize SRIOV virtual devices
+#
+# This is usually run automatically by systemd after a hardware activation
+# event in udev has triggered a start of the rdma.service unit
+port=1
+
+function __get_parent_pci_dev()
+{	# sets ppci_dev from the physfn entry found under /sys/bus/pci/devices/$pci_dev
+    pushd /sys/bus/pci/devices/$pci_dev >/dev/null 2>&1
+    ppci_dev=`ls -l physfn | cut -f 2 -d '/'`	# second '/'-separated field of the ls -l line
+    popd >/dev/null 2>&1
+}
+
+function __get_parent_ib_dev()
+{	# sets ib_dev; relies on the current directory (the script pushd's into /sys/class/infiniband first)
+    ib_dev=`ls -l | awk '/'$ppci_dev'/ { print $9 }'`	# 9th ls -l field of the line(s) matching $ppci_dev
+}
+
+function __get_parent_net_dev()
+{	# sets netdev to the name of the parent's net entry whose dev_port equals $port
+    for netdev in /sys/bus/pci/devices/$ppci_dev/net/* ; do
+	if [ "$port" -eq `cat $netdev/dev_port` ]; then
+	    netdev=`basename $netdev`	# keep only the interface name
+	    break
+	fi
+    done	# NOTE(review): when nothing matches, netdev still holds the last path visited
+}
+
+function __get_vf_num()
+{	# sets vf from the virtfn* entry of the parent device that matches $pci_dev
+    pushd /sys/bus/pci/devices/$ppci_dev >/dev/null 2>&1
+    vf=`ls -l virtfn* | awk '/'$pci_dev'/ { print $9 }' | sed -e 's/virtfn//'`	# strip the "virtfn" prefix from the 9th field
+    popd >/dev/null 2>&1
+}
+
+function __en_sriov_set_vf()
+{	# args: <vf pci address> [port <n>] <options handed to "ip link set ... vf">
+    pci_dev=$1
+    shift
+    [ "$1" = "port" ] && port=$2 && shift 2	# optional "port N" overrides the global port
+    # We find our parent device by the netdev registered port number,
+    # however, the netdev port numbers start at 0 while the port
+    # numbers on the card start at 1, so we subtract 1 from our
+    # configured port number to get the netdev number
+    let port--
+    # Now we need to fill in the necessary information to pass to the ip
+    # command
+    __get_parent_pci_dev
+    __get_parent_net_dev
+    __get_vf_num
+    # The rest is easy.  Either the user passed valid arguments as options
+    # or they didn't
+    ip link set dev $netdev vf $vf $*	# remaining arguments are passed through unchecked
+}
+
+function __ib_sriov_set_vf()
+{	# args: <vf pci address> [port <n>] [guid <guid>] [pkey <pkey>...]
+    pci_dev=$1
+    shift
+    [ "$1" = "port" ] && port=$2 && shift 2	# optional "port N" overrides the global port
+    guid=""
+    __get_parent_pci_dev
+    __get_parent_ib_dev
+    [ -f $ib_dev/iov/$pci_dev/ports/$port/gid_idx/0 ] || return	# nothing to do without this iov entry
+    while [ -n "$1" ]; do	# consume options up to and including the "pkey" keyword
+	case $1 in
+	    guid)
+		guid=$2
+		shift 2
+		;;
+	    pkey)
+		shift 1
+		break	# everything left in "$@" is now a pkey value
+		;;
+	    *)
+		echo "Unknown option in $src"	# NOTE(review): $src is only assigned when the script reads the config file
+		shift
+		;;
+	esac
+    done
+    if [ -n "$guid" ]; then	# write the guid at the index stored in gid_idx/0
+	guid_idx=`cat "$ib_dev/iov/$pci_dev/ports/$port/gid_idx/0"`
+	echo "$guid" > "$ib_dev/iov/ports/$port/admin_guids/$guid_idx"
+    fi
+    i=0
+    while [ -n "$1" ]; do	# for each requested pkey, find the port pkey entry holding that value
+	for pkey in $ib_dev/iov/ports/$port/pkeys/*; do
+	    if [ `cat $pkey` = "$1" ]; then
+		echo `basename $pkey` > $ib_dev/iov/$pci_dev/ports/$port/pkey_idx/$i	# record its index in the VF's next slot
+		let i++
+		break
+	    fi
+	done
+	shift
+    done
+}
+
+[ -d /sys/class/infiniband ] || { return 0 2>/dev/null || exit 0; }	# no IB class: stop, whether sourced or executed
+pushd /sys/class/infiniband >/dev/null 2>&1
+
+if [ -z "$*" ]; then	# no arguments: apply every line of the config file
+    src=/etc/rdma/sriov-vfs
+    [ -f "$src" ] || { popd >/dev/null 2>&1; return 0 2>/dev/null || exit 0; }
+    grep -v "^#" $src | while read -a args; do
+	# When we use read -a to read into an array, the index starts at
+	# 0, unlike below where the arg count starts at 1
+	port=1
+	next_arg=1
+        [ "${args[$next_arg]}" = "port" ] && next_arg=3
+	case ${args[$next_arg]} in
+	    guid|pkey)
+		__ib_sriov_set_vf ${args[*]}
+		;;
+	    mac|vlan|rate|spoofchk|enable)
+		__en_sriov_set_vf ${args[*]}
+		;;
+	    *)
+		;;
+	esac
+    done
+else	# arguments given: treat them as a single config line
+    [ "$2" = "port" ] && next_arg=$4 || next_arg=$2
+    case $next_arg in
+	guid|pkey)
+	    __ib_sriov_set_vf $*
+	    ;;
+	mac|vlan|rate|spoofchk|enable)
+	    __en_sriov_set_vf $*
+	    ;;
+	*)
+	    ;;
+    esac
+fi
+
+popd >/dev/null 2>&1
diff --git a/redhat/rdma.sriov-vfs b/redhat/rdma.sriov-vfs
new file mode 100644
index 0000000..ef3e6c0
--- /dev/null
+++ b/redhat/rdma.sriov-vfs
@@ -0,0 +1,41 @@
+# All lines in this file that start with a # are comments,
+# all other lines will be processed without argument checks
+# Format of this file is one sriov vf setting per line with
+# arguments as follows:
+#    vf [port #] [ethernet settings | infiniband settings]
+#
+#  @vf - PCI address of device to configure as found in
+#	  /sys/bus/pci/devices/
+#
+#  [port @port] - Optional: the port number we are setting on
+#         the device.  We always assume port 1 unless told
+#         otherwise.
+#
+#  Ethernet settings:
+#  mac <mac address> [additional options]
+#    @mac - mac address to assign to vf...this is currently required by
+#        the ip program if you wish to be able to set any of the other
+#        settings.  If you don't set anything on a vf, it will get a
+#        random mac address and you may use static IP addressing to
+#        have a consistent IP address in spite of the random mac
+#    @* - additional arguments are passed to ip link without any
+#      further processing/checking, additional options that could
+#      be passed as of the time of writing this are:
+#        [ vlan VLANID [ qos VLAN-QOS ] ]
+#        [ rate TXRATE ]
+#        [ spoofchk { on | off} ]
+#        [ state { auto | enable | disable} ]
+#
+#  InfiniBand settings:
+#  [guid <guid>] [pkey <space separated list of pkeys>]
+#    @guid - 64bit GUID value to assign to vf.  Omit this option to
+#          use a subnet manager assigned GUID.
+#    @pkey - one or more pkeys to assign to this guest, must be last
+#          item on line
+#
+#  Examples:
+#
+#    0000:44:00.1 guid 05011403007bcba1 pkey 0xffff 0x8002
+#    0000:44:00.1 port 2 mac aa:bb:cc:dd:ee:f0 spoofchk on
+#    0000:44:00.2 port 1 pkey 0x7fff 0x0002
+#    0000:44:00.2 port 2 mac aa:bb:cc:dd:ee:f1 vlan 10 spoofchk on state enable
diff --git a/redhat/rdma.udev-ipoib-naming.rules b/redhat/rdma.udev-ipoib-naming.rules
new file mode 100644
index 0000000..1002470
--- /dev/null
+++ b/redhat/rdma.udev-ipoib-naming.rules
@@ -0,0 +1,13 @@
+# This is a sample udev rules file that demonstrates how to get udev to
+# set the name of IPoIB interfaces to whatever you wish.  There is a
+# 16 character limit on network device names though, so don't go too nuts
+#
+# Important items to note: ATTR{type}=="32" is IPoIB interfaces, and the
+# ATTR{address} match must start with ?* and only reference the last 8
+# bytes of the address or else the address might not match on any given
+# start of the IPoIB stack
+#
+# Note: as of rhel7, udev is case sensitive on the address field match
+# and all addresses need to be in lower case.
+#
+# ACTION=="add", SUBSYSTEM=="net", DRIVERS=="?*", ATTR{type}=="32", ATTR{address}=="?*00:02:c9:03:00:31:78:f2", NAME="mlx4_ib3"
diff --git a/redhat/rdma.udev-rules b/redhat/rdma.udev-rules
new file mode 100644
index 0000000..0c7a8fc
--- /dev/null
+++ b/redhat/rdma.udev-rules
@@ -0,0 +1,18 @@
+# We list all the various kernel modules that drive hardware in the
+# InfiniBand stack (and a few in the network stack that might not actually
+# be RDMA capable, but we don't know that at this time and it's safe to
+# enable the IB stack, so do so unilaterally) and on load of any of that
+# hardware, we trigger the rdma.service load in systemd
+
+SUBSYSTEM=="module", KERNEL=="cxgb*", ACTION=="add", TAG+="systemd", ENV{SYSTEMD_WANTS}="rdma.service"
+SUBSYSTEM=="module", KERNEL=="ib_*", ACTION=="add", TAG+="systemd", ENV{SYSTEMD_WANTS}="rdma.service"
+SUBSYSTEM=="module", KERNEL=="mlx*", ACTION=="add", TAG+="systemd", ENV{SYSTEMD_WANTS}="rdma.service"
+SUBSYSTEM=="module", KERNEL=="iw_*", ACTION=="add", TAG+="systemd", ENV{SYSTEMD_WANTS}="rdma.service"
+SUBSYSTEM=="module", KERNEL=="be2net", ACTION=="add", TAG+="systemd", ENV{SYSTEMD_WANTS}="rdma.service"
+SUBSYSTEM=="module", KERNEL=="enic", ACTION=="add", TAG+="systemd", ENV{SYSTEMD_WANTS}="rdma.service"
+
+# When we detect a new verbs device is added to the system, set the node
+# description on that device
+# If rdma-ndd is installed, defer the setting of the node description to it.
+SUBSYSTEM=="infiniband", KERNEL=="*", ACTION=="add", TEST!="/usr/sbin/rdma-ndd", RUN+="/bin/bash -c 'sleep 1; echo -n `hostname -s` %k > /sys/class/infiniband/%k/node_desc'"
+
diff --git a/redhat/srp_daemon.service b/redhat/srp_daemon.service
new file mode 100644
index 0000000..f9c4b1e
--- /dev/null
+++ b/redhat/srp_daemon.service
@@ -0,0 +1,17 @@
+[Unit]
+Description=Start or stop the daemon that attaches to SRP devices
+Documentation=file:///etc/rdma/rdma.conf file:///etc/srp_daemon.conf
+DefaultDependencies=false
+Conflicts=emergency.target emergency.service
+Requires=rdma.service
+Wants=opensm.service
+After=rdma.service opensm.service
+After=network.target
+Before=remote-fs-pre.target
+
+[Service]
+Type=simple
+ExecStart=/usr/sbin/srp_daemon.sh
+
+[Install]
+WantedBy=remote-fs-pre.target
-- 
2.10.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH rdma-core v2 1/4] Install end user focused documentation files
From: Jarod Wilson @ 2016-10-20 15:33 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Jarod Wilson, Jason Gunthorpe
In-Reply-To: <20161020153357.27286-1-jarod-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

We recommend that all distros ship these.

v2: installing README.md and MAINTAINERS as well.

Suggested-by: Jarod Wilson <jarod-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
Signed-off-by: Jason Gunthorpe <jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>
Signed-off-by: Jarod Wilson <jarod-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
---
 CMakeLists.txt                   |  3 ++-
 Documentation/CMakeLists.txt     | 10 ++++++++++
 debian/ibacm.install             |  1 +
 debian/ibverbs-providers.install |  1 +
 debian/libibcm1.install          |  1 +
 debian/libibverbs1.install       |  1 +
 debian/librdmacm1.install        |  1 +
 debian/srptools.install          |  1 +
 rdma-core.spec                   |  6 +++++-
 9 files changed, 23 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/CMakeLists.txt

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4d291d2..8dec772 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -21,7 +21,7 @@
 #      Use the historical search path for providers, in the standard system library.
 
 cmake_minimum_required(VERSION 2.8.11 FATAL_ERROR)
-project(RDMA C)
+project(rdma-core C)
 
 # CMake likes to use -rdynamic too much, they fixed it in 3.4.
 if(POLICY CMP0065)
@@ -259,6 +259,7 @@ configure_file("${BUILDLIB}/config.h.in" "${BUILD_INCLUDE}/config.h" ESCAPE_QUOT
 # Sub-directories
 add_subdirectory(ccan)
 add_subdirectory(util)
+add_subdirectory(Documentation)
 # Libraries
 add_subdirectory(libibumad)
 add_subdirectory(libibumad/man)
diff --git a/Documentation/CMakeLists.txt b/Documentation/CMakeLists.txt
new file mode 100644
index 0000000..6170b5f
--- /dev/null
+++ b/Documentation/CMakeLists.txt
@@ -0,0 +1,10 @@
+install(FILES
+  ibacm.md
+  ibsrpdm.md
+  libibcm.md
+  libibverbs.md
+  librdmacm.md
+  rxe.md
+  ../README.md
+  ../MAINTAINERS
+  DESTINATION "${CMAKE_INSTALL_DOCDIR}")
diff --git a/debian/ibacm.install b/debian/ibacm.install
index 2bb9591..f2a6c53 100644
--- a/debian/ibacm.install
+++ b/debian/ibacm.install
@@ -8,3 +8,4 @@ usr/share/man/man1/ib_acme.1
 usr/share/man/man1/ibacm.1
 usr/share/man/man7/ibacm.7
 usr/share/man/man7/ibacm_prov.7
+usr/share/doc/rdma-core/ibacm.md usr/share/doc/ibacm/
diff --git a/debian/ibverbs-providers.install b/debian/ibverbs-providers.install
index 1b41218..7458540 100644
--- a/debian/ibverbs-providers.install
+++ b/debian/ibverbs-providers.install
@@ -5,3 +5,4 @@ usr/lib/truescale-serdes.cmds
 usr/share/man/man8/rxe_cfg.8
 usr/share/man/man7/rxe.7
 usr/bin/rxe_cfg
+usr/share/doc/rdma-core/rxe.md usr/share/doc/ibverbs-providers/
diff --git a/debian/libibcm1.install b/debian/libibcm1.install
index 6f270b7..a0ace6e 100644
--- a/debian/libibcm1.install
+++ b/debian/libibcm1.install
@@ -1 +1,2 @@
 usr/lib/*/libibcm*.so.*
+usr/share/doc/rdma-core/libibcm.md usr/share/doc/libibcm1/
diff --git a/debian/libibverbs1.install b/debian/libibverbs1.install
index 78a7f76..83bdd80 100644
--- a/debian/libibverbs1.install
+++ b/debian/libibverbs1.install
@@ -1 +1,2 @@
 usr/lib/*/libibverbs*.so.*
+usr/share/doc/rdma-core/libibverbs.md usr/share/doc/libibverbs1/
diff --git a/debian/librdmacm1.install b/debian/librdmacm1.install
index c17048e..09140ab 100644
--- a/debian/librdmacm1.install
+++ b/debian/librdmacm1.install
@@ -1,2 +1,3 @@
 usr/lib/*/librdmacm*.so.*
 usr/lib/*/rsocket/librspreload*.so*
+usr/share/doc/rdma-core/librdmacm.md usr/share/doc/librdmacm1/
diff --git a/debian/srptools.install b/debian/srptools.install
index 17b909d..9c07015 100644
--- a/debian/srptools.install
+++ b/debian/srptools.install
@@ -6,3 +6,4 @@ usr/sbin/ibsrpdm
 usr/sbin/srp_daemon
 usr/share/man/man1/ibsrpdm.1
 usr/share/man/man1/srp_daemon.1
+usr/share/doc/rdma-core/ibsrpdm.md usr/share/doc/srptools/
diff --git a/rdma-core.spec b/rdma-core.spec
index 96c86fe..39be6a0 100644
--- a/rdma-core.spec
+++ b/rdma-core.spec
@@ -85,7 +85,8 @@ This is a simple example without the split sub packages to get things started.
          -DCMAKE_INSTALL_SYSCONFDIR:PATH=%{_sysconfdir} \
 	 -DCMAKE_INSTALL_SYSTEMD_SERVICEDIR:PATH=%{my_unitdir} \
 	 -DCMAKE_INSTALL_INITDDIR:PATH=%{_initrddir} \
-	 -DCMAKE_INSTALL_RUNDIR:PATH=%{_rundir}
+	 -DCMAKE_INSTALL_RUNDIR:PATH=%{_rundir} \
+	 -DCMAKE_INSTALL_DOCDIR:PATH=%{_docdir}/%{name}-%{version}
 %make_jobs
 
 %install
@@ -102,6 +103,8 @@ rm -rf %{buildroot}/%{my_unitdir}/
 
 %files
 %doc %{_mandir}/man*/*
+%doc %{_docdir}/%{name}-%{version}/README.md
+%doc %{_docdir}/%{name}-%{version}/MAINTAINERS
 %{_bindir}/*
 %{_includedir}/*
 %{_libdir}/lib*.so*
@@ -110,6 +113,7 @@ rm -rf %{buildroot}/%{my_unitdir}/
 %{_libdir}/rsocket/*
 %{_sbindir}/*
 %{_libexecdir}/*
+%{_docdir}/%{name}-%{version}/*
 %if 0%{?_unitdir:1}
 %{_unitdir}/*
 %else
-- 
2.10.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH rdma-core v2 0/4] rdma-core redhat/ infrastructure
From: Jarod Wilson @ 2016-10-20 15:33 UTC (permalink / raw)
  To: linux-rdma-u79uwXL29TY76Z2rM5mHXA; +Cc: Jarod Wilson
In-Reply-To: <20161014192136.11731-1-jarod-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>

This is a set of patches to make some minor changes to the stock spec file,
add a bunch of userspace glue that Red Hat has been shipping on its own in
Red Hat Enterprise Linux and Fedora for a while now, and a revised split
package spec file that we're likely to use in at least RHEL, to preserve the
current packaging split, save a change that merges all libibverbs providers
into the libibverbs package itself (with appropriate Obsoletes/Provides).

This second spin incorporates changes that hopefully address all feedback
from the first version of the patchset.

Jarod Wilson (4):
  Install end user focused documentation files
  redhat: add udev/systemd/etc infrastructure bits
  redhat: copy stock spec for RH customization
  redhat/spec: build split rpm packages

 CMakeLists.txt                      |   3 +-
 Documentation/CMakeLists.txt        |  10 +
 debian/ibacm.install                |   1 +
 debian/ibverbs-providers.install    |   1 +
 debian/libibcm1.install             |   1 +
 debian/libibverbs1.install          |   1 +
 debian/librdmacm1.install           |   1 +
 debian/srptools.install             |   1 +
 rdma-core.spec                      |   6 +-
 redhat/ibacm.service                |  12 ++
 redhat/rdma-core.spec               | 417 ++++++++++++++++++++++++++++++++++++
 redhat/rdma.conf                    |  25 +++
 redhat/rdma.cxgb3.sys.modprobe      |   1 +
 redhat/rdma.cxgb4.sys.modprobe      |   1 +
 redhat/rdma.fixup-mtrr.awk          | 160 ++++++++++++++
 redhat/rdma.ifdown-ib               | 183 ++++++++++++++++
 redhat/rdma.ifup-ib                 | 308 ++++++++++++++++++++++++++
 redhat/rdma.kernel-init             | 262 ++++++++++++++++++++++
 redhat/rdma.mlx4-setup.sh           |  91 ++++++++
 redhat/rdma.mlx4.conf               |  27 +++
 redhat/rdma.mlx4.sys.modprobe       |   5 +
 redhat/rdma.mlx4.user.modprobe      |  21 ++
 redhat/rdma.modules-setup.sh        |  30 +++
 redhat/rdma.service                 |  15 ++
 redhat/rdma.sriov-init              | 137 ++++++++++++
 redhat/rdma.sriov-vfs               |  41 ++++
 redhat/rdma.udev-ipoib-naming.rules |  13 ++
 redhat/rdma.udev-rules              |  18 ++
 redhat/srp_daemon.service           |  17 ++
 29 files changed, 1807 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/CMakeLists.txt
 create mode 100644 redhat/ibacm.service
 create mode 100644 redhat/rdma-core.spec
 create mode 100644 redhat/rdma.conf
 create mode 100644 redhat/rdma.cxgb3.sys.modprobe
 create mode 100644 redhat/rdma.cxgb4.sys.modprobe
 create mode 100644 redhat/rdma.fixup-mtrr.awk
 create mode 100644 redhat/rdma.ifdown-ib
 create mode 100644 redhat/rdma.ifup-ib
 create mode 100644 redhat/rdma.kernel-init
 create mode 100644 redhat/rdma.mlx4-setup.sh
 create mode 100644 redhat/rdma.mlx4.conf
 create mode 100644 redhat/rdma.mlx4.sys.modprobe
 create mode 100644 redhat/rdma.mlx4.user.modprobe
 create mode 100644 redhat/rdma.modules-setup.sh
 create mode 100644 redhat/rdma.service
 create mode 100644 redhat/rdma.sriov-init
 create mode 100644 redhat/rdma.sriov-vfs
 create mode 100644 redhat/rdma.udev-ipoib-naming.rules
 create mode 100644 redhat/rdma.udev-rules
 create mode 100644 redhat/srp_daemon.service

-- 
2.10.0

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH v3 0/11] Fix race conditions related to stopping block layer queues
From: Keith Busch @ 2016-10-20 14:52 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Jens Axboe, Christoph Hellwig, James Bottomley,
	Martin K. Petersen, Mike Snitzer, Doug Ledford, Ming Lin,
	Laurence Oberman, linux-block@vger.kernel.org,
	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org,
	linux-nvme@lists.infradead.org
In-Reply-To: <25418d7a-7e66-3b99-7532-669f7ebd58a6@sandisk.com>

On Wed, Oct 19, 2016 at 04:51:18PM -0700, Bart Van Assche wrote:
> 
> I assume that line 498 in blk-mq.c corresponds to BUG_ON(blk_queued_rq(rq))?
> Anyway, it seems to me like this is a bug in the NVMe code and also that
> this bug is completely unrelated to my patch series. In nvme_complete_rq() I
> see that blk_mq_requeue_request() is called. I don't think this is allowed
> from the context of nvme_cancel_request() because blk_mq_requeue_request()
> assumes that a request has already been removed from the request list.
> However, neither blk_mq_tagset_busy_iter() nor nvme_cancel_request() remove
> a request from the request list before nvme_complete_rq() is called. I think
> this is what triggers the BUG_ON() statement in blk_mq_requeue_request().
> Have you noticed that e.g. the scsi-mq code only calls
> blk_mq_requeue_request() after __blk_mq_end_request() has finished? Have you
> considered to follow the same approach in nvme_cancel_request()?

Both nvme and scsi requeue through their mq_ops 'complete' callback, so
nvme is similarly waiting for __blk_mq_end_request before requesting to
requeue. The problem, I think, is nvme's IO cancelling path is observing
active requests that it's requeuing from the queue_rq path.

Patch [11/11] kicks the requeue list unconditionally. This restarts queues
the driver had just quiesced a moment before, restarting those requests,
but the driver isn't ready to handle them. When the driver ultimately
unbinds from the device, it requeues those requests a second time.

Either the requeuing can't kick the requeue work when quiesced, or the
shutdown needs to quiesce even when it hasn't restarted the queues.
Either patch below appears to fix the issue.

---
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index ccd9cc5..078530c 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -201,7 +201,7 @@ static struct nvme_ns *nvme_get_ns_from_disk(struct gendisk *disk)
 
 void nvme_requeue_req(struct request *req)
 {
-	blk_mq_requeue_request(req, true);
+	blk_mq_requeue_request(req, !blk_mq_queue_stopped(req->q));
 }
 EXPORT_SYMBOL_GPL(nvme_requeue_req);
--

--- 
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4b30fa2..a05da98 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1681,10 +1681,9 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	del_timer_sync(&dev->watchdog_timer);
 
 	mutex_lock(&dev->shutdown_lock);
-	if (pci_is_enabled(to_pci_dev(dev->dev))) {
-		nvme_stop_queues(&dev->ctrl);
+	nvme_stop_queues(&dev->ctrl);
+	if (pci_is_enabled(to_pci_dev(dev->dev)))
 		csts = readl(dev->bar + NVME_REG_CSTS);
-	}
 
 	queues = dev->online_queues - 1;
 	for (i = dev->queue_count - 1; i > 0; i--)
--

^ permalink raw reply related

* Re: [PATCH 0/2] IB/rdmavt: cq ktrhead worker fix and API update
From: Dalessandro, Dennis @ 2016-10-20 12:56 UTC (permalink / raw)
  To: dledford@redhat.com, pmladek@suse.com, Hefty, Sean
  Cc: hal.rosenstock@gmail.com, tj@kernel.org,
	linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org
In-Reply-To: <1476878840-14548-1-git-send-email-pmladek@suse.com>

On Wed, 2016-10-19 at 14:07 +0200, Petr Mladek wrote:
> The kthread worker API has been improved in 4.9-rc1. The 2nd
> patch uses the new functions and simplifies the kthread worker
> creation and destroying.
> 
> I have found a possible race when working on the API conversion.
> A proposed fix is in the 1st patch.
> 
> Both changes are compile tested only. I did not find an easy way
> how to test them at runtime.
> 
> Petr Mladek (2):
>   IB/rdmavt: Avoid queuing work into a destroyed cq kthread worker
>   IB/rdmavt: Handle the kthread worker using the new API
> 
>  drivers/infiniband/sw/rdmavt/cq.c | 64 +++++++++++++++++----------
> ------------
>  1 file changed, 27 insertions(+), 37 deletions(-)

Thanks for the patches. I'm going to take a closer look; I have only
just seen these.

-Denny

^ permalink raw reply

* [PATCH rdma-core 6/6] libqedr: addition to consolidated repo
From: Ram Amrani @ 2016-10-20  9:49 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA
  Cc: Ariel.Elior-YGCgFSpz5w/QT0dZR+AlfA,
	Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA, Ram Amrani, Ram Amrani
In-Reply-To: <1476956952-17388-1-git-send-email-Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>

From: Ram Amrani <Ram.Amrani-74tsMCuadCbQT0dZR+AlfA@public.gmane.org>

Configure the consolidated repo to build libqedr (qelr).

Signed-off-by: Ram Amrani <Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
---
 CMakeLists.txt                | 1 +
 MAINTAINERS                   | 7 +++++++
 README.md                     | 1 +
 providers/qedr/CMakeLists.txt | 5 +++++
 4 files changed, 14 insertions(+)
 create mode 100644 providers/qedr/CMakeLists.txt

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 375859d..fef7f03 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -335,6 +335,7 @@ add_subdirectory(providers/mlx5)
 add_subdirectory(providers/mthca)
 add_subdirectory(providers/nes)
 add_subdirectory(providers/ocrdma)
+add_subdirectory(providers/qedr)
 add_subdirectory(providers/rxe)
 add_subdirectory(providers/rxe/man)
 
diff --git a/MAINTAINERS b/MAINTAINERS
index fb15276..65fad74 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -139,6 +139,13 @@ M:	Devesh Sharma <Devesh.sharma-dY08KVG/lbpWk0Htik3J/w@public.gmane.org>
 S:	Supported
 F:	providers/ocrdma/
 
+QEDR USERSPACE PROVIDER (for qedr.ko)
+M:	Ram Amrani <Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
+M:	Ariel Elior <Ariel.Elior-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
+S:	Supported
+F:	providers/qedr/
+P:	Doug Ledford <dledford-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
+
 RXE SOFT ROCEE USERSPACE PROVIDER (for rdma_rxe.ko)
 M:	Moni Shoua <monis-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
 S:	Supported
diff --git a/README.md b/README.md
index 66aee3f..3a13042 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,7 @@ is included:
  - ib_mthca.ko
  - iw_nes.ko
  - ocrdma.ko
+ - qedr.ko
  - rdma_rxe.ko
 
 Additional service daemons are provided for:
diff --git a/providers/qedr/CMakeLists.txt b/providers/qedr/CMakeLists.txt
new file mode 100644
index 0000000..8d4f3ce
--- /dev/null
+++ b/providers/qedr/CMakeLists.txt
@@ -0,0 +1,5 @@
+rdma_provider(qedr
+  qelr_main.c
+  qelr_verbs.c
+  qelr_chain.c
+  )
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH rdma-core 5/6] libqedr: abi
From: Ram Amrani @ 2016-10-20  9:49 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA
  Cc: Ariel.Elior-YGCgFSpz5w/QT0dZR+AlfA,
	Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA, Ram Amrani, Ram Amrani
In-Reply-To: <1476956952-17388-1-git-send-email-Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>

From: Ram Amrani <Ram.Amrani-74tsMCuadCbQT0dZR+AlfA@public.gmane.org>

Introducing ABI structures that allow interfacing with the kernel.

Signed-off-by: Ram Amrani <Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
---
 providers/qedr/qelr_abi.h | 120 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 120 insertions(+)
 create mode 100644 providers/qedr/qelr_abi.h

diff --git a/providers/qedr/qelr_abi.h b/providers/qedr/qelr_abi.h
new file mode 100644
index 0000000..a7a0638
--- /dev/null
+++ b/providers/qedr/qelr_abi.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __QELR_ABI_H__
+#define __QELR_ABI_H__
+
+#include <infiniband/kern-abi.h>
+
+#define QELR_ABI_VERSION			(8)
+
+struct qelr_get_context {
+	struct ibv_get_context cmd;		/* must be first */
+};
+
+struct qelr_alloc_ucontext_resp {
+	struct ibv_get_context_resp ibv_resp;	/* must be first */
+	__u64 db_pa;
+	__u32 db_size;
+
+	__u32 max_send_wr;
+	__u32 max_recv_wr;
+	__u32 max_srq_wr;
+	__u32 sges_per_send_wr;
+	__u32 sges_per_recv_wr;
+	__u32 sges_per_srq_wr;
+	__u32 max_cqes;
+};
+
+struct qelr_alloc_pd_req {
+	struct ibv_alloc_pd cmd;		/* must be first */
+};
+
+struct qelr_alloc_pd_resp {
+	struct ibv_alloc_pd_resp ibv_resp;	/* must be first */
+	__u32 pd_id;
+};
+
+struct qelr_create_cq_req {
+	struct ibv_create_cq ibv_cmd;		/* must be first */
+
+	__u64 addr;	/* user space virtual address of CQ buffer */
+	__u64 len;	/* size of CQ buffer */
+};
+
+struct qelr_create_cq_resp {
+	struct ibv_create_cq_resp ibv_resp;	/* must be first */
+	__u32 db_offset;
+	__u16 icid;
+};
+
+struct qelr_reg_mr {
+	struct ibv_reg_mr ibv_cmd;		/* must be first */
+};
+
+struct qelr_reg_mr_resp {
+	struct ibv_reg_mr_resp ibv_resp;	/* must be first */
+};
+
+struct qelr_create_qp_req {
+	struct ibv_create_qp ibv_qp;	/* must be first */
+
+	__u32 qp_handle_hi;
+	__u32 qp_handle_lo;
+
+	/* SQ */
+	__u64 sq_addr;	/* user space virtual address of SQ buffer */
+	__u64 sq_len;		/* length of SQ buffer */
+
+	/* RQ */
+	__u64 rq_addr;	/* user space virtual address of RQ buffer */
+	__u64 rq_len;		/* length of RQ buffer */
+};
+
+struct qelr_create_qp_resp {
+	struct ibv_create_qp_resp ibv_resp;	/* must be first */
+
+	__u32 qp_id;
+	__u32 atomic_supported;
+
+	/* SQ */
+	__u32 sq_db_offset;
+	__u16 sq_icid;
+
+	/* RQ */
+	__u32 rq_db_offset;
+	__u16 rq_icid;
+
+	__u32 rq_db2_offset;
+};
+
+#endif /* __QELR_ABI_H__ */
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH rdma-core 4/6] libqedr: main
From: Ram Amrani @ 2016-10-20  9:49 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA
  Cc: Ariel.Elior-YGCgFSpz5w/QT0dZR+AlfA,
	Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA, Ram Amrani, Ram Amrani
In-Reply-To: <1476956952-17388-1-git-send-email-Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>

From: Ram Amrani <Ram.Amrani-74tsMCuadCbQT0dZR+AlfA@public.gmane.org>

Introducing main, responsible for initializing the driver
and allocating the user context.

Signed-off-by: Ram Amrani <Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
---
 providers/qedr/qelr.h      | 320 +++++++++++++++++++++++++++++++++++++++++++++
 providers/qedr/qelr_main.c | 286 ++++++++++++++++++++++++++++++++++++++++
 providers/qedr/qelr_main.h |  83 ++++++++++++
 3 files changed, 689 insertions(+)
 create mode 100644 providers/qedr/qelr.h
 create mode 100644 providers/qedr/qelr_main.c
 create mode 100644 providers/qedr/qelr_main.h

diff --git a/providers/qedr/qelr.h b/providers/qedr/qelr.h
new file mode 100644
index 0000000..e321195
--- /dev/null
+++ b/providers/qedr/qelr.h
@@ -0,0 +1,320 @@
+/*
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __QELR_H__
+#define __QELR_H__
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <endian.h>
+#include <ccan/minmax.h>
+
+#include <infiniband/driver.h>
+#include <infiniband/arch.h>
+
+#define writel(b, p) (*(uint32_t *)(p) = (b))
+#define writeq(b, p) (*(uint64_t *)(p) = (b))
+
+#include "qelr_hsi.h"
+#include "qelr_chain.h"
+
+#define qelr_err(format, arg...) printf(format, ##arg)
+
+extern uint32_t qelr_dp_level;
+extern uint32_t qelr_dp_module;
+
+enum DP_MODULE {
+	QELR_MSG_CQ		= 0x10000,
+	QELR_MSG_RQ		= 0x20000,
+	QELR_MSG_SQ		= 0x40000,
+	QELR_MSG_QP		= (QELR_MSG_SQ | QELR_MSG_RQ),
+	QELR_MSG_MR		= 0x80000,
+	QELR_MSG_INIT		= 0x100000,
+	/* to be added...up to 0x8000000 */
+};
+
+enum DP_LEVEL {
+	QELR_LEVEL_VERBOSE	= 0x0,
+	QELR_LEVEL_INFO		= 0x1,
+	QELR_LEVEL_NOTICE	= 0x2,
+	QELR_LEVEL_ERR		= 0x3,
+};
+
+#define DP_ERR(fd, fmt, ...)					\
+do {								\
+	fprintf(fd, "[%s:%d]" fmt,				\
+		__func__, __LINE__,				\
+		##__VA_ARGS__);					\
+	fflush(fd); \
+} while (0)
+
+#define DP_NOTICE(fd, fmt, ...)					\
+do {								\
+	if (qelr_dp_level <= QELR_LEVEL_NOTICE)	{\
+		fprintf(fd, "[%s:%d]" fmt,			\
+		      __func__, __LINE__,			\
+		      ##__VA_ARGS__);				\
+		      fflush(fd); }				\
+} while (0)
+
+#define DP_INFO(fd, fmt, ...)					\
+do {								\
+	if (qelr_dp_level <= QELR_LEVEL_INFO)	{		\
+		fprintf(fd, "[%s:%d]" fmt,			\
+		      __func__, __LINE__,			\
+		      ##__VA_ARGS__); fflush(fd);		\
+	}							\
+} while (0)
+
+#define DP_VERBOSE(fd, module, fmt, ...)			\
+do {								\
+	if ((qelr_dp_level <= QELR_LEVEL_VERBOSE) &&		\
+		     (qelr_dp_module & (module))) {		\
+		fprintf(fd, "[%s:%d]" fmt,			\
+		      __func__, __LINE__,			\
+		      ##__VA_ARGS__);	fflush(fd); }		\
+} while (0)
+
+#define ROUND_UP_X(_val, _x) \
+	(((unsigned long)(_val) + ((_x)-1)) & (long)~((_x)-1))
+
+struct qelr_buf {
+	void		*addr;
+	size_t		len;		/* a 64 uint is used as s preparation
+					 * for double layer pbl.
+					 */
+};
+
+struct qelr_device {
+	struct ibv_device ibv_dev;
+};
+
+struct qelr_devctx {
+	struct ibv_context	ibv_ctx;
+	FILE			*dbg_fp;
+	void			*db_addr;
+	uint64_t		db_pa;
+	uint32_t		db_size;
+	uint8_t			disable_edpm;
+	uint32_t		kernel_page_size;
+
+	uint32_t		max_send_wr;
+	uint32_t		max_recv_wr;
+	uint32_t		sges_per_send_wr;
+	uint32_t		sges_per_recv_wr;
+	int			max_cqes;
+};
+
+struct qelr_pd {
+	struct ibv_pd		ibv_pd;
+	uint32_t		pd_id;
+};
+
+struct qelr_mr {
+	struct ibv_mr		ibv_mr;
+};
+
+union db_prod64 {
+	struct rdma_pwm_val32_data data;
+	uint64_t raw;
+};
+
+struct qelr_cq {
+	struct ibv_cq		ibv_cq;	/* must be first */
+
+	struct qelr_chain	chain;
+
+	void			*db_addr;
+	union db_prod64		db;
+
+	uint8_t			chain_toggle;
+	union rdma_cqe		*latest_cqe;
+	union rdma_cqe		*toggle_cqe;
+
+	uint8_t			arm_flags;
+};
+
+enum qelr_qp_state {
+	QELR_QPS_RST,
+	QELR_QPS_INIT,
+	QELR_QPS_RTR,
+	QELR_QPS_RTS,
+	QELR_QPS_SQD,
+	QELR_QPS_ERR,
+	QELR_QPS_SQE
+};
+
+union db_prod32 {
+	struct rdma_pwm_val16_data	data;
+	uint32_t			raw;
+};
+
+struct qelr_qp_hwq_info {
+	/* WQE */
+	struct qelr_chain			chain;
+	uint8_t					max_sges;
+
+	/* WQ */
+	uint16_t				prod;
+	uint16_t				wqe_cons;
+	uint16_t				cons;
+	uint16_t				max_wr;
+
+	/* DB */
+	void					*db;      /* Doorbell address */
+	void					*edpm_db;
+	union db_prod32				db_data;  /* Doorbell data */
+
+	uint16_t				icid;
+};
+
+struct qelr_rdma_ext {
+	uint64_t remote_va;
+	uint32_t remote_key;
+	uint32_t dma_length;
+};
+
+/* rdma extension, invalidate / immediate data + padding, inline data... */
+#define QELR_MAX_DPM_PAYLOAD (sizeof(struct qelr_rdma_ext) + sizeof(uint64_t) +\
+			       ROCE_REQ_MAX_INLINE_DATA_SIZE)
+struct qelr_edpm {
+	union {
+		struct db_roce_dpm_data	data;
+		uint64_t raw;
+	} msg;
+
+	uint8_t			dpm_payload[QELR_MAX_DPM_PAYLOAD];
+	uint32_t		dpm_payload_size;
+	uint32_t		dpm_payload_offset;
+	uint8_t			is_edpm;
+	struct qelr_rdma_ext    *rdma_ext;
+};
+
+struct qelr_qp {
+	struct ibv_qp				ibv_qp;
+	pthread_spinlock_t			q_lock;
+	enum qelr_qp_state			state;   /*  QP state */
+
+	struct qelr_qp_hwq_info			sq;
+	struct qelr_qp_hwq_info			rq;
+	struct {
+		uint64_t wr_id;
+		enum ibv_wc_opcode opcode;
+		uint32_t bytes_len;
+		uint8_t wqe_size;
+		uint8_t signaled;
+	} *wqe_wr_id;
+
+	struct {
+		uint64_t wr_id;
+		uint8_t wqe_size;
+	} *rqe_wr_id;
+
+	struct qelr_edpm			edpm;
+	uint8_t					prev_wqe_size;
+	uint32_t				max_inline_data;
+	uint32_t				qp_id;
+	int					sq_sig_all;
+	int					atomic_supported;
+
+};
+
+static inline struct qelr_devctx *get_qelr_ctx(struct ibv_context *ibctx)
+{
+	return container_of(ibctx, struct qelr_devctx, ibv_ctx);
+}
+
+static inline struct qelr_device *get_qelr_dev(struct ibv_device *ibdev)
+{
+	return container_of(ibdev, struct qelr_device, ibv_dev);
+}
+
+static inline struct qelr_qp *get_qelr_qp(struct ibv_qp *ibqp)
+{
+	return container_of(ibqp, struct qelr_qp, ibv_qp);
+}
+
+static inline struct qelr_pd *get_qelr_pd(struct ibv_pd *ibpd)
+{
+	return container_of(ibpd, struct qelr_pd, ibv_pd);
+}
+
+static inline struct qelr_cq *get_qelr_cq(struct ibv_cq *ibcq)
+{
+	return container_of(ibcq, struct qelr_cq, ibv_cq);
+}
+
+#define SET_FIELD(value, name, flag)				\
+	do {							\
+		(value) &= ~(name ## _MASK << name ## _SHIFT);	\
+		(value) |= ((flag) << (name ## _SHIFT));	\
+	} while (0)
+
+#define SET_FIELD2(value, name, flag)				\
+		((value) |= ((flag) << (name ## _SHIFT)))
+
+#define GET_FIELD(value, name) \
+	(((value) >> (name ## _SHIFT)) & name ## _MASK)
+
+#define ROCE_WQE_ELEM_SIZE	sizeof(struct rdma_sq_sge)
+
+#define QELR_RESP_IMM (RDMA_CQE_RESPONDER_IMM_FLG_MASK <<	\
+			RDMA_CQE_RESPONDER_IMM_FLG_SHIFT)
+#define QELR_RESP_RDMA (RDMA_CQE_RESPONDER_RDMA_FLG_MASK <<	\
+			RDMA_CQE_RESPONDER_RDMA_FLG_SHIFT)
+#define QELR_RESP_RDMA_IMM (QELR_RESP_IMM | QELR_RESP_RDMA)
+
+#define round_up(_val, _x) \
+	(((unsigned long)(_val) + ((_x)-1)) & (long)~((_x)-1))
+
+#define TYPEPTR_ADDR_SET(type_ptr, field, vaddr)			\
+	do {								\
+		(type_ptr)->field.hi = htole32(U64_HI(vaddr));	\
+		(type_ptr)->field.lo = htole32(U64_LO(vaddr));	\
+	} while (0)
+
+#define RQ_SGE_SET(sge, vaddr, vlength, vflags)			\
+	do {							\
+		TYPEPTR_ADDR_SET(sge, addr, vaddr);		\
+		(sge)->length = htole32(vlength);		\
+		(sge)->flags = htole32(vflags);		\
+	} while (0)
+
+#define U64_HI(val) ((uint32_t)(((uint64_t)(val)) >> 32))
+#define U64_LO(val) ((uint32_t)(((uint64_t)(val)) & 0xffffffff))
+#define HILO_U64(hi, lo)		((((uint64_t)(hi)) << 32) + (lo))
+
+#define QELR_MAX_RQ_WQE_SIZE (RDMA_MAX_SGE_PER_RQ_WQE)
+#define QELR_MAX_SQ_WQE_SIZE (ROCE_REQ_MAX_SINGLE_SQ_WQE_SIZE /	\
+			      ROCE_WQE_ELEM_SIZE)
+
+#endif /* __QELR_H__ */
diff --git a/providers/qedr/qelr_main.c b/providers/qedr/qelr_main.c
new file mode 100644
index 0000000..386d7e6
--- /dev/null
+++ b/providers/qedr/qelr_main.c
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <pthread.h>
+
+#include "qelr.h"
+#include "qelr_main.h"
+#include "qelr_abi.h"
+#include "qelr_chain.h"
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+/*
+ * PCI vendor and device IDs.  The device-ID suffixes (57980S, AH_50G, ...)
+ * are pasted onto PCI_DEVICE_ID_QLOGIC_ by the QHCA() macro to build
+ * hca_table, so a new ID must follow the same naming pattern.
+ */
+#define PCI_VENDOR_ID_QLOGIC           (0x1077)
+#define PCI_DEVICE_ID_QLOGIC_57980S    (0x1629)
+#define PCI_DEVICE_ID_QLOGIC_57980S_40 (0x1634)
+#define PCI_DEVICE_ID_QLOGIC_57980S_10 (0x1666)
+#define PCI_DEVICE_ID_QLOGIC_57980S_MF (0x1636)
+#define PCI_DEVICE_ID_QLOGIC_57980S_100 (0x1644)
+#define PCI_DEVICE_ID_QLOGIC_57980S_50  (0x1654)
+#define PCI_DEVICE_ID_QLOGIC_57980S_25  (0x1656)
+#define PCI_DEVICE_ID_QLOGIC_57980S_IOV (0x1664)
+#define PCI_DEVICE_ID_QLOGIC_AH_50G     (0x8070)
+#define PCI_DEVICE_ID_QLOGIC_AH_10G     (0x8071)
+#define PCI_DEVICE_ID_QLOGIC_AH_40G     (0x8072)
+#define PCI_DEVICE_ID_QLOGIC_AH_25G     (0x8073)
+#define PCI_DEVICE_ID_QLOGIC_AH_IOV     (0x8090)
+
+/*
+ * Debug print level and module mask.  Both are assigned by
+ * qelr_set_debug_mask() from the QELR_DP_LEVEL / QELR_DP_MODULE environment
+ * variables.  They have external linkage; presumably the DP_* print macros
+ * in other translation units read them -- TODO confirm.
+ */
+uint32_t qelr_dp_level;
+uint32_t qelr_dp_module;
+
+/* Build one hca_table entry from a PCI_DEVICE_ID_QLOGIC_<d> suffix. */
+#define QHCA(d)					\
+	{ .vendor = PCI_VENDOR_ID_QLOGIC,	\
+	  .device = PCI_DEVICE_ID_QLOGIC_##d }
+
+/*
+ * PCI vendor/device pairs this provider claims; searched linearly by
+ * qelr_driver_init().  The table is only ever read and only from this file,
+ * so it is static const: this keeps the generic symbol name out of the
+ * shared library's exported namespace and lets it live in read-only data.
+ */
+static const struct {
+	unsigned int vendor;
+	unsigned int device;
+} hca_table[] = {
+	QHCA(57980S),
+	QHCA(57980S_40),
+	QHCA(57980S_10),
+	QHCA(57980S_MF),
+	QHCA(57980S_100),
+	QHCA(57980S_50),
+	QHCA(57980S_25),
+	QHCA(57980S_IOV),
+	QHCA(AH_50G),
+	QHCA(AH_10G),
+	QHCA(AH_40G),
+	QHCA(AH_25G),
+	QHCA(AH_IOV),
+};
+
+static struct ibv_context *qelr_alloc_context(struct ibv_device *, int);
+static void qelr_free_context(struct ibv_context *);
+
+/*
+ * Verbs entry points for a qelr context.  qelr_alloc_context() copies this
+ * table by value into each new ibv_context, so the template itself is never
+ * modified and can be const.
+ */
+static const struct ibv_context_ops qelr_ctx_ops = {
+	.query_device = qelr_query_device,
+	.query_port = qelr_query_port,
+	.alloc_pd = qelr_alloc_pd,
+	.dealloc_pd = qelr_dealloc_pd,
+	.reg_mr = qelr_reg_mr,
+	.dereg_mr = qelr_dereg_mr,
+	.create_cq = qelr_create_cq,
+	.poll_cq = qelr_poll_cq,
+	.req_notify_cq = qelr_arm_cq,
+	.cq_event = qelr_cq_event,
+	.destroy_cq = qelr_destroy_cq,
+	.create_qp = qelr_create_qp,
+	.query_qp = qelr_query_qp,
+	.modify_qp = qelr_modify_qp,
+	.destroy_qp = qelr_destroy_qp,
+	.post_send = qelr_post_send,
+	.post_recv = qelr_post_recv,
+	.async_event = qelr_async_event,
+};
+
+/*
+ * Device-level operations.  qelr_driver_init() copies this table by value
+ * into each new ibv_device, so the template can be const.
+ */
+static const struct ibv_device_ops qelr_dev_ops = {
+	.alloc_context = qelr_alloc_context,
+	.free_context = qelr_free_context
+};
+
+/*
+ * Select the stream used for debug output and store it in ctx->dbg_fp.
+ *
+ * If QELR_DEBUG_FILE is set, the named file is opened for appending.  If the
+ * variable is unset, or the file cannot be opened, stderr is used instead,
+ * so ctx->dbg_fp is never NULL on return.
+ */
+static void qelr_open_debug_file(struct qelr_devctx *ctx)
+{
+	const char *env = getenv("QELR_DEBUG_FILE");
+
+	ctx->dbg_fp = NULL;
+
+	if (env) {
+		/*
+		 * "a+" is the standard spelling of append + update; the
+		 * previous "aw+" is not a mode string defined by C or POSIX.
+		 */
+		ctx->dbg_fp = fopen(env, "a+");
+		if (!ctx->dbg_fp)
+			fprintf(stderr,
+				"Failed opening debug file %s, using stderr\n",
+				env);
+	}
+
+	if (!ctx->dbg_fp) {
+		ctx->dbg_fp = stderr;
+		DP_VERBOSE(ctx->dbg_fp, QELR_MSG_INIT,
+			   "Debug file opened: stderr\n");
+		return;
+	}
+
+	DP_VERBOSE(ctx->dbg_fp, QELR_MSG_INIT, "Debug file opened: %s\n", env);
+}
+
+/*
+ * Close the debug stream of @ctx, unless none was opened or it is stderr
+ * (which this provider does not own).
+ */
+static void qelr_close_debug_file(struct qelr_devctx *ctx)
+{
+	FILE *fp = ctx->dbg_fp;
+
+	if (!fp || fp == stderr)
+		return;
+
+	fclose(fp);
+}
+
+/*
+ * Read a decimal number from environment variable @name.
+ *
+ * Returns @fallback when the variable is unset or does not begin with a
+ * number.  The parsed value is truncated to 32 bits; a leading '-' wraps the
+ * same way atoi() followed by assignment to a uint32_t did (e.g. "-1" yields
+ * 0xffffffff).
+ */
+static uint32_t qelr_env_u32(const char *name, uint32_t fallback)
+{
+	const char *env = getenv(name);
+	unsigned long val;
+	char *end;
+
+	if (!env)
+		return fallback;
+
+	val = strtoul(env, &end, 10);
+	if (end == env)
+		return fallback;	/* no digits consumed */
+
+	return (uint32_t)val;
+}
+
+/*
+ * Initialise the global debug level and module mask.
+ *
+ * Defaults are QELR_LEVEL_NOTICE and 0; QELR_DP_LEVEL and QELR_DP_MODULE
+ * override them.  strtoul() is used rather than atoi() so that out-of-range
+ * input has defined behaviour and unparsable input keeps the default instead
+ * of silently becoming 0.
+ */
+static void qelr_set_debug_mask(void)
+{
+	qelr_dp_level = qelr_env_u32("QELR_DP_LEVEL", QELR_LEVEL_NOTICE);
+	qelr_dp_module = qelr_env_u32("QELR_DP_MODULE", 0);
+}
+
+/*
+ * Create a qelr device context for @ibdev using uverbs command fd @cmd_fd.
+ *
+ * Sets up debug output, issues the get-context command, copies the limits
+ * reported in the response into the context and maps the doorbell region
+ * through @cmd_fd.  Returns the embedded ibv_context, or NULL on any failure
+ * (in which case the context and its debug file are released).
+ */
+static struct ibv_context *qelr_alloc_context(struct ibv_device *ibdev,
+					      int cmd_fd)
+{
+	struct qelr_alloc_ucontext_resp resp;
+	struct qelr_get_context cmd;
+	struct qelr_devctx *ctx;
+	int mmap_errno;
+
+	ctx = calloc(1, sizeof(*ctx));
+	if (ctx == NULL)
+		return NULL;
+
+	memset(&resp, 0, sizeof(resp));
+	ctx->ibv_ctx.cmd_fd = cmd_fd;
+
+	qelr_open_debug_file(ctx);
+	qelr_set_debug_mask();
+
+	if (ibv_cmd_get_context(&ctx->ibv_ctx,
+				(struct ibv_get_context *)&cmd, sizeof(cmd),
+				&resp.ibv_resp, sizeof(resp)) != 0)
+		goto err_free;
+
+	ctx->ibv_ctx.device = ibdev;
+	ctx->ibv_ctx.ops = qelr_ctx_ops;
+	ctx->kernel_page_size = sysconf(_SC_PAGESIZE);
+
+	/* Limits and doorbell location reported in the command response. */
+	ctx->max_send_wr = resp.max_send_wr;
+	ctx->max_recv_wr = resp.max_recv_wr;
+	ctx->sges_per_send_wr = resp.sges_per_send_wr;
+	ctx->sges_per_recv_wr = resp.sges_per_recv_wr;
+	ctx->max_cqes = resp.max_cqes;
+	ctx->db_pa = resp.db_pa;
+	ctx->db_size = resp.db_size;
+
+	/* The doorbell area is mapped write-only, with db_pa as the offset. */
+	ctx->db_addr = mmap(NULL, ctx->db_size, PROT_WRITE, MAP_SHARED,
+			    cmd_fd, ctx->db_pa);
+	if (ctx->db_addr != MAP_FAILED)
+		return &ctx->ibv_ctx;
+
+	/* Capture errno before any further library call can overwrite it. */
+	mmap_errno = errno;
+	DP_ERR(ctx->dbg_fp,
+	       "alloc context: doorbell mapping failed resp.db_pa = %llx resp.db_size=%d context->cmd_fd=%d errno=%d\n",
+	       resp.db_pa, resp.db_size, cmd_fd, mmap_errno);
+
+err_free:
+	qelr_err("%s: Failed to allocate context for device.\n", __func__);
+	qelr_close_debug_file(ctx);
+	free(ctx);
+	return NULL;
+}
+
+/*
+ * Tear down a context created by qelr_alloc_context(): unmap the doorbell
+ * region if one is recorded, close the debug file and free the context.
+ */
+static void qelr_free_context(struct ibv_context *ibctx)
+{
+	struct qelr_devctx *qctx = get_qelr_ctx(ibctx);
+
+	if (qctx->db_addr != NULL)
+		munmap(qctx->db_addr, qctx->db_size);
+
+	qelr_close_debug_file(qctx);
+	free(qctx);
+}
+
+/*
+ * Driver probe callback registered with libibverbs.
+ *
+ * Reads the PCI vendor and device IDs of the uverbs device at
+ * @uverbs_sys_path from sysfs and looks them up in hca_table.  For a known
+ * device whose kernel ABI version equals QELR_ABI_VERSION, a zeroed
+ * qelr_device is allocated and its embedded ibv_device returned.
+ *
+ * Returns NULL if the sysfs files cannot be read or parsed, the device is
+ * not in hca_table, @abi_version is unsupported, or allocation fails.
+ */
+struct ibv_device *qelr_driver_init(const char *uverbs_sys_path,
+				    int abi_version)
+{
+	const size_t num_hcas = sizeof(hca_table) / sizeof(hca_table[0]);
+	char value[16];
+	struct qelr_device *dev;
+	unsigned int vendor, device;
+	size_t i;
+
+	if (ibv_read_sysfs_file(uverbs_sys_path, "device/vendor",
+				value, sizeof(value)) < 0)
+		return NULL;
+
+	/* A failed conversion would leave 'vendor' uninitialized. */
+	if (sscanf(value, "%i", &vendor) != 1)
+		return NULL;
+
+	if (ibv_read_sysfs_file(uverbs_sys_path, "device/device",
+				value, sizeof(value)) < 0)
+		return NULL;
+
+	/* Same for 'device'. */
+	if (sscanf(value, "%i", &device) != 1)
+		return NULL;
+
+	for (i = 0; i < num_hcas; ++i)
+		if (vendor == hca_table[i].vendor &&
+		    device == hca_table[i].device)
+			break;
+
+	if (i == num_hcas)
+		return NULL;
+
+	if (abi_version != QELR_ABI_VERSION) {
+		fprintf(stderr,
+			"Fatal: libqedr ABI version %d of %s is not supported.\n",
+			abi_version, uverbs_sys_path);
+		return NULL;
+	}
+
+	/* calloc() replaces the malloc() + legacy bzero() pair. */
+	dev = calloc(1, sizeof(*dev));
+	if (!dev) {
+		qelr_err("%s() Fatal: fail allocate device for libqedr\n",
+			 __func__);
+		return NULL;
+	}
+
+	dev->ibv_dev.ops = qelr_dev_ops;
+
+	return &dev->ibv_dev;
+}
+
+/*
+ * Library constructor: registers qelr_driver_init() with libibverbs under
+ * the name "qelr" when the shared object is loaded.
+ */
+static void __attribute__((constructor)) qelr_register_driver(void)
+{
+	ibv_register_driver("qelr", qelr_driver_init);
+}
diff --git a/providers/qedr/qelr_main.h b/providers/qedr/qelr_main.h
new file mode 100644
index 0000000..1f65be6
--- /dev/null
+++ b/providers/qedr/qelr_main.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __QELR_MAIN_H__
+#define __QELR_MAIN_H__
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <endian.h>
+
+#include <infiniband/driver.h>
+#include <infiniband/arch.h>
+
+/* Driver probe entry point; registered with libibverbs in qelr_main.c. */
+struct ibv_device *qelr_driver_init(const char *, int);
+
+/* Device and port attribute queries. */
+int qelr_query_device(struct ibv_context *, struct ibv_device_attr *);
+int qelr_query_port(struct ibv_context *, uint8_t, struct ibv_port_attr *);
+
+/* Protection domains. */
+struct ibv_pd *qelr_alloc_pd(struct ibv_context *);
+int qelr_dealloc_pd(struct ibv_pd *);
+
+/* Memory regions. */
+struct ibv_mr *qelr_reg_mr(struct ibv_pd *, void *, size_t,
+			   int ibv_access_flags);
+int qelr_dereg_mr(struct ibv_mr *);
+
+/* Completion queues.  qelr_arm_cq() is installed as the req_notify_cq op. */
+struct ibv_cq *qelr_create_cq(struct ibv_context *, int,
+			      struct ibv_comp_channel *, int);
+int qelr_destroy_cq(struct ibv_cq *);
+int qelr_poll_cq(struct ibv_cq *, int, struct ibv_wc *);
+void qelr_cq_event(struct ibv_cq *);
+int qelr_arm_cq(struct ibv_cq *, int);
+
+/*
+ * Shared receive queues.  NOTE(review): these are declared here but are not
+ * present in the qelr_ctx_ops table in qelr_main.c -- confirm whether SRQ
+ * support is implemented.
+ */
+int qelr_query_srq(struct ibv_srq *ibv_srq, struct ibv_srq_attr *attr);
+int qelr_modify_srq(struct ibv_srq *ibv_srq, struct ibv_srq_attr *attr,
+		    int attr_mask);
+struct ibv_srq *qelr_create_srq(struct ibv_pd *, struct ibv_srq_init_attr *);
+int qelr_destroy_srq(struct ibv_srq *ibv_srq);
+int qelr_post_srq_recv(struct ibv_srq *, struct ibv_recv_wr *,
+		       struct ibv_recv_wr **bad_wr);
+
+/* Queue pairs. */
+struct ibv_qp *qelr_create_qp(struct ibv_pd *, struct ibv_qp_init_attr *);
+int qelr_modify_qp(struct ibv_qp *, struct ibv_qp_attr *,
+		   int ibv_qp_attr_mask);
+int qelr_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask,
+		  struct ibv_qp_init_attr *init_attr);
+int qelr_destroy_qp(struct ibv_qp *);
+
+/* Work request posting; the last argument receives the first failing WR. */
+int qelr_post_send(struct ibv_qp *, struct ibv_send_wr *,
+		   struct ibv_send_wr **);
+int qelr_post_recv(struct ibv_qp *, struct ibv_recv_wr *,
+		   struct ibv_recv_wr **);
+
+/* Asynchronous event hook (the async_event op). */
+void qelr_async_event(struct ibv_async_event *event);
+#endif /* __QELR_MAIN_H__ */
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH rdma-core 3/6] libqedr: HSI
From: Ram Amrani @ 2016-10-20  9:49 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA
  Cc: Ariel.Elior-YGCgFSpz5w/QT0dZR+AlfA,
	Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA, Ram Amrani
In-Reply-To: <1476956952-17388-1-git-send-email-Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>

From: Ram Amrani <Ram.Amrani-74tsMCuadCbQT0dZR+AlfA@public.gmane.org>

Introduce the HSI that allows interfacing directly with the NIC.

Ram Amrani <Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
---
 providers/qedr/common_hsi.h    | 1502 ++++++++++++++++++++++++++++++++++++++++
 providers/qedr/qelr_hsi.h      |   67 ++
 providers/qedr/qelr_hsi_rdma.h |  914 ++++++++++++++++++++++++
 providers/qedr/rdma_common.h   |   74 ++
 providers/qedr/roce_common.h   |   50 ++
 5 files changed, 2607 insertions(+)
 create mode 100644 providers/qedr/common_hsi.h
 create mode 100644 providers/qedr/qelr_hsi.h
 create mode 100644 providers/qedr/qelr_hsi_rdma.h
 create mode 100644 providers/qedr/rdma_common.h
 create mode 100644 providers/qedr/roce_common.h

diff --git a/providers/qedr/common_hsi.h b/providers/qedr/common_hsi.h
new file mode 100644
index 0000000..8027866
--- /dev/null
+++ b/providers/qedr/common_hsi.h
@@ -0,0 +1,1502 @@
+/*
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __COMMON_HSI__
+#define __COMMON_HSI__
+/********************************/
+/* PROTOCOL COMMON FW CONSTANTS */
+/********************************/
+
+/* Temporarily defined here; should be added to the HSI automatically by the resource allocation tool. */
+#define T_TEST_AGG_INT_TEMP    6
+#define	M_TEST_AGG_INT_TEMP    8
+#define	U_TEST_AGG_INT_TEMP    6
+#define	X_TEST_AGG_INT_TEMP    14
+#define	Y_TEST_AGG_INT_TEMP    4
+#define	P_TEST_AGG_INT_TEMP    4
+
+#define X_FINAL_CLEANUP_AGG_INT  1
+
+#define EVENT_RING_PAGE_SIZE_BYTES          4096
+
+#define NUM_OF_GLOBAL_QUEUES				128
+#define COMMON_QUEUE_ENTRY_MAX_BYTE_SIZE	64
+
+#define ISCSI_CDU_TASK_SEG_TYPE       0
+#define FCOE_CDU_TASK_SEG_TYPE        0
+#define RDMA_CDU_TASK_SEG_TYPE        1
+
+#define FW_ASSERT_GENERAL_ATTN_IDX    32
+
+#define MAX_PINNED_CCFC			32
+
+#define EAGLE_ENG1_WORKAROUND_NIG_FLOWCTRL_MODE	3
+
+/* Queue Zone sizes in bytes */
+#define TSTORM_QZONE_SIZE    8	 /*tstorm_scsi_queue_zone*/
+#define MSTORM_QZONE_SIZE    16  /*mstorm_eth_queue_zone. Used only for RX producer of VFs in backward compatibility mode.*/
+#define USTORM_QZONE_SIZE    8	 /*ustorm_eth_queue_zone*/
+#define XSTORM_QZONE_SIZE    8	 /*xstorm_eth_queue_zone*/
+#define YSTORM_QZONE_SIZE    0
+#define PSTORM_QZONE_SIZE    0
+
+#define MSTORM_VF_ZONE_DEFAULT_SIZE_LOG       7     /*Log of mstorm default VF zone size.*/
+#define ETH_MAX_NUM_RX_QUEUES_PER_VF_DEFAULT  16    /*Maximum number of RX queues that can be allocated to VF by default*/
+#define ETH_MAX_NUM_RX_QUEUES_PER_VF_DOUBLE   48    /*Maximum number of RX queues that can be allocated to VF with doubled VF zone size. Up to 96 VF supported in this mode*/
+#define ETH_MAX_NUM_RX_QUEUES_PER_VF_QUAD     112   /*Maximum number of RX queues that can be allocated to VF with 4 VF zone size. Up to 48 VF supported in this mode*/
+
+
+/********************************/
+/* CORE (LIGHT L2) FW CONSTANTS */
+/********************************/
+
+#define CORE_LL2_MAX_RAMROD_PER_CON				8
+#define CORE_LL2_TX_BD_PAGE_SIZE_BYTES			4096
+#define CORE_LL2_RX_BD_PAGE_SIZE_BYTES			4096
+#define CORE_LL2_RX_CQE_PAGE_SIZE_BYTES			4096
+#define CORE_LL2_RX_NUM_NEXT_PAGE_BDS			1
+
+#define CORE_LL2_TX_MAX_BDS_PER_PACKET				12
+
+#define CORE_SPQE_PAGE_SIZE_BYTES			4096
+
+#define MAX_NUM_LL2_RX_QUEUES					32
+#define MAX_NUM_LL2_TX_STATS_COUNTERS			32
+
+
+///////////////////////////////////////////////////////////////////////////////////////////////////
+// Include firmware version number only - do not add constants here, to avoid redundant compilations
+///////////////////////////////////////////////////////////////////////////////////////////////////
+
+
+#define FW_MAJOR_VERSION		8
+#define FW_MINOR_VERSION		10
+#define FW_REVISION_VERSION		9
+#define FW_ENGINEERING_VERSION	0
+
+/***********************/
+/* COMMON HW CONSTANTS */
+/***********************/
+
+/* PCI functions */
+#define MAX_NUM_PORTS_K2		(4)
+#define MAX_NUM_PORTS_BB		(2)
+#define MAX_NUM_PORTS			(MAX_NUM_PORTS_K2)
+
+#define MAX_NUM_PFS_K2			(16)
+#define MAX_NUM_PFS_BB			(8)
+#define MAX_NUM_PFS				(MAX_NUM_PFS_K2)
+#define MAX_NUM_OF_PFS_IN_CHIP	(16) /* On both engines */
+
+#define MAX_NUM_VFS_K2			(192)
+#define MAX_NUM_VFS_BB			(120)
+#define MAX_NUM_VFS				(MAX_NUM_VFS_K2)
+
+#define MAX_NUM_FUNCTIONS_BB	(MAX_NUM_PFS_BB + MAX_NUM_VFS_BB)
+#define MAX_NUM_FUNCTIONS_K2	(MAX_NUM_PFS_K2 + MAX_NUM_VFS_K2)
+#define MAX_NUM_FUNCTIONS		(MAX_NUM_PFS + MAX_NUM_VFS)
+
+/* In both BB and K2 the VF numbers start from 16, so for arrays containing all */
+/* possible PFs and VFs we need a constant for this size. */
+#define MAX_FUNCTION_NUMBER_BB	(MAX_NUM_PFS + MAX_NUM_VFS_BB)
+#define MAX_FUNCTION_NUMBER_K2	(MAX_NUM_PFS + MAX_NUM_VFS_K2)
+#define MAX_FUNCTION_NUMBER		(MAX_NUM_PFS + MAX_NUM_VFS)
+
+#define MAX_NUM_VPORTS_K2		(208)
+#define MAX_NUM_VPORTS_BB		(160)
+#define MAX_NUM_VPORTS			(MAX_NUM_VPORTS_K2)
+
+#define MAX_NUM_L2_QUEUES_K2	(320)
+#define MAX_NUM_L2_QUEUES_BB	(256)
+#define MAX_NUM_L2_QUEUES		(MAX_NUM_L2_QUEUES_K2)
+
+/* Traffic classes in network-facing blocks (PBF, BTB, NIG, BRB, PRS and QM) */
+// 4-Port K2.
+#define NUM_PHYS_TCS_4PORT_K2	(4)
+#define NUM_OF_PHYS_TCS			(8)
+
+#define NUM_TCS_4PORT_K2		(NUM_PHYS_TCS_4PORT_K2 + 1)
+#define NUM_OF_TCS				(NUM_OF_PHYS_TCS + 1)
+
+#define LB_TC					(NUM_OF_PHYS_TCS)
+
+/* Num of possible traffic priority values */
+#define NUM_OF_PRIO				(8)
+
+#define MAX_NUM_VOQS_K2			(NUM_TCS_4PORT_K2 * MAX_NUM_PORTS_K2)
+#define MAX_NUM_VOQS_BB         (NUM_OF_TCS * MAX_NUM_PORTS_BB)
+#define MAX_NUM_VOQS			(MAX_NUM_VOQS_K2)
+#define MAX_PHYS_VOQS			(NUM_OF_PHYS_TCS * MAX_NUM_PORTS_BB)
+
+/* CIDs */
+#define NUM_OF_CONNECTION_TYPES (8)
+#define NUM_OF_LCIDS			(320)
+#define NUM_OF_LTIDS			(320)
+
+/* Clock values */
+#define MASTER_CLK_FREQ_E4		(375e6)
+#define STORM_CLK_FREQ_E4		(1000e6)
+#define CLK25M_CLK_FREQ_E4		(25e6)
+
+/* Global PXP windows (GTT) */
+#define NUM_OF_GTT			19
+#define GTT_DWORD_SIZE_BITS	10
+#define GTT_BYTE_SIZE_BITS	(GTT_DWORD_SIZE_BITS + 2)
+#define GTT_DWORD_SIZE		(1 << GTT_DWORD_SIZE_BITS)
+
+/* Tools Version */
+#define TOOLS_VERSION 10
+/*****************/
+/* CDU CONSTANTS */
+/*****************/
+
+#define CDU_SEG_TYPE_OFFSET_REG_TYPE_SHIFT		(17)
+#define CDU_SEG_TYPE_OFFSET_REG_OFFSET_MASK		(0x1ffff)
+
+#define CDU_VF_FL_SEG_TYPE_OFFSET_REG_TYPE_SHIFT	(12)
+#define CDU_VF_FL_SEG_TYPE_OFFSET_REG_OFFSET_MASK	(0xfff)
+
+
+/*****************/
+/* DQ CONSTANTS  */
+/*****************/
+
+/* DEMS */
+#define	DQ_DEMS_LEGACY						0
+#define DQ_DEMS_TOE_MORE_TO_SEND			3
+#define DQ_DEMS_TOE_LOCAL_ADV_WND			4
+#define DQ_DEMS_ROCE_CQ_CONS				7
+
+/* XCM agg val selection (HW) */
+#define DQ_XCM_AGG_VAL_SEL_WORD2  0
+#define DQ_XCM_AGG_VAL_SEL_WORD3  1
+#define DQ_XCM_AGG_VAL_SEL_WORD4  2
+#define DQ_XCM_AGG_VAL_SEL_WORD5  3
+#define DQ_XCM_AGG_VAL_SEL_REG3   4
+#define DQ_XCM_AGG_VAL_SEL_REG4   5
+#define DQ_XCM_AGG_VAL_SEL_REG5   6
+#define DQ_XCM_AGG_VAL_SEL_REG6   7
+
+/* XCM agg val selection (FW) */
+#define DQ_XCM_CORE_TX_BD_CONS_CMD          DQ_XCM_AGG_VAL_SEL_WORD3
+#define DQ_XCM_CORE_TX_BD_PROD_CMD          DQ_XCM_AGG_VAL_SEL_WORD4
+#define DQ_XCM_CORE_SPQ_PROD_CMD            DQ_XCM_AGG_VAL_SEL_WORD4
+#define DQ_XCM_ETH_EDPM_NUM_BDS_CMD         DQ_XCM_AGG_VAL_SEL_WORD2
+#define DQ_XCM_ETH_TX_BD_CONS_CMD           DQ_XCM_AGG_VAL_SEL_WORD3
+#define DQ_XCM_ETH_TX_BD_PROD_CMD           DQ_XCM_AGG_VAL_SEL_WORD4
+#define DQ_XCM_ETH_GO_TO_BD_CONS_CMD        DQ_XCM_AGG_VAL_SEL_WORD5
+#define DQ_XCM_FCOE_SQ_CONS_CMD             DQ_XCM_AGG_VAL_SEL_WORD3
+#define DQ_XCM_FCOE_SQ_PROD_CMD             DQ_XCM_AGG_VAL_SEL_WORD4
+#define DQ_XCM_FCOE_X_FERQ_PROD_CMD         DQ_XCM_AGG_VAL_SEL_WORD5
+#define DQ_XCM_ISCSI_SQ_CONS_CMD            DQ_XCM_AGG_VAL_SEL_WORD3
+#define DQ_XCM_ISCSI_SQ_PROD_CMD            DQ_XCM_AGG_VAL_SEL_WORD4
+#define DQ_XCM_ISCSI_MORE_TO_SEND_SEQ_CMD   DQ_XCM_AGG_VAL_SEL_REG3
+#define DQ_XCM_ISCSI_EXP_STAT_SN_CMD        DQ_XCM_AGG_VAL_SEL_REG6
+#define DQ_XCM_ROCE_SQ_PROD_CMD             DQ_XCM_AGG_VAL_SEL_WORD4
+#define DQ_XCM_TOE_TX_BD_PROD_CMD           DQ_XCM_AGG_VAL_SEL_WORD4
+#define DQ_XCM_TOE_MORE_TO_SEND_SEQ_CMD     DQ_XCM_AGG_VAL_SEL_REG3
+#define DQ_XCM_TOE_LOCAL_ADV_WND_SEQ_CMD    DQ_XCM_AGG_VAL_SEL_REG4
+
+/* UCM agg val selection (HW) */
+#define DQ_UCM_AGG_VAL_SEL_WORD0  0
+#define DQ_UCM_AGG_VAL_SEL_WORD1  1
+#define DQ_UCM_AGG_VAL_SEL_WORD2  2
+#define DQ_UCM_AGG_VAL_SEL_WORD3  3
+#define DQ_UCM_AGG_VAL_SEL_REG0   4
+#define DQ_UCM_AGG_VAL_SEL_REG1   5
+#define DQ_UCM_AGG_VAL_SEL_REG2   6
+#define DQ_UCM_AGG_VAL_SEL_REG3   7
+
+/* UCM agg val selection (FW) */
+#define DQ_UCM_ETH_PMD_TX_CONS_CMD			DQ_UCM_AGG_VAL_SEL_WORD2
+#define DQ_UCM_ETH_PMD_RX_CONS_CMD			DQ_UCM_AGG_VAL_SEL_WORD3
+#define DQ_UCM_ROCE_CQ_CONS_CMD				DQ_UCM_AGG_VAL_SEL_REG0
+#define DQ_UCM_ROCE_CQ_PROD_CMD				DQ_UCM_AGG_VAL_SEL_REG2
+
+/* TCM agg val selection (HW) */
+#define DQ_TCM_AGG_VAL_SEL_WORD0  0
+#define DQ_TCM_AGG_VAL_SEL_WORD1  1
+#define DQ_TCM_AGG_VAL_SEL_WORD2  2
+#define DQ_TCM_AGG_VAL_SEL_WORD3  3
+#define DQ_TCM_AGG_VAL_SEL_REG1   4
+#define DQ_TCM_AGG_VAL_SEL_REG2   5
+#define DQ_TCM_AGG_VAL_SEL_REG6   6
+#define DQ_TCM_AGG_VAL_SEL_REG9   7
+
+/* TCM agg val selection (FW) */
+#define DQ_TCM_L2B_BD_PROD_CMD				DQ_TCM_AGG_VAL_SEL_WORD1
+#define DQ_TCM_ROCE_RQ_PROD_CMD				DQ_TCM_AGG_VAL_SEL_WORD0
+
+/* XCM agg counter flag selection (HW) */
+#define DQ_XCM_AGG_FLG_SHIFT_BIT14  0
+#define DQ_XCM_AGG_FLG_SHIFT_BIT15  1
+#define DQ_XCM_AGG_FLG_SHIFT_CF12   2
+#define DQ_XCM_AGG_FLG_SHIFT_CF13   3
+#define DQ_XCM_AGG_FLG_SHIFT_CF18   4
+#define DQ_XCM_AGG_FLG_SHIFT_CF19   5
+#define DQ_XCM_AGG_FLG_SHIFT_CF22   6
+#define DQ_XCM_AGG_FLG_SHIFT_CF23   7
+
+/* XCM agg counter flag selection (FW) */
+#define DQ_XCM_CORE_DQ_CF_CMD               (1 << DQ_XCM_AGG_FLG_SHIFT_CF18)
+#define DQ_XCM_CORE_TERMINATE_CMD           (1 << DQ_XCM_AGG_FLG_SHIFT_CF19)
+#define DQ_XCM_CORE_SLOW_PATH_CMD           (1 << DQ_XCM_AGG_FLG_SHIFT_CF22)
+#define DQ_XCM_ETH_DQ_CF_CMD                (1 << DQ_XCM_AGG_FLG_SHIFT_CF18)
+#define DQ_XCM_ETH_TERMINATE_CMD            (1 << DQ_XCM_AGG_FLG_SHIFT_CF19)
+#define DQ_XCM_ETH_SLOW_PATH_CMD            (1 << DQ_XCM_AGG_FLG_SHIFT_CF22)
+#define DQ_XCM_ETH_TPH_EN_CMD               (1 << DQ_XCM_AGG_FLG_SHIFT_CF23)
+#define DQ_XCM_FCOE_SLOW_PATH_CMD           (1 << DQ_XCM_AGG_FLG_SHIFT_CF22)
+#define DQ_XCM_ISCSI_DQ_FLUSH_CMD           (1 << DQ_XCM_AGG_FLG_SHIFT_CF19)
+#define DQ_XCM_ISCSI_SLOW_PATH_CMD          (1 << DQ_XCM_AGG_FLG_SHIFT_CF22)
+#define DQ_XCM_ISCSI_PROC_ONLY_CLEANUP_CMD  (1 << DQ_XCM_AGG_FLG_SHIFT_CF23)
+#define DQ_XCM_TOE_DQ_FLUSH_CMD             (1 << DQ_XCM_AGG_FLG_SHIFT_CF19)
+#define DQ_XCM_TOE_SLOW_PATH_CMD            (1 << DQ_XCM_AGG_FLG_SHIFT_CF22)
+
+/* UCM agg counter flag selection (HW) */
+#define DQ_UCM_AGG_FLG_SHIFT_CF0       0
+#define DQ_UCM_AGG_FLG_SHIFT_CF1       1
+#define DQ_UCM_AGG_FLG_SHIFT_CF3       2
+#define DQ_UCM_AGG_FLG_SHIFT_CF4       3
+#define DQ_UCM_AGG_FLG_SHIFT_CF5       4
+#define DQ_UCM_AGG_FLG_SHIFT_CF6       5
+#define DQ_UCM_AGG_FLG_SHIFT_RULE0EN   6
+#define DQ_UCM_AGG_FLG_SHIFT_RULE1EN   7
+
+/* UCM agg counter flag selection (FW) */
+#define DQ_UCM_ETH_PMD_TX_ARM_CMD           (1 << DQ_UCM_AGG_FLG_SHIFT_CF4)
+#define DQ_UCM_ETH_PMD_RX_ARM_CMD           (1 << DQ_UCM_AGG_FLG_SHIFT_CF5)
+#define DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD        (1 << DQ_UCM_AGG_FLG_SHIFT_CF4)
+#define DQ_UCM_ROCE_CQ_ARM_CF_CMD           (1 << DQ_UCM_AGG_FLG_SHIFT_CF5)
+#define DQ_UCM_TOE_TIMER_STOP_ALL_CMD       (1 << DQ_UCM_AGG_FLG_SHIFT_CF3)
+#define DQ_UCM_TOE_SLOW_PATH_CF_CMD         (1 << DQ_UCM_AGG_FLG_SHIFT_CF4)
+#define DQ_UCM_TOE_DQ_CF_CMD                (1 << DQ_UCM_AGG_FLG_SHIFT_CF5)
+
+/* TCM agg counter flag selection (HW) */
+#define DQ_TCM_AGG_FLG_SHIFT_CF0  0
+#define DQ_TCM_AGG_FLG_SHIFT_CF1  1
+#define DQ_TCM_AGG_FLG_SHIFT_CF2  2
+#define DQ_TCM_AGG_FLG_SHIFT_CF3  3
+#define DQ_TCM_AGG_FLG_SHIFT_CF4  4
+#define DQ_TCM_AGG_FLG_SHIFT_CF5  5
+#define DQ_TCM_AGG_FLG_SHIFT_CF6  6
+#define DQ_TCM_AGG_FLG_SHIFT_CF7  7
+
+/* TCM agg counter flag selection (FW) */
+#define DQ_TCM_FCOE_FLUSH_Q0_CMD            (1 << DQ_TCM_AGG_FLG_SHIFT_CF1)
+#define DQ_TCM_FCOE_DUMMY_TIMER_CMD         (1 << DQ_TCM_AGG_FLG_SHIFT_CF2)
+#define DQ_TCM_FCOE_TIMER_STOP_ALL_CMD      (1 << DQ_TCM_AGG_FLG_SHIFT_CF3)
+#define DQ_TCM_ISCSI_FLUSH_Q0_CMD           (1 << DQ_TCM_AGG_FLG_SHIFT_CF1)
+#define DQ_TCM_ISCSI_TIMER_STOP_ALL_CMD     (1 << DQ_TCM_AGG_FLG_SHIFT_CF3)
+#define DQ_TCM_TOE_FLUSH_Q0_CMD             (1 << DQ_TCM_AGG_FLG_SHIFT_CF1)
+#define DQ_TCM_TOE_TIMER_STOP_ALL_CMD       (1 << DQ_TCM_AGG_FLG_SHIFT_CF3)
+#define DQ_TCM_IWARP_POST_RQ_CF_CMD         (1 << DQ_TCM_AGG_FLG_SHIFT_CF1)
+
+/* PWM address mapping */
+#define DQ_PWM_OFFSET_DPM_BASE				0x0
+#define DQ_PWM_OFFSET_DPM_END				0x27
+#define DQ_PWM_OFFSET_XCM16_BASE			0x40
+#define DQ_PWM_OFFSET_XCM32_BASE			0x44
+#define DQ_PWM_OFFSET_UCM16_BASE			0x48
+#define DQ_PWM_OFFSET_UCM32_BASE			0x4C
+#define DQ_PWM_OFFSET_UCM16_4				0x50
+#define DQ_PWM_OFFSET_TCM16_BASE			0x58
+#define DQ_PWM_OFFSET_TCM32_BASE			0x5C
+#define DQ_PWM_OFFSET_XCM_FLAGS				0x68
+#define DQ_PWM_OFFSET_UCM_FLAGS				0x69
+#define DQ_PWM_OFFSET_TCM_FLAGS				0x6B
+
+#define DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD			(DQ_PWM_OFFSET_XCM16_BASE + 2)
+#define DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT	(DQ_PWM_OFFSET_UCM32_BASE)
+#define DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_16BIT	(DQ_PWM_OFFSET_UCM16_4)
+#define DQ_PWM_OFFSET_UCM_RDMA_INT_TIMEOUT		(DQ_PWM_OFFSET_UCM16_BASE + 2)
+#define DQ_PWM_OFFSET_UCM_RDMA_ARM_FLAGS		(DQ_PWM_OFFSET_UCM_FLAGS)
+#define DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD			(DQ_PWM_OFFSET_TCM16_BASE + 1)
+#define DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD			(DQ_PWM_OFFSET_TCM16_BASE + 3)
+
+#define DQ_REGION_SHIFT				        (12)
+
+/* DPM */
+#define	DQ_DPM_WQE_BUFF_SIZE			    (320)
+
+// Conn type ranges
+#define DQ_CONN_TYPE_RANGE_SHIFT			(4)
+
+/*****************/
+/* QM CONSTANTS  */
+/*****************/
+
+/* number of TX queues in the QM */
+#define MAX_QM_TX_QUEUES_K2			512
+#define MAX_QM_TX_QUEUES_BB			448
+#define MAX_QM_TX_QUEUES			MAX_QM_TX_QUEUES_K2
+
+/* number of Other queues in the QM */
+#define MAX_QM_OTHER_QUEUES_BB		64
+#define MAX_QM_OTHER_QUEUES_K2		128
+#define MAX_QM_OTHER_QUEUES			MAX_QM_OTHER_QUEUES_K2
+
+/* number of queues in a PF queue group */
+#define QM_PF_QUEUE_GROUP_SIZE		8
+
+/* the size of a single queue element in bytes */
+#define QM_PQ_ELEMENT_SIZE			4
+
+/* base number of Tx PQs in the CM PQ representation.
+   should be used when storing PQ IDs in CM PQ registers and context */
+#define CM_TX_PQ_BASE               0x200
+
+/* number of global Vport/QCN rate limiters */
+#define MAX_QM_GLOBAL_RLS			256
+
+/*
+ * QM registers data: width in bits of each credit register and the mask of
+ * its sign (top) bit.
+ *
+ * The two 32-bit-wide registers use 1U: shifting a signed int 1 left by 31
+ * is undefined behaviour, and an int result would sign-extend when combined
+ * with a 64-bit operand.  The narrower masks keep their original int type.
+ */
+#define QM_LINE_CRD_REG_WIDTH		16
+#define QM_LINE_CRD_REG_SIGN_BIT	(1 << (QM_LINE_CRD_REG_WIDTH - 1))
+#define QM_BYTE_CRD_REG_WIDTH		24
+#define QM_BYTE_CRD_REG_SIGN_BIT	(1 << (QM_BYTE_CRD_REG_WIDTH - 1))
+#define QM_WFQ_CRD_REG_WIDTH		32
+#define QM_WFQ_CRD_REG_SIGN_BIT		(1U << (QM_WFQ_CRD_REG_WIDTH - 1))
+#define QM_RL_CRD_REG_WIDTH			32
+#define QM_RL_CRD_REG_SIGN_BIT		(1U << (QM_RL_CRD_REG_WIDTH - 1))
+
+/*****************/
+/* CAU CONSTANTS */
+/*****************/
+
+#define CAU_FSM_ETH_RX  0
+#define CAU_FSM_ETH_TX  1
+
+/* Number of Protocol Indices per Status Block */
+#define PIS_PER_SB    12
+
+
+#define CAU_HC_STOPPED_STATE		3			/* fsm is stopped or not valid for this sb */
+#define CAU_HC_DISABLE_STATE		4			/* fsm is working without interrupt coalescing for this sb*/
+#define CAU_HC_ENABLE_STATE			0			/* fsm is working with interrupt coalescing for this sb*/
+
+
+/*****************/
+/* IGU CONSTANTS */
+/*****************/
+
+#define MAX_SB_PER_PATH_K2					(368)
+#define MAX_SB_PER_PATH_BB					(288)
+#define MAX_TOT_SB_PER_PATH					MAX_SB_PER_PATH_K2
+
+#define MAX_SB_PER_PF_MIMD					129
+#define MAX_SB_PER_PF_SIMD					64
+#define MAX_SB_PER_VF						64
+
+/* Memory addresses on the BAR for the IGU Sub Block */
+#define IGU_MEM_BASE						0x0000
+
+#define IGU_MEM_MSIX_BASE					0x0000
+#define IGU_MEM_MSIX_UPPER					0x0101
+#define IGU_MEM_MSIX_RESERVED_UPPER			0x01ff
+
+#define IGU_MEM_PBA_MSIX_BASE				0x0200
+#define IGU_MEM_PBA_MSIX_UPPER				0x0202
+#define IGU_MEM_PBA_MSIX_RESERVED_UPPER		0x03ff
+
+#define IGU_CMD_INT_ACK_BASE				0x0400
+#define IGU_CMD_INT_ACK_UPPER				(IGU_CMD_INT_ACK_BASE + MAX_TOT_SB_PER_PATH - 1)
+#define IGU_CMD_INT_ACK_RESERVED_UPPER		0x05ff
+
+#define IGU_CMD_ATTN_BIT_UPD_UPPER			0x05f0
+#define IGU_CMD_ATTN_BIT_SET_UPPER			0x05f1
+#define IGU_CMD_ATTN_BIT_CLR_UPPER			0x05f2
+
+#define IGU_REG_SISR_MDPC_WMASK_UPPER		0x05f3
+#define IGU_REG_SISR_MDPC_WMASK_LSB_UPPER	0x05f4
+#define IGU_REG_SISR_MDPC_WMASK_MSB_UPPER	0x05f5
+#define IGU_REG_SISR_MDPC_WOMASK_UPPER		0x05f6
+
+#define IGU_CMD_PROD_UPD_BASE				0x0600
+#define IGU_CMD_PROD_UPD_UPPER				(IGU_CMD_PROD_UPD_BASE + MAX_TOT_SB_PER_PATH  - 1)
+#define IGU_CMD_PROD_UPD_RESERVED_UPPER		0x07ff
+
+/*****************/
+/* PXP CONSTANTS */
+/*****************/
+
+/* Bars for Blocks */
+#define PXP_BAR_GRC                                         0
+#define PXP_BAR_TSDM                                        0
+#define PXP_BAR_USDM                                        0
+#define PXP_BAR_XSDM                                        0
+#define PXP_BAR_MSDM                                        0
+#define PXP_BAR_YSDM                                        0
+#define PXP_BAR_PSDM                                        0
+#define PXP_BAR_IGU                                         0
+#define PXP_BAR_DQ                                          1
+
+/* PTT and GTT: PF admin window layout. Every *_END below is inclusive (START + LENGTH - 1). */
+#define PXP_NUM_PF_WINDOWS                                  12
+#define PXP_PER_PF_ENTRY_SIZE                               8
+#define PXP_NUM_GLOBAL_WINDOWS                              243
+#define PXP_GLOBAL_ENTRY_SIZE                               4
+#define PXP_ADMIN_WINDOW_ALLOWED_LENGTH                     4
+#define PXP_PF_WINDOW_ADMIN_START                           0
+#define PXP_PF_WINDOW_ADMIN_LENGTH                          0x1000
+#define PXP_PF_WINDOW_ADMIN_END                             (PXP_PF_WINDOW_ADMIN_START + PXP_PF_WINDOW_ADMIN_LENGTH - 1) /* = 0xfff */
+#define PXP_PF_WINDOW_ADMIN_PER_PF_START                    0
+#define PXP_PF_WINDOW_ADMIN_PER_PF_LENGTH                   (PXP_NUM_PF_WINDOWS * PXP_PER_PF_ENTRY_SIZE) /* = 12 * 8 = 0x60 */
+#define PXP_PF_WINDOW_ADMIN_PER_PF_END                      (PXP_PF_WINDOW_ADMIN_PER_PF_START + PXP_PF_WINDOW_ADMIN_PER_PF_LENGTH - 1) /* = 0x5f */
+#define PXP_PF_WINDOW_ADMIN_GLOBAL_START                    0x200
+#define PXP_PF_WINDOW_ADMIN_GLOBAL_LENGTH                   (PXP_NUM_GLOBAL_WINDOWS * PXP_GLOBAL_ENTRY_SIZE) /* = 243 * 4 = 0x3cc */
+#define PXP_PF_WINDOW_ADMIN_GLOBAL_END                      (PXP_PF_WINDOW_ADMIN_GLOBAL_START + PXP_PF_WINDOW_ADMIN_GLOBAL_LENGTH - 1) /* = 0x5cb */
+#define PXP_PF_GLOBAL_PRETEND_ADDR                          0x1f0
+#define PXP_PF_ME_OPAQUE_MASK_ADDR                          0xf4
+#define PXP_PF_ME_OPAQUE_ADDR                               0x1f8
+#define PXP_PF_ME_CONCRETE_ADDR                             0x1fc
+
+#define PXP_EXTERNAL_BAR_PF_WINDOW_START                    0x1000 /* PF windows begin right after the 0x1000-long admin window */
+#define PXP_EXTERNAL_BAR_PF_WINDOW_NUM                      PXP_NUM_PF_WINDOWS
+#define PXP_EXTERNAL_BAR_PF_WINDOW_SINGLE_SIZE              0x1000
+#define PXP_EXTERNAL_BAR_PF_WINDOW_LENGTH                   (PXP_EXTERNAL_BAR_PF_WINDOW_NUM * PXP_EXTERNAL_BAR_PF_WINDOW_SINGLE_SIZE) /* = 12 * 0x1000 = 0xc000 */
+#define PXP_EXTERNAL_BAR_PF_WINDOW_END                      (PXP_EXTERNAL_BAR_PF_WINDOW_START + PXP_EXTERNAL_BAR_PF_WINDOW_LENGTH - 1) /* = 0xcfff (inclusive) */
+
+#define PXP_EXTERNAL_BAR_GLOBAL_WINDOW_START                (PXP_EXTERNAL_BAR_PF_WINDOW_END + 1) /* = 0xd000, immediately after the PF windows */
+#define PXP_EXTERNAL_BAR_GLOBAL_WINDOW_NUM                  PXP_NUM_GLOBAL_WINDOWS
+#define PXP_EXTERNAL_BAR_GLOBAL_WINDOW_SINGLE_SIZE          0x1000
+#define PXP_EXTERNAL_BAR_GLOBAL_WINDOW_LENGTH               (PXP_EXTERNAL_BAR_GLOBAL_WINDOW_NUM * PXP_EXTERNAL_BAR_GLOBAL_WINDOW_SINGLE_SIZE) /* = 243 * 0x1000 = 0xf3000 */
+#define PXP_EXTERNAL_BAR_GLOBAL_WINDOW_END                  (PXP_EXTERNAL_BAR_GLOBAL_WINDOW_START + PXP_EXTERNAL_BAR_GLOBAL_WINDOW_LENGTH - 1) /* = 0xfffff (inclusive) */
+
+/* PF BAR: BAR0 address map. *_END values are inclusive; SDM regions start 0x80000 apart (SDM_LENGTH + SDM_RESERVED_LENGTH). */
+//#define PXP_BAR0_START_GRC 0x1000
+//#define PXP_BAR0_GRC_LENGTH 0xBFF000
+#define PXP_BAR0_START_GRC                      0x0000
+#define PXP_BAR0_GRC_LENGTH                     0x1C00000
+#define PXP_BAR0_END_GRC                        (PXP_BAR0_START_GRC + PXP_BAR0_GRC_LENGTH - 1) /* = 0x1bfffff */
+
+#define PXP_BAR0_START_IGU                      0x1C00000
+#define PXP_BAR0_IGU_LENGTH                     0x10000
+#define PXP_BAR0_END_IGU                        (PXP_BAR0_START_IGU + PXP_BAR0_IGU_LENGTH - 1) /* = 0x1c0ffff */
+
+#define PXP_BAR0_START_TSDM                     0x1C80000
+#define PXP_BAR0_SDM_LENGTH                     0x40000
+#define PXP_BAR0_SDM_RESERVED_LENGTH            0x40000
+#define PXP_BAR0_END_TSDM                       (PXP_BAR0_START_TSDM + PXP_BAR0_SDM_LENGTH - 1) /* = 0x1cbffff */
+
+#define PXP_BAR0_START_MSDM                     0x1D00000
+#define PXP_BAR0_END_MSDM                       (PXP_BAR0_START_MSDM + PXP_BAR0_SDM_LENGTH - 1) /* = 0x1d3ffff */
+
+#define PXP_BAR0_START_USDM                     0x1D80000
+#define PXP_BAR0_END_USDM                       (PXP_BAR0_START_USDM + PXP_BAR0_SDM_LENGTH - 1) /* = 0x1dbffff */
+
+#define PXP_BAR0_START_XSDM                     0x1E00000
+#define PXP_BAR0_END_XSDM                       (PXP_BAR0_START_XSDM + PXP_BAR0_SDM_LENGTH - 1) /* = 0x1e3ffff */
+
+#define PXP_BAR0_START_YSDM                     0x1E80000
+#define PXP_BAR0_END_YSDM                       (PXP_BAR0_START_YSDM + PXP_BAR0_SDM_LENGTH - 1) /* = 0x1ebffff */
+
+#define PXP_BAR0_START_PSDM                     0x1F00000
+#define PXP_BAR0_END_PSDM                       (PXP_BAR0_START_PSDM + PXP_BAR0_SDM_LENGTH - 1) /* = 0x1f3ffff */
+
+#define PXP_BAR0_FIRST_INVALID_ADDRESS          (PXP_BAR0_END_PSDM + 1) /* = 0x1f40000, one past the last mapped region */
+
+/* VF BAR: VF BAR0 address map. *_END values are inclusive (START + LENGTH - 1) unless noted otherwise. */
+#define PXP_VF_BAR0                             0
+
+#define PXP_VF_BAR0_START_GRC                   0x3E00
+#define PXP_VF_BAR0_GRC_LENGTH                  0x200
+#define PXP_VF_BAR0_END_GRC                     (PXP_VF_BAR0_START_GRC + PXP_VF_BAR0_GRC_LENGTH - 1) /* = 0x3fff */
+
+#define PXP_VF_BAR0_START_IGU                   0
+#define PXP_VF_BAR0_IGU_LENGTH                  0x3000
+#define PXP_VF_BAR0_END_IGU                     (PXP_VF_BAR0_START_IGU + PXP_VF_BAR0_IGU_LENGTH - 1) /* = 0x2fff */
+
+#define PXP_VF_BAR0_START_DQ                    0x3000
+#define PXP_VF_BAR0_DQ_LENGTH                   0x200
+#define PXP_VF_BAR0_DQ_OPAQUE_OFFSET            0
+#define PXP_VF_BAR0_ME_OPAQUE_ADDRESS           (PXP_VF_BAR0_START_DQ + PXP_VF_BAR0_DQ_OPAQUE_OFFSET) /* = 0x3000 */
+#define PXP_VF_BAR0_ME_CONCRETE_ADDRESS         (PXP_VF_BAR0_ME_OPAQUE_ADDRESS + 4) /* = 0x3004 */
+#define PXP_VF_BAR0_END_DQ                      (PXP_VF_BAR0_START_DQ + PXP_VF_BAR0_DQ_LENGTH - 1) /* = 0x31ff */
+
+#define PXP_VF_BAR0_START_TSDM_ZONE_B           0x3200
+#define PXP_VF_BAR0_SDM_LENGTH_ZONE_B           0x200
+#define PXP_VF_BAR0_END_TSDM_ZONE_B             (PXP_VF_BAR0_START_TSDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1) /* = 0x33ff */
+
+#define PXP_VF_BAR0_START_MSDM_ZONE_B           0x3400
+#define PXP_VF_BAR0_END_MSDM_ZONE_B             (PXP_VF_BAR0_START_MSDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1) /* = 0x35ff */
+
+#define PXP_VF_BAR0_START_USDM_ZONE_B           0x3600
+#define PXP_VF_BAR0_END_USDM_ZONE_B             (PXP_VF_BAR0_START_USDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1) /* = 0x37ff */
+
+#define PXP_VF_BAR0_START_XSDM_ZONE_B           0x3800
+#define PXP_VF_BAR0_END_XSDM_ZONE_B             (PXP_VF_BAR0_START_XSDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1) /* = 0x39ff */
+
+#define PXP_VF_BAR0_START_YSDM_ZONE_B           0x3a00
+#define PXP_VF_BAR0_END_YSDM_ZONE_B             (PXP_VF_BAR0_START_YSDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1) /* = 0x3bff */
+
+#define PXP_VF_BAR0_START_PSDM_ZONE_B           0x3c00
+#define PXP_VF_BAR0_END_PSDM_ZONE_B             (PXP_VF_BAR0_START_PSDM_ZONE_B + PXP_VF_BAR0_SDM_LENGTH_ZONE_B - 1) /* = 0x3dff */
+
+#define PXP_VF_BAR0_START_SDM_ZONE_A            0x4000
+#define PXP_VF_BAR0_END_SDM_ZONE_A              0x10000 /* NOTE(review): literal, not START + LENGTH - 1 like the other *_END macros - confirm whether this bound is meant to be exclusive */
+
+#define PXP_VF_BAR0_GRC_WINDOW_LENGTH           32
+
+#define PXP_ILT_PAGE_SIZE_NUM_BITS_MIN          12 /* presumably log2 of the smallest ILT page size (2^12 = 4096) - TODO confirm */
+#define PXP_ILT_BLOCK_FACTOR_MULTIPLIER         1024
+
+// ILT Records: record counts for the BB and K2 variants, and the larger of the two
+#define PXP_NUM_ILT_RECORDS_BB 7600
+#define PXP_NUM_ILT_RECORDS_K2 11000
+#define MAX_NUM_ILT_RECORDS MAX(PXP_NUM_ILT_RECORDS_BB,PXP_NUM_ILT_RECORDS_K2) /* relies on a MAX() macro defined outside this section */
+
+
+// Host Interface
+#define PXP_QUEUES_ZONE_MAX_NUM	320
+
+
+
+
+/*****************/
+/* PRM CONSTANTS */
+/*****************/
+#define PRM_DMA_PAD_BYTES_NUM  2
+/*****************/
+/* SDMs CONSTANTS  */
+/*****************/
+
+
+#define SDM_OP_GEN_TRIG_NONE			0 /* trigger codes; values 3 and 5 are not defined in this list */
+#define SDM_OP_GEN_TRIG_WAKE_THREAD		1
+#define SDM_OP_GEN_TRIG_AGG_INT			2
+#define SDM_OP_GEN_TRIG_LOADER			4
+#define SDM_OP_GEN_TRIG_INDICATE_ERROR	6
+#define SDM_OP_GEN_TRIG_RELEASE_THREAD	7
+
+/////////////////////////////////////////////////////////////
+// Completion types (SDM_COMP_TYPE_*): consecutive codes 0 through 8
+/////////////////////////////////////////////////////////////
+
+#define SDM_COMP_TYPE_NONE				0
+#define SDM_COMP_TYPE_WAKE_THREAD		1
+#define SDM_COMP_TYPE_AGG_INT			2
+#define SDM_COMP_TYPE_CM				3		// Send a direct message to the local CM and/or remote CMs. Destinations are defined by a vector in CompParams.
+#define SDM_COMP_TYPE_LOADER			4
+#define SDM_COMP_TYPE_PXP				5		// Send a direct message to PXP (like an "internal write" command) to write to a remote Storm RAM via the remote SDM
+#define SDM_COMP_TYPE_INDICATE_ERROR	6		// Indicate an error per thread
+#define SDM_COMP_TYPE_RELEASE_THREAD	7
+#define SDM_COMP_TYPE_RAM				8		// Write to local RAM as a completion
+
+
+/******************/
+/* PBF CONSTANTS  */
+/******************/
+
+/* Number of PBF command queue lines. Each line is 32B, so 3328 lines total 106496 bytes (104 KiB). */
+#define PBF_MAX_CMD_LINES 3328
+
+/* Number of BTB blocks. Each block is 256B, so 1440 blocks total 368640 bytes (360 KiB). */
+#define BTB_MAX_BLOCKS 1440
+
+/*****************/
+/* PRS CONSTANTS */
+/*****************/
+
+#define PRS_GFT_CAM_LINES_NO_MATCH  31
+
+/*
+ * Async data KCQ CQE: 4-byte cid, 2-byte itid and two single-byte fields
+ */
+struct async_data
+{
+	__le32 cid /* Context ID of the connection */;
+	__le16 itid /* Task ID of the task (for an error that happened on a task) */;
+	uint8_t error_code /* error code - relevant only if the opcode indicates it is an error */;
+	uint8_t fw_debug_param /* internal fw debug parameter */;
+};
+
+
+/*
+ * Interrupt coalescing TimeSet: one byte - TIMESET in bits [6:0], VALID in bit 7
+ */
+struct coalescing_timeset
+{
+	uint8_t value;
+#define COALESCING_TIMESET_TIMESET_MASK  0x7F /* Interrupt coalescing TimeSet (timeout_ticks = TimeSet shl (TimerRes+1)) */
+#define COALESCING_TIMESET_TIMESET_SHIFT 0
+#define COALESCING_TIMESET_VALID_MASK    0x1 /* The timeset takes effect only if this flag is set */
+#define COALESCING_TIMESET_VALID_SHIFT   7
+};
+
+
+struct common_queue_zone /* two little-endian 16-bit fields */
+{
+	__le16 ring_drv_data_consumer; /* presumably a ring consumer index written by the driver - TODO confirm */
+	__le16 reserved;
+};
+
+
+/*
+ * ETH Rx producers data: two little-endian 16-bit producer values (BD and CQE)
+ */
+struct eth_rx_prod_data
+{
+	__le16 bd_prod /* BD producer. */;
+	__le16 cqe_prod /* CQE producer. */;
+};
+
+
+struct regpair /* a value split into a low and a high little-endian 32-bit word */
+{
+	__le32 lo /* low word for reg-pair */;
+	__le32 hi /* high word for reg-pair */;
+};
+
+/*
+ * Event Ring VF-PF Channel data: a single regpair holding the message address
+ */
+struct vf_pf_channel_eqe_data
+{
+	struct regpair msg_addr /* VF-PF message address */;
+};
+
+struct iscsi_eqe_data /* iSCSI member of union event_ring_data; last byte packs PDU opcode [5:0], valid [6], reserved [7] */
+{
+	__le32 cid /* Context ID of the connection */;
+	__le16 conn_id /* NOTE(review): original comment described a task ID (copied from another struct); presumably a connection ID - confirm */;
+	uint8_t error_code /* error code - relevant only if the opcode indicates it is an error */;
+	uint8_t error_pdu_opcode_reserved;
+#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_MASK        0x3F /* Opcode of the processed PDU on which the error happened - updated for specific error codes, by default=0xFF. NOTE(review): 0xFF does not fit this 6-bit field - confirm */
+#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_SHIFT       0
+#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_VALID_MASK  0x1 /* Indicates to the driver whether the error_pdu_opcode field holds a valid value */
+#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_VALID_SHIFT 6
+#define ISCSI_EQE_DATA_RESERVED0_MASK               0x1
+#define ISCSI_EQE_DATA_RESERVED0_SHIFT              7
+};
+
+/*
+ * Event Ring malicious VF data: VF ID and error ID bytes followed by three reserved 16-bit words
+ */
+struct malicious_vf_eqe_data
+{
+	uint8_t vfId /* Malicious VF ID */;
+	uint8_t errId /* Malicious VF error */;
+	__le16 reserved[3];
+};
+
+/*
+ * Event Ring initial cleanup data: VF ID byte followed by seven reserved bytes
+ */
+struct initial_cleanup_eqe_data
+{
+	uint8_t vfId /* VF ID */;
+	uint8_t reserved[7];
+};
+
+/*
+ * Event Data Union: alternative views of the 8-byte data area embedded in struct event_ring_entry
+ */
+union event_ring_data
+{
+	uint8_t bytes[8] /* Byte Array */;
+	struct vf_pf_channel_eqe_data vf_pf_channel /* VF-PF Channel data */;
+	struct iscsi_eqe_data iscsi_info /* Dedicated fields for iSCSI data */;
+	struct regpair roceHandle /* Dedicated field for RoCE affiliated asynchronous error */;
+	struct malicious_vf_eqe_data malicious_vf /* Malicious VF data */;
+	struct initial_cleanup_eqe_data vf_init_cleanup /* VF Initial Cleanup data */;
+	struct regpair iwarp_handle /* Host handle for the Async Completions */;
+};
+
+
+/*
+ * Event Ring Entry: header fields (protocol, opcode, echo, return code, flags) followed by a union event_ring_data
+ */
+struct event_ring_entry
+{
+	uint8_t protocol_id /* Event Protocol ID */;
+	uint8_t opcode /* Event Opcode */;
+	__le16 reserved0 /* Reserved */;
+	__le16 echo /* Echo value from ramrod data on the host */;
+	uint8_t fw_return_code /* FW return code for SP ramrods */;
+	uint8_t flags;
+#define EVENT_RING_ENTRY_ASYNC_MASK      0x1 /* 0: synchronous EQE - a completion of an SP message. 1: asynchronous EQE */
+#define EVENT_RING_ENTRY_ASYNC_SHIFT     0
+#define EVENT_RING_ENTRY_RESERVED1_MASK  0x7F
+#define EVENT_RING_ENTRY_RESERVED1_SHIFT 1
+	union event_ring_data data;
+};
+
+
+
+
+
+/*
+ * Multi function mode (enumerators take implicit values starting at 0)
+ */
+enum mf_mode
+{
+	ERROR_MODE /* 0: Unsupported mode */,
+	MF_OVLAN /* 1: Multi function based on outer VLAN */,
+	MF_NPAR /* 2: Multi function based on MAC address (NIC partitioning) */,
+	MAX_MF_MODE
+};
+
+
+/*
+ * Per-protocol connection types (enumerators take implicit values starting at 0)
+ */
+enum protocol_type
+{
+	PROTOCOLID_ISCSI /* 0: iSCSI */,
+	PROTOCOLID_FCOE /* 1: FCoE */,
+	PROTOCOLID_ROCE /* 2: RoCE */,
+	PROTOCOLID_CORE /* 3: Core (light L2, slow path core) */,
+	PROTOCOLID_ETH /* 4: Ethernet */,
+	PROTOCOLID_IWARP /* 5: iWARP */,
+	PROTOCOLID_TOE /* 6: TOE */,
+	PROTOCOLID_PREROCE /* 7: Pre (tapeout) RoCE */,
+	PROTOCOLID_COMMON /* 8: ProtocolCommon */,
+	PROTOCOLID_TCP /* 9: TCP */,
+	MAX_PROTOCOL_TYPE
+};
+
+
+/*
+ * Ustorm ETH Queue Zone: a one-byte coalescing timeset followed by three reserved bytes
+ */
+struct ustorm_eth_queue_zone
+{
+	struct coalescing_timeset int_coalescing_timeset /* Rx interrupt coalescing TimeSet */;
+	uint8_t reserved[3];
+};
+
+
+struct ustorm_queue_zone /* the ETH-specific queue zone followed by the common queue zone */
+{
+	struct ustorm_eth_queue_zone eth;
+	struct common_queue_zone common;
+};
+
+
+
+/*
+ * CAU PI entry: one 32-bit word - PROD_VAL [15:0], PI_TIMESET [22:16], FSM_SEL [23], RESERVED [31:24]
+ */
+struct cau_pi_entry
+{
+	__le32 prod;
+#define CAU_PI_ENTRY_PROD_VAL_MASK    0xFFFF /* A per-protocol index PROD value. */
+#define CAU_PI_ENTRY_PROD_VAL_SHIFT   0
+#define CAU_PI_ENTRY_PI_TIMESET_MASK  0x7F /* This value determines the TimeSet that the PI is associated with  */
+#define CAU_PI_ENTRY_PI_TIMESET_SHIFT 16
+#define CAU_PI_ENTRY_FSM_SEL_MASK     0x1 /* Select the FSM within the SB */
+#define CAU_PI_ENTRY_FSM_SEL_SHIFT    23
+#define CAU_PI_ENTRY_RESERVED_MASK    0xFF /* Reserved bits (the original comment here duplicated the FSM_SEL description) */
+#define CAU_PI_ENTRY_RESERVED_SHIFT   24
+};
+
+
+/*
+ * CAU status block entry: two 32-bit words - 'data' (SB producer and RX/TX state) and 'params' (timesets, timer resolutions, VF/PF, TPH)
+ */
+struct cau_sb_entry
+{
+	__le32 data;
+#define CAU_SB_ENTRY_SB_PROD_MASK      0xFFFFFF /* The SB PROD index which is sent to the IGU. */
+#define CAU_SB_ENTRY_SB_PROD_SHIFT     0
+#define CAU_SB_ENTRY_STATE0_MASK       0xF /* RX state */
+#define CAU_SB_ENTRY_STATE0_SHIFT      24
+#define CAU_SB_ENTRY_STATE1_MASK       0xF /* TX state */
+#define CAU_SB_ENTRY_STATE1_SHIFT      28
+	__le32 params;
+#define CAU_SB_ENTRY_SB_TIMESET0_MASK  0x7F /* Indicates the RX TimeSet that this SB is associated with. */
+#define CAU_SB_ENTRY_SB_TIMESET0_SHIFT 0
+#define CAU_SB_ENTRY_SB_TIMESET1_MASK  0x7F /* Indicates the TX TimeSet that this SB is associated with. */
+#define CAU_SB_ENTRY_SB_TIMESET1_SHIFT 7
+#define CAU_SB_ENTRY_TIMER_RES0_MASK   0x3 /* This value determines the RX FSM timer resolution in ticks  */
+#define CAU_SB_ENTRY_TIMER_RES0_SHIFT  14
+#define CAU_SB_ENTRY_TIMER_RES1_MASK   0x3 /* This value determines the TX FSM timer resolution in ticks  */
+#define CAU_SB_ENTRY_TIMER_RES1_SHIFT  16
+#define CAU_SB_ENTRY_VF_NUMBER_MASK    0xFF
+#define CAU_SB_ENTRY_VF_NUMBER_SHIFT   18
+#define CAU_SB_ENTRY_VF_VALID_MASK     0x1
+#define CAU_SB_ENTRY_VF_VALID_SHIFT    26
+#define CAU_SB_ENTRY_PF_NUMBER_MASK    0xF
+#define CAU_SB_ENTRY_PF_NUMBER_SHIFT   27
+#define CAU_SB_ENTRY_TPH_MASK          0x1 /* If set, the TPH STAG is equal to the SB number. Otherwise the STAG is all ones. */
+#define CAU_SB_ENTRY_TPH_SHIFT         31
+};
+
+
+/*
+ * core doorbell data: params byte (DEST [1:0], AGG_CMD [3:2], BYPASS_EN [4], RESERVED [5], AGG_VAL_SEL [7:6]), agg_flags byte, 16-bit spq_prod
+ */
+struct core_db_data
+{
+	uint8_t params;
+#define CORE_DB_DATA_DEST_MASK         0x3 /* destination of doorbell (use enum db_dest) */
+#define CORE_DB_DATA_DEST_SHIFT        0
+#define CORE_DB_DATA_AGG_CMD_MASK      0x3 /* aggregative command to CM (use enum db_agg_cmd_sel) */
+#define CORE_DB_DATA_AGG_CMD_SHIFT     2
+#define CORE_DB_DATA_BYPASS_EN_MASK    0x1 /* enable QM bypass */
+#define CORE_DB_DATA_BYPASS_EN_SHIFT   4
+#define CORE_DB_DATA_RESERVED_MASK     0x1
+#define CORE_DB_DATA_RESERVED_SHIFT    5
+#define CORE_DB_DATA_AGG_VAL_SEL_MASK  0x3 /* aggregative value selection */
+#define CORE_DB_DATA_AGG_VAL_SEL_SHIFT 6
+	uint8_t agg_flags /* one bit for every DQ counter flag in the CM context that DQ can increment */;
+	__le16 spq_prod;
+};
+
+
+/*
+ * Enum of doorbell aggregative command selection (implicit values 0..3, fits the 2-bit CORE_DB_DATA_AGG_CMD field)
+ */
+enum db_agg_cmd_sel
+{
+	DB_AGG_CMD_NOP /* 0: No operation */,
+	DB_AGG_CMD_SET /* 1: Set the value */,
+	DB_AGG_CMD_ADD /* 2: Add the value */,
+	DB_AGG_CMD_MAX /* 3: Set max of current and new value */,
+	MAX_DB_AGG_CMD_SEL
+};
+
+
+/*
+ * Enum of doorbell destination (implicit values; note DB_NUM_DESTINATIONS is 3 and MAX_DB_DEST is 4)
+ */
+enum db_dest
+{
+	DB_DEST_XCM /* 0: TX doorbell to XCM */,
+	DB_DEST_UCM /* 1: RX doorbell to UCM */,
+	DB_DEST_TCM /* 2: RX doorbell to TCM */,
+	DB_NUM_DESTINATIONS,
+	MAX_DB_DEST
+};
+
+
+/*
+ * Enum of doorbell DPM types (implicit values 0..3, fits the 2-bit DPM_TYPE doorbell fields)
+ */
+enum db_dpm_type
+{
+	DPM_LEGACY /* 0: Legacy DPM - to Xstorm RAM */,
+	DPM_ROCE /* 1: RoCE DPM - to NIG */,
+	DPM_L2_INLINE /* 2: L2 DPM inline - to PBF, with packet data on doorbell */,
+	DPM_L2_BD /* 3: L2 DPM with BD - to PBF, with TX BD data on doorbell */,
+	MAX_DB_DPM_TYPE
+};
+
+
+/*
+ * Structure for doorbell data, in L2 DPM mode, for the first doorbell in a DPM burst (params: SIZE [5:0], DPM_TYPE [7:6], NUM_BDS [15:8], PKT_SIZE [26:16], SGE_NUM [30:28])
+ */
+struct db_l2_dpm_data
+{
+	__le16 icid /* internal CID */;
+	__le16 bd_prod /* bd producer value to update */;
+	__le32 params;
+#define DB_L2_DPM_DATA_SIZE_MASK       0x3F /* Size in QWORD-s of the DPM burst */
+#define DB_L2_DPM_DATA_SIZE_SHIFT      0
+#define DB_L2_DPM_DATA_DPM_TYPE_MASK   0x3 /* Type of DPM transaction (DPM_L2_INLINE or DPM_L2_BD) (use enum db_dpm_type) */
+#define DB_L2_DPM_DATA_DPM_TYPE_SHIFT  6
+#define DB_L2_DPM_DATA_NUM_BDS_MASK    0xFF /* number of BD-s */
+#define DB_L2_DPM_DATA_NUM_BDS_SHIFT   8
+#define DB_L2_DPM_DATA_PKT_SIZE_MASK   0x7FF /* size of the packet to be transmitted in bytes */
+#define DB_L2_DPM_DATA_PKT_SIZE_SHIFT  16
+#define DB_L2_DPM_DATA_RESERVED0_MASK  0x1
+#define DB_L2_DPM_DATA_RESERVED0_SHIFT 27
+#define DB_L2_DPM_DATA_SGE_NUM_MASK    0x7 /* In DPM_L2_BD mode: the number of SGE-s */
+#define DB_L2_DPM_DATA_SGE_NUM_SHIFT   28
+#define DB_L2_DPM_DATA_RESERVED1_MASK  0x1
+#define DB_L2_DPM_DATA_RESERVED1_SHIFT 31
+};
+
+
+/*
+ * Structure for SGE in a DPM doorbell of type DPM_L2_BD (bitfields: TPH_ST_INDEX [8:0], ST_VALID [11], the rest reserved)
+ */
+struct db_l2_dpm_sge
+{
+	struct regpair addr /* Single continuous buffer */;
+	__le16 nbytes /* Number of bytes in this BD. */;
+	__le16 bitfields;
+#define DB_L2_DPM_SGE_TPH_ST_INDEX_MASK  0x1FF /* The TPH STAG index value */
+#define DB_L2_DPM_SGE_TPH_ST_INDEX_SHIFT 0
+#define DB_L2_DPM_SGE_RESERVED0_MASK     0x3
+#define DB_L2_DPM_SGE_RESERVED0_SHIFT    9
+#define DB_L2_DPM_SGE_ST_VALID_MASK      0x1 /* Indicates whether an ST hint is requested */
+#define DB_L2_DPM_SGE_ST_VALID_SHIFT     11
+#define DB_L2_DPM_SGE_RESERVED1_MASK     0xF
+#define DB_L2_DPM_SGE_RESERVED1_SHIFT    12
+	__le32 reserved2;
+};
+
+
+/*
+ * Structure for doorbell address, in legacy mode: RESERVED0 [1:0], DEMS [4:2], ICID [31:5]
+ */
+struct db_legacy_addr
+{
+	__le32 addr;
+#define DB_LEGACY_ADDR_RESERVED0_MASK  0x3
+#define DB_LEGACY_ADDR_RESERVED0_SHIFT 0
+#define DB_LEGACY_ADDR_DEMS_MASK       0x7 /* doorbell extraction mode specifier - 0 if not used */
+#define DB_LEGACY_ADDR_DEMS_SHIFT      2
+#define DB_LEGACY_ADDR_ICID_MASK       0x7FFFFFF /* internal CID */
+#define DB_LEGACY_ADDR_ICID_SHIFT      5
+};
+
+
+/*
+ * Structure for doorbell address, in PWM mode: RESERVED0 [2:0], OFFSET [9:3], WID [11:10], DPI [27:12], RESERVED1 [31:28]
+ */
+struct db_pwm_addr
+{
+	__le32 addr;
+#define DB_PWM_ADDR_RESERVED0_MASK  0x7
+#define DB_PWM_ADDR_RESERVED0_SHIFT 0
+#define DB_PWM_ADDR_OFFSET_MASK     0x7F /* Offset in PWM address space */
+#define DB_PWM_ADDR_OFFSET_SHIFT    3
+#define DB_PWM_ADDR_WID_MASK        0x3 /* Window ID */
+#define DB_PWM_ADDR_WID_SHIFT       10
+#define DB_PWM_ADDR_DPI_MASK        0xFFFF /* Doorbell page ID */
+#define DB_PWM_ADDR_DPI_SHIFT       12
+#define DB_PWM_ADDR_RESERVED1_MASK  0xF
+#define DB_PWM_ADDR_RESERVED1_SHIFT 28
+};
+
+
+/*
+ * Parameters to RoCE firmware, passed in EDPM doorbell (SIZE [5:0], DPM_TYPE [7:6], OPCODE [15:8], WQE_SIZE [26:16], COMPLETION_FLG [28], S_FLG [29])
+ */
+struct db_roce_dpm_params
+{
+	__le32 params;
+#define DB_ROCE_DPM_PARAMS_SIZE_MASK            0x3F /* Size in QWORD-s of the DPM burst */
+#define DB_ROCE_DPM_PARAMS_SIZE_SHIFT           0
+#define DB_ROCE_DPM_PARAMS_DPM_TYPE_MASK        0x3 /* Type of DPM transaction (DPM_ROCE) (use enum db_dpm_type) */
+#define DB_ROCE_DPM_PARAMS_DPM_TYPE_SHIFT       6
+#define DB_ROCE_DPM_PARAMS_OPCODE_MASK          0xFF /* opcode for ROCE operation */
+#define DB_ROCE_DPM_PARAMS_OPCODE_SHIFT         8
+#define DB_ROCE_DPM_PARAMS_WQE_SIZE_MASK        0x7FF /* the size of the WQE payload in bytes */
+#define DB_ROCE_DPM_PARAMS_WQE_SIZE_SHIFT       16
+#define DB_ROCE_DPM_PARAMS_RESERVED0_MASK       0x1
+#define DB_ROCE_DPM_PARAMS_RESERVED0_SHIFT      27
+#define DB_ROCE_DPM_PARAMS_COMPLETION_FLG_MASK  0x1 /* RoCE completion flag */
+#define DB_ROCE_DPM_PARAMS_COMPLETION_FLG_SHIFT 28
+#define DB_ROCE_DPM_PARAMS_S_FLG_MASK           0x1 /* RoCE S flag */
+#define DB_ROCE_DPM_PARAMS_S_FLG_SHIFT          29
+#define DB_ROCE_DPM_PARAMS_RESERVED1_MASK       0x3
+#define DB_ROCE_DPM_PARAMS_RESERVED1_SHIFT      30
+};
+
+/*
+ * Structure for doorbell data, in ROCE DPM mode, for the first doorbell in a DPM burst (icid, producer value, then the 32-bit params word)
+ */
+struct db_roce_dpm_data
+{
+	__le16 icid /* internal CID */;
+	__le16 prod_val /* aggregated value to update */;
+	struct db_roce_dpm_params params /* parameters passed to RoCE firmware */;
+};
+
+
+
+/*
+ * IGU interrupt command (explicit values 0..3, fits the 2-bit IGU_PROD_CONS_UPDATE_ENABLE_INT field)
+ */
+enum igu_int_cmd
+{
+	IGU_INT_ENABLE=0,
+	IGU_INT_DISABLE=1,
+	IGU_INT_NOP=2,
+	IGU_INT_NOP2=3,
+	MAX_IGU_INT_CMD
+};
+
+
+/*
+ * IGU producer or consumer update command: a 32-bit flags word (SB_INDEX [23:0], UPDATE_FLAG [24], ENABLE_INT [26:25], SEGMENT_ACCESS [27], TIMER_MASK [28], COMMAND_TYPE [31]) plus a reserved word
+ */
+struct igu_prod_cons_update
+{
+	__le32 sb_id_and_flags;
+#define IGU_PROD_CONS_UPDATE_SB_INDEX_MASK        0xFFFFFF
+#define IGU_PROD_CONS_UPDATE_SB_INDEX_SHIFT       0
+#define IGU_PROD_CONS_UPDATE_UPDATE_FLAG_MASK     0x1
+#define IGU_PROD_CONS_UPDATE_UPDATE_FLAG_SHIFT    24
+#define IGU_PROD_CONS_UPDATE_ENABLE_INT_MASK      0x3 /* interrupt enable/disable/nop (use enum igu_int_cmd) */
+#define IGU_PROD_CONS_UPDATE_ENABLE_INT_SHIFT     25
+#define IGU_PROD_CONS_UPDATE_SEGMENT_ACCESS_MASK  0x1 /*  (use enum igu_seg_access) */
+#define IGU_PROD_CONS_UPDATE_SEGMENT_ACCESS_SHIFT 27
+#define IGU_PROD_CONS_UPDATE_TIMER_MASK_MASK      0x1
+#define IGU_PROD_CONS_UPDATE_TIMER_MASK_SHIFT     28
+#define IGU_PROD_CONS_UPDATE_RESERVED0_MASK       0x3
+#define IGU_PROD_CONS_UPDATE_RESERVED0_SHIFT      29
+#define IGU_PROD_CONS_UPDATE_COMMAND_TYPE_MASK    0x1 /* NOTE(review): original text read "must always be set cleared" - ambiguous, confirm the required value (use enum command_type_bit) */
+#define IGU_PROD_CONS_UPDATE_COMMAND_TYPE_SHIFT   31
+	__le32 reserved1;
+};
+
+
+/*
+ * IGU segment access, for the default status block only (values for the 1-bit IGU_PROD_CONS_UPDATE_SEGMENT_ACCESS field)
+ */
+enum igu_seg_access
+{
+	IGU_SEG_ACCESS_REG=0,
+	IGU_SEG_ACCESS_ATTN=1,
+	MAX_IGU_SEG_ACCESS
+};
+
+
+/*
+ * Enumeration for the L3 type field of parsing_and_err_flags. L3Type: 0 - unknown (not IP), 1 - IPv4, 2 - IPv6 (this field can be filled according to the last ethertype)
+ */
+enum l3_type
+{
+	e_l3Type_unknown,
+	e_l3Type_ipv4,
+	e_l3Type_ipv6,
+	MAX_L3_TYPE
+};
+
+
+/*
+ * Enumeration for the l4Protocol field of parsing_and_err_flags. L4 protocol: 0 - none, 1 - TCP, 2 - UDP. If the packet is an IPv4 fragment and it is not the first fragment, the protocol type should be set to none.
+ */
+enum l4_protocol
+{
+	e_l4Protocol_none,
+	e_l4Protocol_tcp,
+	e_l4Protocol_udp,
+	MAX_L4_PROTOCOL
+};
+
+
+/*
+ * Parsing and error flags field: one 16-bit word - L3TYPE [1:0], L4PROTOCOL [3:2], then twelve single-bit flags in bits 4..15.
+ */
+struct parsing_and_err_flags
+{
+	__le16 flags;
+#define PARSING_AND_ERR_FLAGS_L3TYPE_MASK                      0x3 /* L3Type: 0 - unknown (not IP), 1 - IPv4, 2 - IPv6 (this field can be filled according to the last ethertype) (use enum l3_type) */
+#define PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT                     0
+#define PARSING_AND_ERR_FLAGS_L4PROTOCOL_MASK                  0x3 /* L4 protocol: 0 - none, 1 - TCP, 2 - UDP. If the packet is an IPv4 fragment and it is not the first fragment, the protocol type should be set to none. (use enum l4_protocol) */
+#define PARSING_AND_ERR_FLAGS_L4PROTOCOL_SHIFT                 2
+#define PARSING_AND_ERR_FLAGS_IPV4FRAG_MASK                    0x1 /* Set if the packet is an IPv4 fragment. */
+#define PARSING_AND_ERR_FLAGS_IPV4FRAG_SHIFT                   4
+#define PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK               0x1 /* Set if a VLAN tag exists. Invalid if the tunnel type is IP GRE or IP GENEVE. */
+#define PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT              5
+#define PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK        0x1 /* Set if L4 checksum was calculated. */
+#define PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT       6
+#define PARSING_AND_ERR_FLAGS_TIMESYNCPKT_MASK                 0x1 /* Set for PTP packet. */
+#define PARSING_AND_ERR_FLAGS_TIMESYNCPKT_SHIFT                7
+#define PARSING_AND_ERR_FLAGS_TIMESTAMPRECORDED_MASK           0x1 /* Set if PTP timestamp recorded. */
+#define PARSING_AND_ERR_FLAGS_TIMESTAMPRECORDED_SHIFT          8
+#define PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK                  0x1 /* Set if any of version-mismatch, hdr-len-error, ipv4-cksm or ipv6 ver mismatch is set */
+#define PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT                 9
+#define PARSING_AND_ERR_FLAGS_L4CHKSMERROR_MASK                0x1 /* Set if L4 checksum validation failed. Valid only if L4 checksum was calculated. */
+#define PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT               10
+#define PARSING_AND_ERR_FLAGS_TUNNELEXIST_MASK                 0x1 /* Set if GRE/VXLAN/GENEVE tunnel detected. */
+#define PARSING_AND_ERR_FLAGS_TUNNELEXIST_SHIFT                11
+#define PARSING_AND_ERR_FLAGS_TUNNEL8021QTAGEXIST_MASK         0x1 /* Set if VLAN tag exists in tunnel header. */
+#define PARSING_AND_ERR_FLAGS_TUNNEL8021QTAGEXIST_SHIFT        12
+#define PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_MASK            0x1 /* Set if any of tunnel-ipv4-version-mismatch, tunnel-ipv4-hdr-len-error, tunnel-ipv4-cksm or tunneling ipv6 ver mismatch is set */
+#define PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_SHIFT           13
+#define PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_MASK  0x1 /* Set if GRE or VXLAN/GENEVE UDP checksum was calculated. */
+#define PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_SHIFT 14
+#define PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_MASK          0x1 /* Set if tunnel L4 checksum validation failed. Valid only if tunnel L4 checksum was calculated. */
+#define PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_SHIFT         15
+};
+
+
+/*
+ * Pb context: four little-endian 32-bit words named crc
+ */
+struct pb_context
+{
+	__le32 crc[4];
+};
+
+
+/*
+ * Concrete Function ID: one 16-bit word - PFID [3:0], PORT [5:4], PATH [6], VFVALID [7], VFID [15:8].
+ */
+struct pxp_concrete_fid
+{
+	__le16 fid;
+#define PXP_CONCRETE_FID_PFID_MASK     0xF /* Parent PFID */
+#define PXP_CONCRETE_FID_PFID_SHIFT    0
+#define PXP_CONCRETE_FID_PORT_MASK     0x3 /* port number */
+#define PXP_CONCRETE_FID_PORT_SHIFT    4
+#define PXP_CONCRETE_FID_PATH_MASK     0x1 /* path number */
+#define PXP_CONCRETE_FID_PATH_SHIFT    6
+#define PXP_CONCRETE_FID_VFVALID_MASK  0x1
+#define PXP_CONCRETE_FID_VFVALID_SHIFT 7
+#define PXP_CONCRETE_FID_VFID_MASK     0xFF
+#define PXP_CONCRETE_FID_VFID_SHIFT    8
+};
+
+
+/*
+ * Concrete Function ID as used in the pretend command: PFID [3:0], RESERVED [6:4], VFVALID [7], VFID [15:8].
+ */
+struct pxp_pretend_concrete_fid
+{
+	__le16 fid;
+#define PXP_PRETEND_CONCRETE_FID_PFID_MASK      0xF /* Parent PFID */
+#define PXP_PRETEND_CONCRETE_FID_PFID_SHIFT     0
+#define PXP_PRETEND_CONCRETE_FID_RESERVED_MASK  0x7 /* Reserved here; these bits hold PORT and PATH in struct pxp_concrete_fid (original note: port number, only when part of ME register) */
+#define PXP_PRETEND_CONCRETE_FID_RESERVED_SHIFT 4
+#define PXP_PRETEND_CONCRETE_FID_VFVALID_MASK   0x1
+#define PXP_PRETEND_CONCRETE_FID_VFVALID_SHIFT  7
+#define PXP_PRETEND_CONCRETE_FID_VFID_MASK      0xFF
+#define PXP_PRETEND_CONCRETE_FID_VFID_SHIFT     8
+};
+
+/*
+ * Function ID: the same 16 bits viewed either as a concrete FID bitfield or as an opaque FID value.
+ */
+union pxp_pretend_fid
+{
+	struct pxp_pretend_concrete_fid concrete_fid;
+	__le16 opaque_fid;
+};
+
+/*
+ * Pxp Pretend Command Register: a 16-bit FID followed by a 16-bit control word (PATH [0], USE_PORT [1], PORT [3:2], PRETEND_PATH [12], PRETEND_PORT [13], PRETEND_FUNCTION [14], IS_CONCRETE [15]).
+ */
+struct pxp_pretend_cmd
+{
+	union pxp_pretend_fid fid;
+	__le16 control;
+#define PXP_PRETEND_CMD_PATH_MASK              0x1
+#define PXP_PRETEND_CMD_PATH_SHIFT             0
+#define PXP_PRETEND_CMD_USE_PORT_MASK          0x1
+#define PXP_PRETEND_CMD_USE_PORT_SHIFT         1
+#define PXP_PRETEND_CMD_PORT_MASK              0x3
+#define PXP_PRETEND_CMD_PORT_SHIFT             2
+#define PXP_PRETEND_CMD_RESERVED0_MASK         0xF
+#define PXP_PRETEND_CMD_RESERVED0_SHIFT        4
+#define PXP_PRETEND_CMD_RESERVED1_MASK         0xF
+#define PXP_PRETEND_CMD_RESERVED1_SHIFT        8
+#define PXP_PRETEND_CMD_PRETEND_PATH_MASK      0x1 /* is pretend mode? */
+#define PXP_PRETEND_CMD_PRETEND_PATH_SHIFT     12
+#define PXP_PRETEND_CMD_PRETEND_PORT_MASK      0x1 /* is pretend mode? */
+#define PXP_PRETEND_CMD_PRETEND_PORT_SHIFT     13
+#define PXP_PRETEND_CMD_PRETEND_FUNCTION_MASK  0x1 /* is pretend mode? */
+#define PXP_PRETEND_CMD_PRETEND_FUNCTION_SHIFT 14
+#define PXP_PRETEND_CMD_IS_CONCRETE_MASK       0x1 /* is fid concrete? */
+#define PXP_PRETEND_CMD_IS_CONCRETE_SHIFT      15
+};
+
+
+
+
+/*
+ * PTT Record in PXP Admin Window: a 32-bit offset word (OFFSET [22:0], RESERVED0 [31:23]) followed by a pretend command.
+ */
+struct pxp_ptt_entry
+{
+	__le32 offset;
+#define PXP_PTT_ENTRY_OFFSET_MASK     0x7FFFFF
+#define PXP_PTT_ENTRY_OFFSET_SHIFT    0
+#define PXP_PTT_ENTRY_RESERVED0_MASK  0x1FF
+#define PXP_PTT_ENTRY_RESERVED0_SHIFT 23
+	struct pxp_pretend_cmd pretend;
+};
+
+
+/*
+ * VF Zone A Permission Register: one 32-bit word - VFID [7:0], VALID [8], the remaining bits reserved.
+ */
+struct pxp_vf_zone_a_permission
+{
+	__le32 control;
+#define PXP_VF_ZONE_A_PERMISSION_VFID_MASK       0xFF
+#define PXP_VF_ZONE_A_PERMISSION_VFID_SHIFT      0
+#define PXP_VF_ZONE_A_PERMISSION_VALID_MASK      0x1
+#define PXP_VF_ZONE_A_PERMISSION_VALID_SHIFT     8
+#define PXP_VF_ZONE_A_PERMISSION_RESERVED0_MASK  0x7F
+#define PXP_VF_ZONE_A_PERMISSION_RESERVED0_SHIFT 9
+#define PXP_VF_ZONE_A_PERMISSION_RESERVED1_MASK  0xFFFF
+#define PXP_VF_ZONE_A_PERMISSION_RESERVED1_SHIFT 16
+};
+
+
+/*
+ * Rdif context: tag values, an 8-bit flags0, partial DIF/CRC/checksum data, a 16-bit flags1 and a 16-bit state; the macros after each flag field give its bit layout
+ */
+struct rdif_task_context
+{
+	__le32 initialRefTag;
+	__le16 appTagValue;
+	__le16 appTagMask;
+	uint8_t flags0;
+#define RDIF_TASK_CONTEXT_IGNOREAPPTAG_MASK            0x1
+#define RDIF_TASK_CONTEXT_IGNOREAPPTAG_SHIFT           0
+#define RDIF_TASK_CONTEXT_INITIALREFTAGVALID_MASK      0x1
+#define RDIF_TASK_CONTEXT_INITIALREFTAGVALID_SHIFT     1
+#define RDIF_TASK_CONTEXT_HOSTGUARDTYPE_MASK           0x1 /* 0 = IP checksum, 1 = CRC */
+#define RDIF_TASK_CONTEXT_HOSTGUARDTYPE_SHIFT          2
+#define RDIF_TASK_CONTEXT_SETERRORWITHEOP_MASK         0x1
+#define RDIF_TASK_CONTEXT_SETERRORWITHEOP_SHIFT        3
+#define RDIF_TASK_CONTEXT_PROTECTIONTYPE_MASK          0x3 /* 1/2/3 - Protection Type */
+#define RDIF_TASK_CONTEXT_PROTECTIONTYPE_SHIFT         4
+#define RDIF_TASK_CONTEXT_CRC_SEED_MASK                0x1 /* 0=0x0000, 1=0xffff */
+#define RDIF_TASK_CONTEXT_CRC_SEED_SHIFT               6
+#define RDIF_TASK_CONTEXT_KEEPREFTAGCONST_MASK         0x1 /* Keep reference tag constant */
+#define RDIF_TASK_CONTEXT_KEEPREFTAGCONST_SHIFT        7
+	uint8_t partialDifData[7];
+	__le16 partialCrcValue;
+	__le16 partialChecksumValue;
+	__le32 offsetInIO;
+	__le16 flags1;
+#define RDIF_TASK_CONTEXT_VALIDATEGUARD_MASK           0x1
+#define RDIF_TASK_CONTEXT_VALIDATEGUARD_SHIFT          0
+#define RDIF_TASK_CONTEXT_VALIDATEAPPTAG_MASK          0x1
+#define RDIF_TASK_CONTEXT_VALIDATEAPPTAG_SHIFT         1
+#define RDIF_TASK_CONTEXT_VALIDATEREFTAG_MASK          0x1
+#define RDIF_TASK_CONTEXT_VALIDATEREFTAG_SHIFT         2
+#define RDIF_TASK_CONTEXT_FORWARDGUARD_MASK            0x1
+#define RDIF_TASK_CONTEXT_FORWARDGUARD_SHIFT           3
+#define RDIF_TASK_CONTEXT_FORWARDAPPTAG_MASK           0x1
+#define RDIF_TASK_CONTEXT_FORWARDAPPTAG_SHIFT          4
+#define RDIF_TASK_CONTEXT_FORWARDREFTAG_MASK           0x1
+#define RDIF_TASK_CONTEXT_FORWARDREFTAG_SHIFT          5
+#define RDIF_TASK_CONTEXT_INTERVALSIZE_MASK            0x7 /* 0=512B, 1=1KB, 2=2KB, 3=4KB, 4=8KB */
+#define RDIF_TASK_CONTEXT_INTERVALSIZE_SHIFT           6
+#define RDIF_TASK_CONTEXT_HOSTINTERFACE_MASK           0x3 /* 0=None, 1=DIF, 2=DIX */
+#define RDIF_TASK_CONTEXT_HOSTINTERFACE_SHIFT          9
+#define RDIF_TASK_CONTEXT_DIFBEFOREDATA_MASK           0x1 /* DIF tag right at the beginning of DIF interval */
+#define RDIF_TASK_CONTEXT_DIFBEFOREDATA_SHIFT          11
+#define RDIF_TASK_CONTEXT_RESERVED0_MASK               0x1
+#define RDIF_TASK_CONTEXT_RESERVED0_SHIFT              12
+#define RDIF_TASK_CONTEXT_NETWORKINTERFACE_MASK        0x1 /* 0=None, 1=DIF */
+#define RDIF_TASK_CONTEXT_NETWORKINTERFACE_SHIFT       13
+#define RDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK_MASK   0x1 /* Forward application tag with mask */
+#define RDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK_SHIFT  14
+#define RDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK_MASK   0x1 /* Forward reference tag with mask */
+#define RDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK_SHIFT  15
+	__le16 state;
+#define RDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFT_MASK    0xF
+#define RDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFT_SHIFT   0
+#define RDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFT_MASK  0xF
+#define RDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFT_SHIFT 4
+#define RDIF_TASK_CONTEXT_ERRORINIO_MASK               0x1
+#define RDIF_TASK_CONTEXT_ERRORINIO_SHIFT              8
+#define RDIF_TASK_CONTEXT_CHECKSUMOVERFLOW_MASK        0x1
+#define RDIF_TASK_CONTEXT_CHECKSUMOVERFLOW_SHIFT       9
+#define RDIF_TASK_CONTEXT_REFTAGMASK_MASK              0xF /* mask for reference tag handling */
+#define RDIF_TASK_CONTEXT_REFTAGMASK_SHIFT             10
+#define RDIF_TASK_CONTEXT_RESERVED1_MASK               0x3
+#define RDIF_TASK_CONTEXT_RESERVED1_SHIFT              14
+	__le32 reserved2;
+};
+
+
+
+/*
+ * RSS hash type (explicit values 0..6; MAX_RSS_HASH_TYPE follows as 7)
+ */
+enum rss_hash_type
+{
+	RSS_HASH_TYPE_DEFAULT=0,
+	RSS_HASH_TYPE_IPV4=1,
+	RSS_HASH_TYPE_TCP_IPV4=2,
+	RSS_HASH_TYPE_IPV6=3,
+	RSS_HASH_TYPE_TCP_IPV6=4,
+	RSS_HASH_TYPE_UDP_IPV4=5,
+	RSS_HASH_TYPE_UDP_IPV6=6,
+	MAX_RSS_HASH_TYPE
+};
+
+
+/*
+ * Status block structure: a pi_array of PIS_PER_SB __le16 entries followed by two packed __le32 words.
+ */
+struct status_block
+{
+	__le16 pi_array[PIS_PER_SB];
+	__le32 sb_num; /* packed: SB_NUM bits 0-8, ZERO_PAD bits 9-15, ZERO_PAD2 bits 16-31 */
+#define STATUS_BLOCK_SB_NUM_MASK      0x1FF
+#define STATUS_BLOCK_SB_NUM_SHIFT     0
+#define STATUS_BLOCK_ZERO_PAD_MASK    0x7F
+#define STATUS_BLOCK_ZERO_PAD_SHIFT   9
+#define STATUS_BLOCK_ZERO_PAD2_MASK   0xFFFF
+#define STATUS_BLOCK_ZERO_PAD2_SHIFT  16
+	__le32 prod_index; /* packed: PROD_INDEX bits 0-23, ZERO_PAD3 bits 24-31 */
+#define STATUS_BLOCK_PROD_INDEX_MASK  0xFFFFFF
+#define STATUS_BLOCK_PROD_INDEX_SHIFT 0
+#define STATUS_BLOCK_ZERO_PAD3_MASK   0xFF
+#define STATUS_BLOCK_ZERO_PAD3_SHIFT  24
+};
+
+
+/*
+ * Tdif task context. stateB, flags0 and flags1 are packed words; their sub-fields are given by the MASK/SHIFT pairs.
+ */
+struct tdif_task_context
+{
+	__le32 initialRefTag;
+	__le16 appTagValue;
+	__le16 appTagMask;
+	__le16 partialCrcValueB;
+	__le16 partialChecksumValueB;
+	__le16 stateB; /* packed: the MASK/SHIFT pairs below account for all 16 bits */
+#define TDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFTB_MASK    0xF
+#define TDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFTB_SHIFT   0
+#define TDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFTB_MASK  0xF
+#define TDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFTB_SHIFT 4
+#define TDIF_TASK_CONTEXT_ERRORINIOB_MASK               0x1
+#define TDIF_TASK_CONTEXT_ERRORINIOB_SHIFT              8
+#define TDIF_TASK_CONTEXT_CHECKSUMOVERFLOW_MASK         0x1 /* NOTE(review): name lacks the 'B' suffix its stateB neighbours carry */
+#define TDIF_TASK_CONTEXT_CHECKSUMOVERFLOW_SHIFT        9
+#define TDIF_TASK_CONTEXT_RESERVED0_MASK                0x3F
+#define TDIF_TASK_CONTEXT_RESERVED0_SHIFT               10
+	uint8_t reserved1;
+	uint8_t flags0; /* packed: the MASK/SHIFT pairs below account for all 8 bits */
+#define TDIF_TASK_CONTEXT_IGNOREAPPTAG_MASK             0x1
+#define TDIF_TASK_CONTEXT_IGNOREAPPTAG_SHIFT            0
+#define TDIF_TASK_CONTEXT_INITIALREFTAGVALID_MASK       0x1
+#define TDIF_TASK_CONTEXT_INITIALREFTAGVALID_SHIFT      1
+#define TDIF_TASK_CONTEXT_HOSTGUARDTYPE_MASK            0x1 /* 0 = IP checksum, 1 = CRC */
+#define TDIF_TASK_CONTEXT_HOSTGUARDTYPE_SHIFT           2
+#define TDIF_TASK_CONTEXT_SETERRORWITHEOP_MASK          0x1
+#define TDIF_TASK_CONTEXT_SETERRORWITHEOP_SHIFT         3
+#define TDIF_TASK_CONTEXT_PROTECTIONTYPE_MASK           0x3 /* 1/2/3 - Protection Type */
+#define TDIF_TASK_CONTEXT_PROTECTIONTYPE_SHIFT          4
+#define TDIF_TASK_CONTEXT_CRC_SEED_MASK                 0x1 /* 0=0x0000, 1=0xffff */
+#define TDIF_TASK_CONTEXT_CRC_SEED_SHIFT                6
+#define TDIF_TASK_CONTEXT_RESERVED2_MASK                0x1
+#define TDIF_TASK_CONTEXT_RESERVED2_SHIFT               7
+	__le32 flags1; /* packed: the MASK/SHIFT pairs below account for all 32 bits */
+#define TDIF_TASK_CONTEXT_VALIDATEGUARD_MASK            0x1
+#define TDIF_TASK_CONTEXT_VALIDATEGUARD_SHIFT           0
+#define TDIF_TASK_CONTEXT_VALIDATEAPPTAG_MASK           0x1
+#define TDIF_TASK_CONTEXT_VALIDATEAPPTAG_SHIFT          1
+#define TDIF_TASK_CONTEXT_VALIDATEREFTAG_MASK           0x1
+#define TDIF_TASK_CONTEXT_VALIDATEREFTAG_SHIFT          2
+#define TDIF_TASK_CONTEXT_FORWARDGUARD_MASK             0x1
+#define TDIF_TASK_CONTEXT_FORWARDGUARD_SHIFT            3
+#define TDIF_TASK_CONTEXT_FORWARDAPPTAG_MASK            0x1
+#define TDIF_TASK_CONTEXT_FORWARDAPPTAG_SHIFT           4
+#define TDIF_TASK_CONTEXT_FORWARDREFTAG_MASK            0x1
+#define TDIF_TASK_CONTEXT_FORWARDREFTAG_SHIFT           5
+#define TDIF_TASK_CONTEXT_INTERVALSIZE_MASK             0x7 /* 0=512B, 1=1KB, 2=2KB, 3=4KB, 4=8KB */
+#define TDIF_TASK_CONTEXT_INTERVALSIZE_SHIFT            6
+#define TDIF_TASK_CONTEXT_HOSTINTERFACE_MASK            0x3 /* 0=None, 1=DIF, 2=DIX */
+#define TDIF_TASK_CONTEXT_HOSTINTERFACE_SHIFT           9
+#define TDIF_TASK_CONTEXT_DIFBEFOREDATA_MASK            0x1 /* DIF tag right at the beginning of DIF interval */
+#define TDIF_TASK_CONTEXT_DIFBEFOREDATA_SHIFT           11
+#define TDIF_TASK_CONTEXT_RESERVED3_MASK                0x1 /* reserved */
+#define TDIF_TASK_CONTEXT_RESERVED3_SHIFT               12
+#define TDIF_TASK_CONTEXT_NETWORKINTERFACE_MASK         0x1 /* 0=None, 1=DIF */
+#define TDIF_TASK_CONTEXT_NETWORKINTERFACE_SHIFT        13
+#define TDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFTA_MASK    0xF
+#define TDIF_TASK_CONTEXT_RECEIVEDDIFBYTESLEFTA_SHIFT   14
+#define TDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFTA_MASK  0xF
+#define TDIF_TASK_CONTEXT_TRANSMITEDDIFBYTESLEFTA_SHIFT 18
+#define TDIF_TASK_CONTEXT_ERRORINIOA_MASK               0x1
+#define TDIF_TASK_CONTEXT_ERRORINIOA_SHIFT              22
+#define TDIF_TASK_CONTEXT_CHECKSUMOVERFLOWA_MASK        0x1
+#define TDIF_TASK_CONTEXT_CHECKSUMOVERFLOWA_SHIFT       23
+#define TDIF_TASK_CONTEXT_REFTAGMASK_MASK               0xF /* mask for reference tag handling */
+#define TDIF_TASK_CONTEXT_REFTAGMASK_SHIFT              24
+#define TDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK_MASK    0x1 /* Forward application tag with mask */
+#define TDIF_TASK_CONTEXT_FORWARDAPPTAGWITHMASK_SHIFT   28
+#define TDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK_MASK    0x1 /* Forward reference tag with mask */
+#define TDIF_TASK_CONTEXT_FORWARDREFTAGWITHMASK_SHIFT   29
+#define TDIF_TASK_CONTEXT_KEEPREFTAGCONST_MASK          0x1 /* Keep reference tag constant */
+#define TDIF_TASK_CONTEXT_KEEPREFTAGCONST_SHIFT         30
+#define TDIF_TASK_CONTEXT_RESERVED4_MASK                0x1
+#define TDIF_TASK_CONTEXT_RESERVED4_SHIFT               31
+	__le32 offsetInIOB;
+	__le16 partialCrcValueA;
+	__le16 partialChecksumValueA;
+	__le32 offsetInIOA;
+	uint8_t partialDifDataA[8];
+	uint8_t partialDifDataB[8];
+};
+
+
+/*
+ * Timers context: three per-logical-client words plus one host-expiration word, each a packed __le32.
+ */
+struct timers_context
+{
+	__le32 logical_client_0; /* packed: expiration time bits 0-27, valid bit 28, active bit 29, reserved bits 30-31 */
+#define TIMERS_CONTEXT_EXPIRATIONTIMELC0_MASK     0xFFFFFFF /* Expiration time of logical client 0 */
+#define TIMERS_CONTEXT_EXPIRATIONTIMELC0_SHIFT    0
+#define TIMERS_CONTEXT_VALIDLC0_MASK              0x1 /* Valid bit of logical client 0 */
+#define TIMERS_CONTEXT_VALIDLC0_SHIFT             28
+#define TIMERS_CONTEXT_ACTIVELC0_MASK             0x1 /* Active bit of logical client 0 */
+#define TIMERS_CONTEXT_ACTIVELC0_SHIFT            29
+#define TIMERS_CONTEXT_RESERVED0_MASK             0x3
+#define TIMERS_CONTEXT_RESERVED0_SHIFT            30
+	__le32 logical_client_1; /* packed: same bit layout as logical_client_0 */
+#define TIMERS_CONTEXT_EXPIRATIONTIMELC1_MASK     0xFFFFFFF /* Expiration time of logical client 1 */
+#define TIMERS_CONTEXT_EXPIRATIONTIMELC1_SHIFT    0
+#define TIMERS_CONTEXT_VALIDLC1_MASK              0x1 /* Valid bit of logical client 1 */
+#define TIMERS_CONTEXT_VALIDLC1_SHIFT             28
+#define TIMERS_CONTEXT_ACTIVELC1_MASK             0x1 /* Active bit of logical client 1 */
+#define TIMERS_CONTEXT_ACTIVELC1_SHIFT            29
+#define TIMERS_CONTEXT_RESERVED1_MASK             0x3
+#define TIMERS_CONTEXT_RESERVED1_SHIFT            30
+	__le32 logical_client_2; /* packed: same bit layout as logical_client_0 */
+#define TIMERS_CONTEXT_EXPIRATIONTIMELC2_MASK     0xFFFFFFF /* Expiration time of logical client 2 */
+#define TIMERS_CONTEXT_EXPIRATIONTIMELC2_SHIFT    0
+#define TIMERS_CONTEXT_VALIDLC2_MASK              0x1 /* Valid bit of logical client 2 */
+#define TIMERS_CONTEXT_VALIDLC2_SHIFT             28
+#define TIMERS_CONTEXT_ACTIVELC2_MASK             0x1 /* Active bit of logical client 2 */
+#define TIMERS_CONTEXT_ACTIVELC2_SHIFT            29
+#define TIMERS_CONTEXT_RESERVED2_MASK             0x3
+#define TIMERS_CONTEXT_RESERVED2_SHIFT            30
+	__le32 host_expiration_fields; /* packed: value bits 0-27, valid bit 28, reserved bits 29-31. NOTE(review): the macros below spell it "EXPRIRATION"; kept because the names are interface */
+#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALUE_MASK  0xFFFFFFF /* Expiration time on host (closest one) */
+#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALUE_SHIFT 0
+#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALID_MASK  0x1 /* Valid bit of host expiration */
+#define TIMERS_CONTEXT_HOSTEXPRIRATIONVALID_SHIFT 28
+#define TIMERS_CONTEXT_RESERVED3_MASK             0x7
+#define TIMERS_CONTEXT_RESERVED3_SHIFT            29
+};
+
+
+/*
+ * Enum for next_protocol field of tunnel_parsing_flags. Explicit values 0..3; MAX_TUNNEL_NEXT_PROTOCOL (4) is the end-of-list sentinel.
+ */
+enum tunnel_next_protocol
+{
+	e_unknown=0,
+	e_l2=1,
+	e_ipv4=2,
+	e_ipv6=3,
+	MAX_TUNNEL_NEXT_PROTOCOL
+};
+
+#endif /* __COMMON_HSI__ */
diff --git a/providers/qedr/qelr_hsi.h b/providers/qedr/qelr_hsi.h
new file mode 100644
index 0000000..8eaf183
--- /dev/null
+++ b/providers/qedr/qelr_hsi.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __QED_HSI_ROCE__
+#define __QED_HSI_ROCE__
+/********************************/
+/* Add include to common target */
+/********************************/
+#include "common_hsi.h"
+
+/************************************************************************/
+/* Add include to common roce target for both eCore and protocol roce driver */
+/************************************************************************/
+#include "roce_common.h"
+/************************************************************************/
+/* Add include to qed hsi rdma target for both roce and iwarp qed driver */
+/************************************************************************/
+#include "qelr_hsi_rdma.h"
+
+/* Affiliated asynchronous events / errors enumeration. Values are implicit, starting at 0 for ROCE_ASYNC_EVENT_NONE; MAX_ROCE_ASYNC_EVENTS_TYPE is the end-of-list sentinel. */
+enum roce_async_events_type
+{
+	ROCE_ASYNC_EVENT_NONE,
+	ROCE_ASYNC_EVENT_COMM_EST,
+	ROCE_ASYNC_EVENT_SQ_DRAINED,
+	ROCE_ASYNC_EVENT_SRQ_LIMIT,
+	ROCE_ASYNC_EVENT_LAST_WQE_REACHED,
+	ROCE_ASYNC_EVENT_CQ_ERR,
+	ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR,
+	ROCE_ASYNC_EVENT_LOCAL_CATASTROPHIC_ERR,
+	ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR,
+	ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR,
+	ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR,
+	ROCE_ASYNC_EVENT_SRQ_EMPTY,
+	MAX_ROCE_ASYNC_EVENTS_TYPE
+};
+
+#endif /* __QED_HSI_ROCE__ */
diff --git a/providers/qedr/qelr_hsi_rdma.h b/providers/qedr/qelr_hsi_rdma.h
new file mode 100644
index 0000000..c18ce86
--- /dev/null
+++ b/providers/qedr/qelr_hsi_rdma.h
@@ -0,0 +1,914 @@
+/*
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __QED_HSI_RDMA__
+#define __QED_HSI_RDMA__
+/************************************************************************/
+/* Add include to common rdma target for both eCore and protocol rdma driver */
+/************************************************************************/
+#include "rdma_common.h"
+
+/*
+ * RDMA completion notification queue element (CNQE): a single struct regpair, cq_handle.
+ */
+struct rdma_cnqe
+{
+	struct regpair cq_handle;
+};
+
+
+struct rdma_cqe_responder /* responder CQE layout; the "resp" member of union rdma_cqe */
+{
+	struct regpair srq_wr_id;
+	struct regpair qp_handle;
+	__le32 imm_data_or_inv_r_Key /* immediate data in case imm_flg is set, or invalidated r_key in case inv_flg is set */;
+	__le32 length;
+	__le32 imm_data_hi /* High bytes of immediate data in case imm_flg is set in iWARP only */;
+	__le16 rq_cons /* Valid only when status is WORK_REQUEST_FLUSHED_ERR. Indicates an aggregative flush on all posted RQ WQEs until the reported rq_cons. */;
+	uint8_t flags; /* packed: toggle bit 0, type bits 1-2, inv bit 3, imm bit 4, rdma bit 5, reserved bits 6-7 */
+#define RDMA_CQE_RESPONDER_TOGGLE_BIT_MASK  0x1 /* indicates a valid completion written by FW. FW toggles this bit each time it finishes producing all PBL entries */
+#define RDMA_CQE_RESPONDER_TOGGLE_BIT_SHIFT 0
+#define RDMA_CQE_RESPONDER_TYPE_MASK        0x3 /*  (use enum rdma_cqe_type) */
+#define RDMA_CQE_RESPONDER_TYPE_SHIFT       1
+#define RDMA_CQE_RESPONDER_INV_FLG_MASK     0x1 /* r_key invalidated indicator */
+#define RDMA_CQE_RESPONDER_INV_FLG_SHIFT    3
+#define RDMA_CQE_RESPONDER_IMM_FLG_MASK     0x1 /* immediate data indicator */
+#define RDMA_CQE_RESPONDER_IMM_FLG_SHIFT    4
+#define RDMA_CQE_RESPONDER_RDMA_FLG_MASK    0x1 /* 1=this CQE relates to an RDMA Write. 0=Send. */
+#define RDMA_CQE_RESPONDER_RDMA_FLG_SHIFT   5
+#define RDMA_CQE_RESPONDER_RESERVED2_MASK   0x3
+#define RDMA_CQE_RESPONDER_RESERVED2_SHIFT  6
+	uint8_t status; /* presumably a value of enum rdma_cqe_responder_status_enum - TODO confirm */
+};
+
+struct rdma_cqe_requester /* requester CQE layout; the "req" member of union rdma_cqe */
+{
+	__le16 sq_cons;
+	__le16 reserved0;
+	__le32 reserved1;
+	struct regpair qp_handle;
+	struct regpair reserved2;
+	__le32 reserved3;
+	__le16 reserved4;
+	uint8_t flags; /* packed: toggle bit 0, type bits 1-2, reserved bits 3-7 */
+#define RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK  0x1 /* indicates a valid completion written by FW. FW toggles this bit each time it finishes producing all PBL entries */
+#define RDMA_CQE_REQUESTER_TOGGLE_BIT_SHIFT 0
+#define RDMA_CQE_REQUESTER_TYPE_MASK        0x3 /*  (use enum rdma_cqe_type) */
+#define RDMA_CQE_REQUESTER_TYPE_SHIFT       1
+#define RDMA_CQE_REQUESTER_RESERVED5_MASK   0x1F
+#define RDMA_CQE_REQUESTER_RESERVED5_SHIFT  3
+	uint8_t status; /* presumably a value of enum rdma_cqe_requester_status_enum - TODO confirm */
+};
+
+struct rdma_cqe_common /* fields shared by both CQE layouts; the "cmn" member of union rdma_cqe */
+{
+	struct regpair reserved0;
+	struct regpair qp_handle;
+	__le16 reserved1[7];
+	uint8_t flags; /* packed: toggle bit 0, type bits 1-2, reserved bits 3-7 */
+#define RDMA_CQE_COMMON_TOGGLE_BIT_MASK  0x1 /* indicates a valid completion written by FW. FW toggles this bit each time it finishes producing all PBL entries */
+#define RDMA_CQE_COMMON_TOGGLE_BIT_SHIFT 0
+#define RDMA_CQE_COMMON_TYPE_MASK        0x3 /*  (use enum rdma_cqe_type) */
+#define RDMA_CQE_COMMON_TYPE_SHIFT       1
+#define RDMA_CQE_COMMON_RESERVED2_MASK   0x1F
+#define RDMA_CQE_COMMON_RESERVED2_SHIFT  3
+	uint8_t status;
+};
+
+/*
+ * RDMA completion queue element: overlays the responder (resp), requester (req) and common (cmn) CQE layouts.
+ */
+union rdma_cqe
+{
+	struct rdma_cqe_responder resp;
+	struct rdma_cqe_requester req;
+	struct rdma_cqe_common cmn;
+};
+
+
+
+
+/*
+ * CQE requester status enumeration. Values are implicit, starting at 0 for RDMA_CQE_REQ_STS_OK; the MAX_ entry is the end-of-list sentinel.
+ */
+enum rdma_cqe_requester_status_enum
+{
+	RDMA_CQE_REQ_STS_OK,
+	RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR,
+	RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR,
+	RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR,
+	RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR,
+	RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR,
+	RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR,
+	RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR,
+	RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR,
+	RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR,
+	RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR,
+	RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR,
+	MAX_RDMA_CQE_REQUESTER_STATUS_ENUM
+};
+
+
+
+/*
+ * CQE responder status enumeration. Values are implicit, starting at 0 for RDMA_CQE_RESP_STS_OK; the MAX_ entry is the end-of-list sentinel.
+ */
+enum rdma_cqe_responder_status_enum
+{
+	RDMA_CQE_RESP_STS_OK,
+	RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR,
+	RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR,
+	RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR,
+	RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR,
+	RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR,
+	RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR,
+	RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR,
+	MAX_RDMA_CQE_RESPONDER_STATUS_ENUM
+};
+
+
+/*
+ * CQE type enumeration; referenced by the 2-bit *_TYPE fields of the CQE flags bytes. Implicit values 0..3, then the MAX_ sentinel.
+ */
+enum rdma_cqe_type
+{
+	RDMA_CQE_TYPE_REQUESTER,
+	RDMA_CQE_TYPE_RESPONDER_RQ,
+	RDMA_CQE_TYPE_RESPONDER_SRQ,
+	RDMA_CQE_TYPE_INVALID,
+	MAX_RDMA_CQE_TYPE
+};
+
+
+/*
+ * DIF block size options: 0 selects the 512 enumerator, 1 the 4096 enumerator; the MAX_ entry is the end-of-list sentinel.
+ */
+enum rdma_dif_block_size
+{
+	RDMA_DIF_BLOCK_512=0,
+	RDMA_DIF_BLOCK_4096=1,
+	MAX_RDMA_DIF_BLOCK_SIZE
+};
+
+
+/*
+ * DIF CRC initial value: 0 selects the 0000 seed enumerator, 1 the FFFF one; the MAX_ entry is the end-of-list sentinel.
+ */
+enum rdma_dif_crc_seed
+{
+	RDMA_DIF_CRC_SEED_0000=0,
+	RDMA_DIF_CRC_SEED_FFFF=1,
+	MAX_RDMA_DIF_CRC_SEED
+};
+
+
+/*
+ * RDMA DIF Error Result Structure: two __le32 counters, one flags byte and 55 pad bytes (64 bytes total).
+ */
+struct rdma_dif_error_result
+{
+	__le32 error_intervals /* Total number of error intervals in the IO. */;
+	__le32 dif_error_1st_interval /* Number of the first interval that contained error. Set to 0xFFFFFFFF if error occurred in the Runt Block. */;
+	uint8_t flags; /* packed: CRC bit 0, APP_TAG bit 1, REF_TAG bit 2, reserved bits 3-6, toggle bit 7 */
+#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_CRC_MASK      0x1 /* CRC error occurred. */
+#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_CRC_SHIFT     0
+#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_APP_TAG_MASK  0x1 /* App Tag error occurred. */
+#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_APP_TAG_SHIFT 1
+#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_REF_TAG_MASK  0x1 /* Ref Tag error occurred. */
+#define RDMA_DIF_ERROR_RESULT_DIF_ERROR_TYPE_REF_TAG_SHIFT 2
+#define RDMA_DIF_ERROR_RESULT_RESERVED0_MASK               0xF
+#define RDMA_DIF_ERROR_RESULT_RESERVED0_SHIFT              3
+#define RDMA_DIF_ERROR_RESULT_TOGGLE_BIT_MASK              0x1 /* Used to indicate the structure is valid. Toggles each time an invalidate region is performed. */
+#define RDMA_DIF_ERROR_RESULT_TOGGLE_BIT_SHIFT             7
+	uint8_t reserved1[55] /* Pad to 64 bytes to ensure efficient word line writing. */;
+};
+
+
+/*
+ * DIF IO direction: 0=RX, 1=TX; the MAX_ entry is the end-of-list sentinel.
+ */
+enum rdma_dif_io_direction_flg
+{
+	RDMA_DIF_DIR_RX=0,
+	RDMA_DIF_DIR_TX=1,
+	MAX_RDMA_DIF_IO_DIRECTION_FLG
+};
+
+
+/*
+ * RDMA DIF Runt Result Structure: one __le16 guard tag followed by three reserved __le16 words.
+ */
+struct rdma_dif_runt_result
+{
+	__le16 guard_tag /* CRC result of received IO. */;
+	__le16 reserved[3];
+};
+
+
+/*
+ * Memory window type enumeration; referenced by the 1-bit MW_TYPE field of the bind WQE. Implicit values 0 and 1, then the MAX_ sentinel.
+ */
+enum rdma_mw_type
+{
+	RDMA_MW_TYPE_1,
+	RDMA_MW_TYPE_2A,
+	MAX_RDMA_MW_TYPE
+};
+
+
+struct rdma_rq_sge /* receive-queue SGE: address, length and a packed flags word */
+{
+	struct regpair addr;
+	__le32 length;
+	__le32 flags; /* packed: L_KEY bits 0-25, NUM_SGES bits 26-28, reserved bits 29-31 */
+#define RDMA_RQ_SGE_L_KEY_MASK      0x3FFFFFF /* key of memory relating to this RQ */
+#define RDMA_RQ_SGE_L_KEY_SHIFT     0
+#define RDMA_RQ_SGE_NUM_SGES_MASK   0x7 /* first SGE - number of SGEs in this RQ WQE. Other SGEs - should be set to 0 */
+#define RDMA_RQ_SGE_NUM_SGES_SHIFT  26
+#define RDMA_RQ_SGE_RESERVED0_MASK  0x7
+#define RDMA_RQ_SGE_RESERVED0_SHIFT 29
+};
+
+
+struct rdma_sq_atomic_wqe /* whole atomic WQE: the members of rdma_sq_atomic_wqe_1st, _2nd and _3rd in sequence */
+{
+	__le32 reserved1;
+	__le32 length /* Total data length (8 bytes for Atomic) */;
+	__le32 xrc_srq /* Valid only when XRC is set for the QP */;
+	uint8_t req_type /* Type of WQE */;
+	uint8_t flags; /* packed: six 1-bit flags in bits 0-5, reserved bits 6-7. NOTE(review): bit 5 (DIF_ON_HOST_FLG) is reserved in the _1ST variant */
+#define RDMA_SQ_ATOMIC_WQE_COMP_FLG_MASK         0x1 /* If set, completion will be generated when the WQE is completed */
+#define RDMA_SQ_ATOMIC_WQE_COMP_FLG_SHIFT        0
+#define RDMA_SQ_ATOMIC_WQE_RD_FENCE_FLG_MASK     0x1 /* If set, all pending RDMA read or Atomic operations will be completed before start processing this WQE */
+#define RDMA_SQ_ATOMIC_WQE_RD_FENCE_FLG_SHIFT    1
+#define RDMA_SQ_ATOMIC_WQE_INV_FENCE_FLG_MASK    0x1 /* If set, all pending operations will be completed before start processing this WQE */
+#define RDMA_SQ_ATOMIC_WQE_INV_FENCE_FLG_SHIFT   2
+#define RDMA_SQ_ATOMIC_WQE_SE_FLG_MASK           0x1 /* Don't care for atomic wqe */
+#define RDMA_SQ_ATOMIC_WQE_SE_FLG_SHIFT          3
+#define RDMA_SQ_ATOMIC_WQE_INLINE_FLG_MASK       0x1 /* Should be 0 for atomic wqe */
+#define RDMA_SQ_ATOMIC_WQE_INLINE_FLG_SHIFT      4
+#define RDMA_SQ_ATOMIC_WQE_DIF_ON_HOST_FLG_MASK  0x1 /* Should be 0 for atomic wqe */
+#define RDMA_SQ_ATOMIC_WQE_DIF_ON_HOST_FLG_SHIFT 5
+#define RDMA_SQ_ATOMIC_WQE_RESERVED0_MASK        0x3
+#define RDMA_SQ_ATOMIC_WQE_RESERVED0_SHIFT       6
+	uint8_t wqe_size /* Size of WQE in 16B chunks including SGE */;
+	uint8_t prev_wqe_size /* Previous WQE size in 16B chunks */;
+	struct regpair remote_va /* remote virtual address */;
+	__le32 r_key /* Remote key */;
+	__le32 reserved2;
+	struct regpair cmp_data /* Data to compare in case of ATOMIC_CMP_AND_SWAP */;
+	struct regpair swap_data /* Swap or add data */;
+};
+
+
+/*
+ * First element (16 bytes) of atomic wqe: three __le32 words followed by req_type, flags, wqe_size and prev_wqe_size bytes.
+ */
+struct rdma_sq_atomic_wqe_1st
+{
+	__le32 reserved1;
+	__le32 length /* Total data length (8 bytes for Atomic) */;
+	__le32 xrc_srq /* Valid only when XRC is set for the QP */;
+	uint8_t req_type /* Type of WQE */;
+	uint8_t flags; /* packed: five 1-bit flags in bits 0-4, reserved bits 5-7 */
+#define RDMA_SQ_ATOMIC_WQE_1ST_COMP_FLG_MASK       0x1 /* If set, completion will be generated when the WQE is completed */
+#define RDMA_SQ_ATOMIC_WQE_1ST_COMP_FLG_SHIFT      0
+#define RDMA_SQ_ATOMIC_WQE_1ST_RD_FENCE_FLG_MASK   0x1 /* If set, all pending RDMA read or Atomic operations will be completed before start processing this WQE */
+#define RDMA_SQ_ATOMIC_WQE_1ST_RD_FENCE_FLG_SHIFT  1
+#define RDMA_SQ_ATOMIC_WQE_1ST_INV_FENCE_FLG_MASK  0x1 /* If set, all pending operations will be completed before start processing this WQE */
+#define RDMA_SQ_ATOMIC_WQE_1ST_INV_FENCE_FLG_SHIFT 2
+#define RDMA_SQ_ATOMIC_WQE_1ST_SE_FLG_MASK         0x1 /* Don't care for atomic wqe */
+#define RDMA_SQ_ATOMIC_WQE_1ST_SE_FLG_SHIFT        3
+#define RDMA_SQ_ATOMIC_WQE_1ST_INLINE_FLG_MASK     0x1 /* Should be 0 for atomic wqe */
+#define RDMA_SQ_ATOMIC_WQE_1ST_INLINE_FLG_SHIFT    4
+#define RDMA_SQ_ATOMIC_WQE_1ST_RESERVED0_MASK      0x7
+#define RDMA_SQ_ATOMIC_WQE_1ST_RESERVED0_SHIFT     5
+	uint8_t wqe_size /* Size of WQE in 16B chunks including all SGEs. Set to number of SGEs + 1. */;
+	uint8_t prev_wqe_size /* Previous WQE size in 16B chunks */;
+};
+
+
+/*
+ * Second element (16 bytes) of atomic wqe: remote address, remote key and one reserved word.
+ */
+struct rdma_sq_atomic_wqe_2nd
+{
+	struct regpair remote_va /* remote virtual address */;
+	__le32 r_key /* Remote key */;
+	__le32 reserved2;
+};
+
+
+/*
+ * Third element (16 bytes) of atomic wqe: the compare operand and the swap/add operand.
+ */
+struct rdma_sq_atomic_wqe_3rd
+{
+	struct regpair cmp_data /* Data to compare in case of ATOMIC_CMP_AND_SWAP */;
+	struct regpair swap_data /* Swap or add data */;
+};
+
+
+struct rdma_sq_bind_wqe /* whole bind WQE: the members of rdma_sq_bind_wqe_1st and _2nd in sequence */
+{
+	struct regpair addr;
+	__le32 l_key;
+	uint8_t req_type /* Type of WQE */;
+	uint8_t flags; /* packed: five 1-bit flags in bits 0-4, reserved bits 5-7 */
+#define RDMA_SQ_BIND_WQE_COMP_FLG_MASK       0x1 /* If set, completion will be generated when the WQE is completed */
+#define RDMA_SQ_BIND_WQE_COMP_FLG_SHIFT      0
+#define RDMA_SQ_BIND_WQE_RD_FENCE_FLG_MASK   0x1 /* If set, all pending RDMA read or Atomic operations will be completed before start processing this WQE */
+#define RDMA_SQ_BIND_WQE_RD_FENCE_FLG_SHIFT  1
+#define RDMA_SQ_BIND_WQE_INV_FENCE_FLG_MASK  0x1 /* If set, all pending operations will be completed before start processing this WQE */
+#define RDMA_SQ_BIND_WQE_INV_FENCE_FLG_SHIFT 2
+#define RDMA_SQ_BIND_WQE_SE_FLG_MASK         0x1 /* Don't care for bind wqe */
+#define RDMA_SQ_BIND_WQE_SE_FLG_SHIFT        3
+#define RDMA_SQ_BIND_WQE_INLINE_FLG_MASK     0x1 /* Should be 0 for bind wqe */
+#define RDMA_SQ_BIND_WQE_INLINE_FLG_SHIFT    4
+#define RDMA_SQ_BIND_WQE_RESERVED0_MASK      0x7
+#define RDMA_SQ_BIND_WQE_RESERVED0_SHIFT     5
+	uint8_t wqe_size /* Size of WQE in 16B chunks */;
+	uint8_t prev_wqe_size /* Previous WQE size in 16B chunks */;
+	uint8_t bind_ctrl; /* packed: ZERO_BASED bit 0, MW_TYPE bit 1, reserved bits 2-7 */
+#define RDMA_SQ_BIND_WQE_ZERO_BASED_MASK     0x1 /* zero based indication */
+#define RDMA_SQ_BIND_WQE_ZERO_BASED_SHIFT    0
+#define RDMA_SQ_BIND_WQE_MW_TYPE_MASK        0x1 /*  (use enum rdma_mw_type) */
+#define RDMA_SQ_BIND_WQE_MW_TYPE_SHIFT       1
+#define RDMA_SQ_BIND_WQE_RESERVED1_MASK      0x3F
+#define RDMA_SQ_BIND_WQE_RESERVED1_SHIFT     2
+	uint8_t access_ctrl; /* packed: five 1-bit access flags in bits 0-4, reserved bits 5-7 */
+#define RDMA_SQ_BIND_WQE_REMOTE_READ_MASK    0x1
+#define RDMA_SQ_BIND_WQE_REMOTE_READ_SHIFT   0
+#define RDMA_SQ_BIND_WQE_REMOTE_WRITE_MASK   0x1
+#define RDMA_SQ_BIND_WQE_REMOTE_WRITE_SHIFT  1
+#define RDMA_SQ_BIND_WQE_ENABLE_ATOMIC_MASK  0x1
+#define RDMA_SQ_BIND_WQE_ENABLE_ATOMIC_SHIFT 2
+#define RDMA_SQ_BIND_WQE_LOCAL_READ_MASK     0x1
+#define RDMA_SQ_BIND_WQE_LOCAL_READ_SHIFT    3
+#define RDMA_SQ_BIND_WQE_LOCAL_WRITE_MASK    0x1
+#define RDMA_SQ_BIND_WQE_LOCAL_WRITE_SHIFT   4
+#define RDMA_SQ_BIND_WQE_RESERVED2_MASK      0x7
+#define RDMA_SQ_BIND_WQE_RESERVED2_SHIFT     5
+	uint8_t reserved3;
+	uint8_t length_hi /* upper 8 bits of the registered MW length */;
+	__le32 length_lo /* lower 32 bits of the registered MW length */;
+	__le32 parent_l_key /* l_key of the parent MR */;
+	__le32 reserved4;
+};
+
+
+/*
+ * First element (16 bytes) of bind wqe: addr, l_key and the req_type, flags, wqe_size and prev_wqe_size bytes.
+ */
+struct rdma_sq_bind_wqe_1st
+{
+	struct regpair addr;
+	__le32 l_key;
+	uint8_t req_type /* Type of WQE */;
+	uint8_t flags; /* packed: five 1-bit flags in bits 0-4, reserved bits 5-7 */
+#define RDMA_SQ_BIND_WQE_1ST_COMP_FLG_MASK       0x1 /* If set, completion will be generated when the WQE is completed */
+#define RDMA_SQ_BIND_WQE_1ST_COMP_FLG_SHIFT      0
+#define RDMA_SQ_BIND_WQE_1ST_RD_FENCE_FLG_MASK   0x1 /* If set, all pending RDMA read or Atomic operations will be completed before start processing this WQE */
+#define RDMA_SQ_BIND_WQE_1ST_RD_FENCE_FLG_SHIFT  1
+#define RDMA_SQ_BIND_WQE_1ST_INV_FENCE_FLG_MASK  0x1 /* If set, all pending operations will be completed before start processing this WQE */
+#define RDMA_SQ_BIND_WQE_1ST_INV_FENCE_FLG_SHIFT 2
+#define RDMA_SQ_BIND_WQE_1ST_SE_FLG_MASK         0x1 /* Don't care for bind wqe */
+#define RDMA_SQ_BIND_WQE_1ST_SE_FLG_SHIFT        3
+#define RDMA_SQ_BIND_WQE_1ST_INLINE_FLG_MASK     0x1 /* Should be 0 for bind wqe */
+#define RDMA_SQ_BIND_WQE_1ST_INLINE_FLG_SHIFT    4
+#define RDMA_SQ_BIND_WQE_1ST_RESERVED0_MASK      0x7
+#define RDMA_SQ_BIND_WQE_1ST_RESERVED0_SHIFT     5
+	uint8_t wqe_size /* Size of WQE in 16B chunks */;
+	uint8_t prev_wqe_size /* Previous WQE size in 16B chunks */;
+};
+
+
+/*
+ * Second element (16 bytes) of bind wqe: bind/access control bytes, the 40-bit MW length (length_hi:length_lo) and the parent l_key.
+ */
+struct rdma_sq_bind_wqe_2nd
+{
+	uint8_t bind_ctrl; /* packed: ZERO_BASED bit 0, MW_TYPE bit 1, reserved bits 2-7 */
+#define RDMA_SQ_BIND_WQE_2ND_ZERO_BASED_MASK     0x1 /* zero based indication */
+#define RDMA_SQ_BIND_WQE_2ND_ZERO_BASED_SHIFT    0
+#define RDMA_SQ_BIND_WQE_2ND_MW_TYPE_MASK        0x1 /*  (use enum rdma_mw_type) */
+#define RDMA_SQ_BIND_WQE_2ND_MW_TYPE_SHIFT       1
+#define RDMA_SQ_BIND_WQE_2ND_RESERVED1_MASK      0x3F
+#define RDMA_SQ_BIND_WQE_2ND_RESERVED1_SHIFT     2
+	uint8_t access_ctrl; /* packed: five 1-bit access flags in bits 0-4, reserved bits 5-7 */
+#define RDMA_SQ_BIND_WQE_2ND_REMOTE_READ_MASK    0x1
+#define RDMA_SQ_BIND_WQE_2ND_REMOTE_READ_SHIFT   0
+#define RDMA_SQ_BIND_WQE_2ND_REMOTE_WRITE_MASK   0x1
+#define RDMA_SQ_BIND_WQE_2ND_REMOTE_WRITE_SHIFT  1
+#define RDMA_SQ_BIND_WQE_2ND_ENABLE_ATOMIC_MASK  0x1
+#define RDMA_SQ_BIND_WQE_2ND_ENABLE_ATOMIC_SHIFT 2
+#define RDMA_SQ_BIND_WQE_2ND_LOCAL_READ_MASK     0x1
+#define RDMA_SQ_BIND_WQE_2ND_LOCAL_READ_SHIFT    3
+#define RDMA_SQ_BIND_WQE_2ND_LOCAL_WRITE_MASK    0x1
+#define RDMA_SQ_BIND_WQE_2ND_LOCAL_WRITE_SHIFT   4
+#define RDMA_SQ_BIND_WQE_2ND_RESERVED2_MASK      0x7
+#define RDMA_SQ_BIND_WQE_2ND_RESERVED2_SHIFT     5
+	uint8_t reserved3;
+	uint8_t length_hi /* upper 8 bits of the registered MW length */;
+	__le32 length_lo /* lower 32 bits of the registered MW length */;
+	__le32 parent_l_key /* l_key of the parent MR */;
+	__le32 reserved4;
+};
+
+
+/*
+ * Structure with only the SQ WQE common fields (req_type, flags, wqe_size, prev_wqe_size). Size is that of one SQ element (16B).
+ */
+struct rdma_sq_common_wqe
+{
+	__le32 reserved1[3];
+	uint8_t req_type /* Type of WQE */;
+	uint8_t flags; /* packed: five 1-bit flags in bits 0-4, reserved bits 5-7 */
+#define RDMA_SQ_COMMON_WQE_COMP_FLG_MASK       0x1 /* If set, completion will be generated when the WQE is completed */
+#define RDMA_SQ_COMMON_WQE_COMP_FLG_SHIFT      0
+#define RDMA_SQ_COMMON_WQE_RD_FENCE_FLG_MASK   0x1 /* If set, all pending RDMA read or Atomic operations will be completed before start processing this WQE */
+#define RDMA_SQ_COMMON_WQE_RD_FENCE_FLG_SHIFT  1
+#define RDMA_SQ_COMMON_WQE_INV_FENCE_FLG_MASK  0x1 /* If set, all pending operations will be completed before start processing this WQE */
+#define RDMA_SQ_COMMON_WQE_INV_FENCE_FLG_SHIFT 2
+#define RDMA_SQ_COMMON_WQE_SE_FLG_MASK         0x1 /* If set, signal the responder to generate a solicited event on this WQE (only relevant in SENDs and RDMA write with Imm) */
+#define RDMA_SQ_COMMON_WQE_SE_FLG_SHIFT        3
+#define RDMA_SQ_COMMON_WQE_INLINE_FLG_MASK     0x1 /* if set, indicates inline data is following this WQE instead of SGEs (only relevant in SENDs and RDMA writes) */
+#define RDMA_SQ_COMMON_WQE_INLINE_FLG_SHIFT    4
+#define RDMA_SQ_COMMON_WQE_RESERVED0_MASK      0x7
+#define RDMA_SQ_COMMON_WQE_RESERVED0_SHIFT     5
+	uint8_t wqe_size /* Size of WQE in 16B chunks including all SGEs or inline data. In case there are SGEs: set to number of SGEs + 1. In case of inline data: set to the whole number of 16B which contain the inline data + 1. */;
+	uint8_t prev_wqe_size /* Previous WQE size in 16B chunks */;
+};
+
+
+struct rdma_sq_fmr_wqe
+{
+	struct regpair addr;
+	__le32 l_key;
+	uint8_t req_type /* Type of WQE */;
+	uint8_t flags;
+#define RDMA_SQ_FMR_WQE_COMP_FLG_MASK                0x1 /* If set, completion will be generated when the WQE is completed */
+#define RDMA_SQ_FMR_WQE_COMP_FLG_SHIFT               0
+#define RDMA_SQ_FMR_WQE_RD_FENCE_FLG_MASK            0x1 /* If set, all pending RDMA read or Atomic operations will be completed before start processing this WQE */
+#define RDMA_SQ_FMR_WQE_RD_FENCE_FLG_SHIFT           1
+#define RDMA_SQ_FMR_WQE_INV_FENCE_FLG_MASK           0x1 /* If set, all pending operations will be completed before start processing this WQE */
+#define RDMA_SQ_FMR_WQE_INV_FENCE_FLG_SHIFT          2
+#define RDMA_SQ_FMR_WQE_SE_FLG_MASK                  0x1 /* Dont care for FMR wqe */
+#define RDMA_SQ_FMR_WQE_SE_FLG_SHIFT                 3
+#define RDMA_SQ_FMR_WQE_INLINE_FLG_MASK              0x1 /* Should be 0 for FMR wqe */
+#define RDMA_SQ_FMR_WQE_INLINE_FLG_SHIFT             4
+#define RDMA_SQ_FMR_WQE_DIF_ON_HOST_FLG_MASK         0x1 /* If set, indicated host memory of this WQE is DIF protected. */
+#define RDMA_SQ_FMR_WQE_DIF_ON_HOST_FLG_SHIFT        5
+#define RDMA_SQ_FMR_WQE_RESERVED0_MASK               0x3
+#define RDMA_SQ_FMR_WQE_RESERVED0_SHIFT              6
+	uint8_t wqe_size /* Size of WQE in 16B chunks */;
+	uint8_t prev_wqe_size /* Previous WQE size in 16B chunks */;
+	uint8_t fmr_ctrl;
+#define RDMA_SQ_FMR_WQE_PAGE_SIZE_LOG_MASK           0x1F /* 0 is 4k, 1 is 8k... */
+#define RDMA_SQ_FMR_WQE_PAGE_SIZE_LOG_SHIFT          0
+#define RDMA_SQ_FMR_WQE_ZERO_BASED_MASK              0x1 /* zero based indication */
+#define RDMA_SQ_FMR_WQE_ZERO_BASED_SHIFT             5
+#define RDMA_SQ_FMR_WQE_BIND_EN_MASK                 0x1 /* indication whether bind is enabled for this MR */
+#define RDMA_SQ_FMR_WQE_BIND_EN_SHIFT                6
+#define RDMA_SQ_FMR_WQE_RESERVED1_MASK               0x1
+#define RDMA_SQ_FMR_WQE_RESERVED1_SHIFT              7
+	uint8_t access_ctrl;
+#define RDMA_SQ_FMR_WQE_REMOTE_READ_MASK             0x1
+#define RDMA_SQ_FMR_WQE_REMOTE_READ_SHIFT            0
+#define RDMA_SQ_FMR_WQE_REMOTE_WRITE_MASK            0x1
+#define RDMA_SQ_FMR_WQE_REMOTE_WRITE_SHIFT           1
+#define RDMA_SQ_FMR_WQE_ENABLE_ATOMIC_MASK           0x1
+#define RDMA_SQ_FMR_WQE_ENABLE_ATOMIC_SHIFT          2
+#define RDMA_SQ_FMR_WQE_LOCAL_READ_MASK              0x1
+#define RDMA_SQ_FMR_WQE_LOCAL_READ_SHIFT             3
+#define RDMA_SQ_FMR_WQE_LOCAL_WRITE_MASK             0x1
+#define RDMA_SQ_FMR_WQE_LOCAL_WRITE_SHIFT            4
+#define RDMA_SQ_FMR_WQE_RESERVED2_MASK               0x7
+#define RDMA_SQ_FMR_WQE_RESERVED2_SHIFT              5
+	uint8_t reserved3;
+	uint8_t length_hi /* upper 8 bits of the registered MR length */;
+	__le32 length_lo /* lower 32 bits of the registered MR length. In case of DIF the length is specified including the DIF guards. */;
+	struct regpair pbl_addr /* Address of PBL */;
+	__le32 dif_base_ref_tag /* Ref tag of the first DIF Block. */;
+	__le16 dif_app_tag /* App tag of all DIF Blocks. */;
+	__le16 dif_app_tag_mask /* Bitmask for verifying dif_app_tag. */;
+	__le16 dif_runt_crc_value /* In TX IO, in case the runt_valid_flg is set, this value is used to validate the last Block in the IO. */;
+	__le16 dif_flags;
+#define RDMA_SQ_FMR_WQE_DIF_IO_DIRECTION_FLG_MASK    0x1 /* 0=RX, 1=TX (use enum rdma_dif_io_direction_flg) */
+#define RDMA_SQ_FMR_WQE_DIF_IO_DIRECTION_FLG_SHIFT   0
+#define RDMA_SQ_FMR_WQE_DIF_BLOCK_SIZE_MASK          0x1 /* DIF block size. 0=512B 1=4096B (use enum rdma_dif_block_size) */
+#define RDMA_SQ_FMR_WQE_DIF_BLOCK_SIZE_SHIFT         1
+#define RDMA_SQ_FMR_WQE_DIF_RUNT_VALID_FLG_MASK      0x1 /* In TX IO, indicates the runt_value field is valid. In RX IO, indicates the calculated runt value is to be placed on host buffer. */
+#define RDMA_SQ_FMR_WQE_DIF_RUNT_VALID_FLG_SHIFT     2
+#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_CRC_GUARD_MASK  0x1 /* In TX IO, indicates CRC of each DIF guard tag is checked. */
+#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_CRC_GUARD_SHIFT 3
+#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_REF_TAG_MASK    0x1 /* In TX IO, indicates Ref tag of each DIF guard tag is checked. */
+#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_REF_TAG_SHIFT   4
+#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_APP_TAG_MASK    0x1 /* In TX IO, indicates App tag of each DIF guard tag is checked. */
+#define RDMA_SQ_FMR_WQE_DIF_VALIDATE_APP_TAG_SHIFT   5
+#define RDMA_SQ_FMR_WQE_DIF_CRC_SEED_MASK            0x1 /* DIF CRC Seed to use. 0=0x000 1=0xFFFF (use enum rdma_dif_crc_seed) */
+#define RDMA_SQ_FMR_WQE_DIF_CRC_SEED_SHIFT           6
+#define RDMA_SQ_FMR_WQE_RESERVED4_MASK               0x1FF
+#define RDMA_SQ_FMR_WQE_RESERVED4_SHIFT              7
+	__le32 Reserved5;
+};
+
+
+/*
+ * First element (16 bytes) of the FMR WQE: address, l_key, request type, flags and WQE sizes
+ */
+struct rdma_sq_fmr_wqe_1st
+{
+	struct regpair addr;
+	__le32 l_key;
+	uint8_t req_type /* Type of WQE */;
+	uint8_t flags; /* bit fields; each _MASK below applies after shifting right by the matching _SHIFT */
+#define RDMA_SQ_FMR_WQE_1ST_COMP_FLG_MASK         0x1 /* If set, a completion will be generated when the WQE is completed */
+#define RDMA_SQ_FMR_WQE_1ST_COMP_FLG_SHIFT        0
+#define RDMA_SQ_FMR_WQE_1ST_RD_FENCE_FLG_MASK     0x1 /* If set, all pending RDMA read or Atomic operations will be completed before processing of this WQE starts */
+#define RDMA_SQ_FMR_WQE_1ST_RD_FENCE_FLG_SHIFT    1
+#define RDMA_SQ_FMR_WQE_1ST_INV_FENCE_FLG_MASK    0x1 /* If set, all pending operations will be completed before processing of this WQE starts */
+#define RDMA_SQ_FMR_WQE_1ST_INV_FENCE_FLG_SHIFT   2
+#define RDMA_SQ_FMR_WQE_1ST_SE_FLG_MASK           0x1 /* Don't care for FMR wqe */
+#define RDMA_SQ_FMR_WQE_1ST_SE_FLG_SHIFT          3
+#define RDMA_SQ_FMR_WQE_1ST_INLINE_FLG_MASK       0x1 /* Should be 0 for FMR wqe */
+#define RDMA_SQ_FMR_WQE_1ST_INLINE_FLG_SHIFT      4
+#define RDMA_SQ_FMR_WQE_1ST_DIF_ON_HOST_FLG_MASK  0x1 /* If set, indicates host memory of this WQE is DIF protected. */
+#define RDMA_SQ_FMR_WQE_1ST_DIF_ON_HOST_FLG_SHIFT 5
+#define RDMA_SQ_FMR_WQE_1ST_RESERVED0_MASK        0x3
+#define RDMA_SQ_FMR_WQE_1ST_RESERVED0_SHIFT       6
+	uint8_t wqe_size /* Size of WQE in 16B chunks */;
+	uint8_t prev_wqe_size /* Previous WQE size in 16B chunks */;
+};
+
+
+/*
+ * Second element (16 bytes) of the FMR WQE: MR control and access bits, MR length and PBL address
+ */
+struct rdma_sq_fmr_wqe_2nd
+{
+	uint8_t fmr_ctrl;
+#define RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG_MASK  0x1F /* log of the page size relative to 4k: 0 is 4k, 1 is 8k... */
+#define RDMA_SQ_FMR_WQE_2ND_PAGE_SIZE_LOG_SHIFT 0
+#define RDMA_SQ_FMR_WQE_2ND_ZERO_BASED_MASK     0x1 /* zero based indication */
+#define RDMA_SQ_FMR_WQE_2ND_ZERO_BASED_SHIFT    5
+#define RDMA_SQ_FMR_WQE_2ND_BIND_EN_MASK        0x1 /* indicates whether bind is enabled for this MR */
+#define RDMA_SQ_FMR_WQE_2ND_BIND_EN_SHIFT       6
+#define RDMA_SQ_FMR_WQE_2ND_RESERVED1_MASK      0x1
+#define RDMA_SQ_FMR_WQE_2ND_RESERVED1_SHIFT     7
+	uint8_t access_ctrl; /* one bit per access right, see the _MASK/_SHIFT pairs below */
+#define RDMA_SQ_FMR_WQE_2ND_REMOTE_READ_MASK    0x1
+#define RDMA_SQ_FMR_WQE_2ND_REMOTE_READ_SHIFT   0
+#define RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE_MASK   0x1
+#define RDMA_SQ_FMR_WQE_2ND_REMOTE_WRITE_SHIFT  1
+#define RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC_MASK  0x1
+#define RDMA_SQ_FMR_WQE_2ND_ENABLE_ATOMIC_SHIFT 2
+#define RDMA_SQ_FMR_WQE_2ND_LOCAL_READ_MASK     0x1
+#define RDMA_SQ_FMR_WQE_2ND_LOCAL_READ_SHIFT    3
+#define RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE_MASK    0x1
+#define RDMA_SQ_FMR_WQE_2ND_LOCAL_WRITE_SHIFT   4
+#define RDMA_SQ_FMR_WQE_2ND_RESERVED2_MASK      0x7
+#define RDMA_SQ_FMR_WQE_2ND_RESERVED2_SHIFT     5
+	uint8_t reserved3;
+	uint8_t length_hi /* upper 8 bits of the registered MR length */;
+	__le32 length_lo /* lower 32 bits of the registered MR length. In case of a zero based MR, holds the FBO */;
+	struct regpair pbl_addr /* Address of PBL */;
+};
+
+
+/*
+ * Third element (16 bytes) of the FMR WQE: DIF tags, runt CRC value and DIF flags
+ */
+struct rdma_sq_fmr_wqe_3rd
+{
+	__le32 dif_base_ref_tag /* Ref tag of the first DIF Block. */;
+	__le16 dif_app_tag /* App tag of all DIF Blocks. */;
+	__le16 dif_app_tag_mask /* Bitmask for verifying dif_app_tag. */;
+	__le16 dif_runt_crc_value /* In TX IO, in case the runt_valid_flg is set, this value is used to validate the last Block in the IO. */;
+	__le16 dif_flags;
+#define RDMA_SQ_FMR_WQE_3RD_DIF_IO_DIRECTION_FLG_MASK    0x1 /* 0=RX, 1=TX (use enum rdma_dif_io_direction_flg) */
+#define RDMA_SQ_FMR_WQE_3RD_DIF_IO_DIRECTION_FLG_SHIFT   0
+#define RDMA_SQ_FMR_WQE_3RD_DIF_BLOCK_SIZE_MASK          0x1 /* DIF block size. 0=512B 1=4096B (use enum rdma_dif_block_size) */
+#define RDMA_SQ_FMR_WQE_3RD_DIF_BLOCK_SIZE_SHIFT         1
+#define RDMA_SQ_FMR_WQE_3RD_DIF_RUNT_VALID_FLG_MASK      0x1 /* In TX IO, indicates the runt_value field is valid. In RX IO, indicates the calculated runt value is to be placed in the host buffer. */
+#define RDMA_SQ_FMR_WQE_3RD_DIF_RUNT_VALID_FLG_SHIFT     2
+#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_CRC_GUARD_MASK  0x1 /* In TX IO, indicates the CRC of each DIF guard tag is checked. */
+#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_CRC_GUARD_SHIFT 3
+#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_REF_TAG_MASK    0x1 /* In TX IO, indicates the Ref tag of each DIF guard tag is checked. */
+#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_REF_TAG_SHIFT   4
+#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_APP_TAG_MASK    0x1 /* In TX IO, indicates the App tag of each DIF guard tag is checked. */
+#define RDMA_SQ_FMR_WQE_3RD_DIF_VALIDATE_APP_TAG_SHIFT   5
+#define RDMA_SQ_FMR_WQE_3RD_DIF_CRC_SEED_MASK            0x1 /* DIF CRC Seed to use. 0=0x000 1=0xFFFF (use enum rdma_dif_crc_seed) */
+#define RDMA_SQ_FMR_WQE_3RD_DIF_CRC_SEED_SHIFT           6
+#define RDMA_SQ_FMR_WQE_3RD_RESERVED4_MASK               0x1FF
+#define RDMA_SQ_FMR_WQE_3RD_RESERVED4_SHIFT              7
+	__le32 Reserved5;
+};
+
+/* SQ WQE for a local invalidate request: key to invalidate, request type, flags and WQE sizes */
+struct rdma_sq_local_inv_wqe
+{
+	struct regpair reserved;
+	__le32 inv_l_key /* The invalidate local key */;
+	uint8_t req_type /* Type of WQE */;
+	uint8_t flags;
+#define RDMA_SQ_LOCAL_INV_WQE_COMP_FLG_MASK         0x1 /* If set, a completion will be generated when the WQE is completed */
+#define RDMA_SQ_LOCAL_INV_WQE_COMP_FLG_SHIFT        0
+#define RDMA_SQ_LOCAL_INV_WQE_RD_FENCE_FLG_MASK     0x1 /* If set, all pending RDMA read or Atomic operations will be completed before processing of this WQE starts */
+#define RDMA_SQ_LOCAL_INV_WQE_RD_FENCE_FLG_SHIFT    1
+#define RDMA_SQ_LOCAL_INV_WQE_INV_FENCE_FLG_MASK    0x1 /* If set, all pending operations will be completed before processing of this WQE starts */
+#define RDMA_SQ_LOCAL_INV_WQE_INV_FENCE_FLG_SHIFT   2
+#define RDMA_SQ_LOCAL_INV_WQE_SE_FLG_MASK           0x1 /* Don't care for local invalidate wqe */
+#define RDMA_SQ_LOCAL_INV_WQE_SE_FLG_SHIFT          3
+#define RDMA_SQ_LOCAL_INV_WQE_INLINE_FLG_MASK       0x1 /* Should be 0 for local invalidate wqe */
+#define RDMA_SQ_LOCAL_INV_WQE_INLINE_FLG_SHIFT      4
+#define RDMA_SQ_LOCAL_INV_WQE_DIF_ON_HOST_FLG_MASK  0x1 /* If set, indicates host memory of this WQE is DIF protected. */
+#define RDMA_SQ_LOCAL_INV_WQE_DIF_ON_HOST_FLG_SHIFT 5
+#define RDMA_SQ_LOCAL_INV_WQE_RESERVED0_MASK        0x3
+#define RDMA_SQ_LOCAL_INV_WQE_RESERVED0_SHIFT       6
+	uint8_t wqe_size /* Size of WQE in 16B chunks */;
+	uint8_t prev_wqe_size /* Previous WQE size in 16B chunks */;
+};
+
+/* RDMA WQE on the SQ: same field layout as rdma_sq_rdma_wqe_1st followed by rdma_sq_rdma_wqe_2nd */
+struct rdma_sq_rdma_wqe
+{
+	__le32 imm_data /* The immediate data in case of RDMA_WITH_IMM */;
+	__le32 length /* Total data length. If DIF on host is enabled, length does NOT include DIF guards. */;
+	__le32 xrc_srq /* Valid only when XRC is set for the QP */;
+	uint8_t req_type /* Type of WQE */;
+	uint8_t flags;
+#define RDMA_SQ_RDMA_WQE_COMP_FLG_MASK                  0x1 /* If set, a completion will be generated when the WQE is completed */
+#define RDMA_SQ_RDMA_WQE_COMP_FLG_SHIFT                 0
+#define RDMA_SQ_RDMA_WQE_RD_FENCE_FLG_MASK              0x1 /* If set, all pending RDMA read or Atomic operations will be completed before processing of this WQE starts */
+#define RDMA_SQ_RDMA_WQE_RD_FENCE_FLG_SHIFT             1
+#define RDMA_SQ_RDMA_WQE_INV_FENCE_FLG_MASK             0x1 /* If set, all pending operations will be completed before processing of this WQE starts */
+#define RDMA_SQ_RDMA_WQE_INV_FENCE_FLG_SHIFT            2
+#define RDMA_SQ_RDMA_WQE_SE_FLG_MASK                    0x1 /* If set, signal the responder to generate a solicited event on this WQE */
+#define RDMA_SQ_RDMA_WQE_SE_FLG_SHIFT                   3
+#define RDMA_SQ_RDMA_WQE_INLINE_FLG_MASK                0x1 /* If set, indicates inline data follows this WQE instead of SGEs. Applicable for RDMA_WR or RDMA_WR_WITH_IMM. Should be 0 for RDMA_RD */
+#define RDMA_SQ_RDMA_WQE_INLINE_FLG_SHIFT               4
+#define RDMA_SQ_RDMA_WQE_DIF_ON_HOST_FLG_MASK           0x1 /* If set, indicates host memory of this WQE is DIF protected. */
+#define RDMA_SQ_RDMA_WQE_DIF_ON_HOST_FLG_SHIFT          5
+#define RDMA_SQ_RDMA_WQE_RESERVED0_MASK                 0x3
+#define RDMA_SQ_RDMA_WQE_RESERVED0_SHIFT                6
+	uint8_t wqe_size /* Size of WQE in 16B chunks including all SGEs or inline data. In case there are SGEs: set to number of SGEs + 1. In case of inline data: set to the whole number of 16B which contain the inline data + 1. */;
+	uint8_t prev_wqe_size /* Previous WQE size in 16B chunks */;
+	struct regpair remote_va /* Remote virtual address */;
+	__le32 r_key /* Remote key */;
+	uint8_t dif_flags;
+#define RDMA_SQ_RDMA_WQE_DIF_BLOCK_SIZE_MASK            0x1 /* If dif_on_host_flg is set: DIF block size. 0=512B 1=4096B (use enum rdma_dif_block_size) */
+#define RDMA_SQ_RDMA_WQE_DIF_BLOCK_SIZE_SHIFT           0
+#define RDMA_SQ_RDMA_WQE_DIF_FIRST_RDMA_IN_IO_FLG_MASK  0x1 /* If dif_on_host_flg is set: WQE executes the first RDMA on the related IO. */
+#define RDMA_SQ_RDMA_WQE_DIF_FIRST_RDMA_IN_IO_FLG_SHIFT 1
+#define RDMA_SQ_RDMA_WQE_DIF_LAST_RDMA_IN_IO_FLG_MASK   0x1 /* If dif_on_host_flg is set: WQE executes the last RDMA on the related IO. */
+#define RDMA_SQ_RDMA_WQE_DIF_LAST_RDMA_IN_IO_FLG_SHIFT  2
+#define RDMA_SQ_RDMA_WQE_RESERVED1_MASK                 0x1F
+#define RDMA_SQ_RDMA_WQE_RESERVED1_SHIFT                3
+	uint8_t reserved2[3];
+};
+
+
+/*
+ * First element (16 bytes) of the RDMA WQE: immediate data, length, XRC SRQ, request type, flags and WQE sizes
+ */
+struct rdma_sq_rdma_wqe_1st
+{
+	__le32 imm_data /* The immediate data in case of RDMA_WITH_IMM */;
+	__le32 length /* Total data length */;
+	__le32 xrc_srq /* Valid only when XRC is set for the QP */;
+	uint8_t req_type /* Type of WQE */;
+	uint8_t flags;
+#define RDMA_SQ_RDMA_WQE_1ST_COMP_FLG_MASK         0x1 /* If set, a completion will be generated when the WQE is completed */
+#define RDMA_SQ_RDMA_WQE_1ST_COMP_FLG_SHIFT        0
+#define RDMA_SQ_RDMA_WQE_1ST_RD_FENCE_FLG_MASK     0x1 /* If set, all pending RDMA read or Atomic operations will be completed before processing of this WQE starts */
+#define RDMA_SQ_RDMA_WQE_1ST_RD_FENCE_FLG_SHIFT    1
+#define RDMA_SQ_RDMA_WQE_1ST_INV_FENCE_FLG_MASK    0x1 /* If set, all pending operations will be completed before processing of this WQE starts */
+#define RDMA_SQ_RDMA_WQE_1ST_INV_FENCE_FLG_SHIFT   2
+#define RDMA_SQ_RDMA_WQE_1ST_SE_FLG_MASK           0x1 /* If set, signal the responder to generate a solicited event on this WQE */
+#define RDMA_SQ_RDMA_WQE_1ST_SE_FLG_SHIFT          3
+#define RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG_MASK       0x1 /* If set, indicates inline data follows this WQE instead of SGEs. Applicable for RDMA_WR or RDMA_WR_WITH_IMM. Should be 0 for RDMA_RD */
+#define RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG_SHIFT      4
+#define RDMA_SQ_RDMA_WQE_1ST_DIF_ON_HOST_FLG_MASK  0x1 /* If set, indicates host memory of this WQE is DIF protected. */
+#define RDMA_SQ_RDMA_WQE_1ST_DIF_ON_HOST_FLG_SHIFT 5
+#define RDMA_SQ_RDMA_WQE_1ST_RESERVED0_MASK        0x3
+#define RDMA_SQ_RDMA_WQE_1ST_RESERVED0_SHIFT       6
+	uint8_t wqe_size /* Size of WQE in 16B chunks including all SGEs or inline data. In case there are SGEs: set to number of SGEs + 1. In case of inline data: set to the whole number of 16B which contain the inline data + 1. */;
+	uint8_t prev_wqe_size /* Previous WQE size in 16B chunks */;
+};
+
+
+/*
+ * Second element (16 bytes) of the RDMA WQE: remote address, remote key and DIF flags
+ */
+struct rdma_sq_rdma_wqe_2nd
+{
+	struct regpair remote_va /* Remote virtual address */;
+	__le32 r_key /* Remote key */;
+	uint8_t dif_flags;
+#define RDMA_SQ_RDMA_WQE_2ND_DIF_BLOCK_SIZE_MASK         0x1 /* If dif_on_host_flg is set: DIF block size. 0=512B 1=4096B (use enum rdma_dif_block_size) */
+#define RDMA_SQ_RDMA_WQE_2ND_DIF_BLOCK_SIZE_SHIFT        0
+#define RDMA_SQ_RDMA_WQE_2ND_DIF_FIRST_SEGMENT_FLG_MASK  0x1 /* If dif_on_host_flg is set: WQE executes the first DIF on the related MR. */
+#define RDMA_SQ_RDMA_WQE_2ND_DIF_FIRST_SEGMENT_FLG_SHIFT 1
+#define RDMA_SQ_RDMA_WQE_2ND_DIF_LAST_SEGMENT_FLG_MASK   0x1 /* If dif_on_host_flg is set: WQE executes the last DIF on the related MR. */
+#define RDMA_SQ_RDMA_WQE_2ND_DIF_LAST_SEGMENT_FLG_SHIFT  2
+#define RDMA_SQ_RDMA_WQE_2ND_RESERVED1_MASK              0x1F
+#define RDMA_SQ_RDMA_WQE_2ND_RESERVED1_SHIFT             3
+	uint8_t reserved2[3];
+};
+
+
+/*
+ * SQ WQE request type enumeration; values are implicit, starting at 0 for SEND
+ */
+enum rdma_sq_req_type
+{
+	RDMA_SQ_REQ_TYPE_SEND,
+	RDMA_SQ_REQ_TYPE_SEND_WITH_IMM,
+	RDMA_SQ_REQ_TYPE_SEND_WITH_INVALIDATE,
+	RDMA_SQ_REQ_TYPE_RDMA_WR,
+	RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM,
+	RDMA_SQ_REQ_TYPE_RDMA_RD,
+	RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP,
+	RDMA_SQ_REQ_TYPE_ATOMIC_ADD,
+	RDMA_SQ_REQ_TYPE_LOCAL_INVALIDATE,
+	RDMA_SQ_REQ_TYPE_FAST_MR,
+	RDMA_SQ_REQ_TYPE_BIND,
+	RDMA_SQ_REQ_TYPE_INVALID,
+	MAX_RDMA_SQ_REQ_TYPE /* number of enumerators above */
+};
+
+/* Send WQE on the SQ: key/immediate data, length, XRC SRQ, request type, flags, WQE sizes and 16 reserved bytes */
+struct rdma_sq_send_wqe
+{
+	__le32 inv_key_or_imm_data /* the r_key to invalidate in case of SEND_WITH_INVALIDATE, or the immediate data in case of SEND_WITH_IMM */;
+	__le32 length /* Total data length */;
+	__le32 xrc_srq /* Valid only when XRC is set for the QP */;
+	uint8_t req_type /* Type of WQE */;
+	uint8_t flags;
+#define RDMA_SQ_SEND_WQE_COMP_FLG_MASK         0x1 /* If set, a completion will be generated when the WQE is completed */
+#define RDMA_SQ_SEND_WQE_COMP_FLG_SHIFT        0
+#define RDMA_SQ_SEND_WQE_RD_FENCE_FLG_MASK     0x1 /* If set, all pending RDMA read or Atomic operations will be completed before processing of this WQE starts */
+#define RDMA_SQ_SEND_WQE_RD_FENCE_FLG_SHIFT    1
+#define RDMA_SQ_SEND_WQE_INV_FENCE_FLG_MASK    0x1 /* If set, all pending operations will be completed before processing of this WQE starts */
+#define RDMA_SQ_SEND_WQE_INV_FENCE_FLG_SHIFT   2
+#define RDMA_SQ_SEND_WQE_SE_FLG_MASK           0x1 /* If set, signal the responder to generate a solicited event on this WQE */
+#define RDMA_SQ_SEND_WQE_SE_FLG_SHIFT          3
+#define RDMA_SQ_SEND_WQE_INLINE_FLG_MASK       0x1 /* If set, indicates inline data follows this WQE instead of SGEs */
+#define RDMA_SQ_SEND_WQE_INLINE_FLG_SHIFT      4
+#define RDMA_SQ_SEND_WQE_DIF_ON_HOST_FLG_MASK  0x1 /* Should be 0 for send wqe */
+#define RDMA_SQ_SEND_WQE_DIF_ON_HOST_FLG_SHIFT 5
+#define RDMA_SQ_SEND_WQE_RESERVED0_MASK        0x3
+#define RDMA_SQ_SEND_WQE_RESERVED0_SHIFT       6
+	uint8_t wqe_size /* Size of WQE in 16B chunks including all SGEs or inline data. In case there are SGEs: set to number of SGEs + 1. In case of inline data: set to the whole number of 16B which contain the inline data + 1. */;
+	uint8_t prev_wqe_size /* Previous WQE size in 16B chunks */;
+	__le32 reserved1[4];
+};
+
+/* First element of the send WQE; same fields as the start of rdma_sq_send_wqe, but bit 5 of flags is reserved here */
+struct rdma_sq_send_wqe_1st
+{
+	__le32 inv_key_or_imm_data /* the r_key to invalidate in case of SEND_WITH_INVALIDATE, or the immediate data in case of SEND_WITH_IMM */;
+	__le32 length /* Total data length */;
+	__le32 xrc_srq /* Valid only when XRC is set for the QP */;
+	uint8_t req_type /* Type of WQE */;
+	uint8_t flags;
+#define RDMA_SQ_SEND_WQE_1ST_COMP_FLG_MASK       0x1 /* If set, a completion will be generated when the WQE is completed */
+#define RDMA_SQ_SEND_WQE_1ST_COMP_FLG_SHIFT      0
+#define RDMA_SQ_SEND_WQE_1ST_RD_FENCE_FLG_MASK   0x1 /* If set, all pending RDMA read or Atomic operations will be completed before processing of this WQE starts */
+#define RDMA_SQ_SEND_WQE_1ST_RD_FENCE_FLG_SHIFT  1
+#define RDMA_SQ_SEND_WQE_1ST_INV_FENCE_FLG_MASK  0x1 /* If set, all pending operations will be completed before processing of this WQE starts */
+#define RDMA_SQ_SEND_WQE_1ST_INV_FENCE_FLG_SHIFT 2
+#define RDMA_SQ_SEND_WQE_1ST_SE_FLG_MASK         0x1 /* If set, signal the responder to generate a solicited event on this WQE */
+#define RDMA_SQ_SEND_WQE_1ST_SE_FLG_SHIFT        3
+#define RDMA_SQ_SEND_WQE_1ST_INLINE_FLG_MASK     0x1 /* If set, indicates inline data follows this WQE instead of SGEs */
+#define RDMA_SQ_SEND_WQE_1ST_INLINE_FLG_SHIFT    4
+#define RDMA_SQ_SEND_WQE_1ST_RESERVED0_MASK      0x7
+#define RDMA_SQ_SEND_WQE_1ST_RESERVED0_SHIFT     5
+	uint8_t wqe_size /* Size of WQE in 16B chunks including all SGEs or inline data. In case there are SGEs: set to number of SGEs + 1. In case of inline data: set to the whole number of 16B which contain the inline data + 1. */;
+	uint8_t prev_wqe_size /* Previous WQE size in 16B chunks */;
+};
+
+/* 16 reserved bytes, matching the tail of rdma_sq_send_wqe. NOTE(review): "2st" presumably means "2nd" - confirm before renaming */
+struct rdma_sq_send_wqe_2st
+{
+	__le32 reserved1[4];
+};
+
+/* SQ scatter/gather element: length, address and l_key */
+struct rdma_sq_sge
+{
+	__le32 length /* Total length of the send. If DIF on host is enabled, SGE length includes the DIF guards. */;
+	struct regpair addr;
+	__le32 l_key;
+};
+
+/* SRQ WQE header: work request id and the number of SGEs in the WQE */
+struct rdma_srq_wqe_header
+{
+	struct regpair wr_id;
+	uint8_t num_sges /* number of SGEs in WQE */;
+	uint8_t reserved2[7];
+};
+/* SRQ scatter/gather element: address, length and l_key */
+struct rdma_srq_sge
+{
+	struct regpair addr;
+	__le32 length;
+	__le32 l_key;
+};
+
+/*
+ * rdma srq element: holds either a WQE header or an SGE
+ */
+union rdma_srq_elm
+{
+	struct rdma_srq_wqe_header header;
+	struct rdma_srq_sge sge;
+};
+
+
+
+
+/*
+ * RDMA doorbell data for a flags update
+ */
+struct rdma_pwm_flags_data
+{
+	__le16 icid /* internal CID */;
+	uint8_t agg_flags /* aggregative flags */;
+	uint8_t reserved;
+};
+
+
+/*
+ * RDMA doorbell data for SQ and RQ: internal CID plus a 16-bit value
+ */
+struct rdma_pwm_val16_data
+{
+	__le16 icid /* internal CID */;
+	__le16 value /* aggregated value to update */;
+};
+
+/* SQ/RQ doorbell data, accessible either field by field or as a single 32-bit dword */
+union rdma_pwm_val16_data_union
+{
+	struct rdma_pwm_val16_data as_struct /* Parameters field */;
+	__le32 as_dword;
+};
+
+
+/*
+ * RDMA doorbell data for CQ: internal CID, flags, params bits and a 32-bit value
+ */
+struct rdma_pwm_val32_data
+{
+	__le16 icid /* internal CID */;
+	uint8_t agg_flags /* bit for every DQ counter flags in CM context that DQ can increment */;
+	uint8_t params;
+#define RDMA_PWM_VAL32_DATA_AGG_CMD_MASK    0x3 /* aggregative command to CM (use enum db_agg_cmd_sel) */
+#define RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT   0
+#define RDMA_PWM_VAL32_DATA_BYPASS_EN_MASK  0x1 /* enable QM bypass */
+#define RDMA_PWM_VAL32_DATA_BYPASS_EN_SHIFT 2
+#define RDMA_PWM_VAL32_DATA_RESERVED_MASK   0x1F
+#define RDMA_PWM_VAL32_DATA_RESERVED_SHIFT  3
+	__le32 value /* aggregated value to update */;
+};
+
+/* CQ doorbell data, accessible either field by field or as a regpair */
+union rdma_pwm_val32_data_union
+{
+	struct rdma_pwm_val32_data as_struct /* Parameters field */;
+	struct regpair as_repair; /* NOTE(review): name looks like a typo of "as_regpair"; it is part of the interface, so confirm users before renaming */
+};
+
+#endif /* __QED_HSI_RDMA__ */
diff --git a/providers/qedr/rdma_common.h b/providers/qedr/rdma_common.h
new file mode 100644
index 0000000..0c25793
--- /dev/null
+++ b/providers/qedr/rdma_common.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __RDMA_COMMON__
+#define __RDMA_COMMON__
+/************************/
+/* RDMA FW CONSTANTS */
+/************************/
+
+#define RDMA_RESERVED_LKEY			(0)			//Reserved lkey value
+#define RDMA_RING_PAGE_SIZE			(0x1000)	//ring page size: 4KB pages
+
+#define	RDMA_MAX_SGE_PER_SQ_WQE		(4)		//max number of SGEs in a single SQ request
+#define	RDMA_MAX_SGE_PER_RQ_WQE		(4)		//max number of SGEs in a single RQ request
+
+#define RDMA_MAX_DATA_SIZE_IN_WQE	(0x7FFFFFFF)	//max size of data in a single request (2^31 - 1)
+
+#define RDMA_REQ_RD_ATOMIC_ELM_SIZE		(0x50)
+#define RDMA_RESP_RD_ATOMIC_ELM_SIZE	(0x20)
+
+#define RDMA_MAX_CQS				(64*1024)
+#define RDMA_MAX_TIDS				(128*1024-1)
+#define RDMA_MAX_PDS				(64*1024)
+
+#define RDMA_NUM_STATISTIC_COUNTERS			MAX_NUM_VPORTS
+#define RDMA_NUM_STATISTIC_COUNTERS_K2			MAX_NUM_VPORTS_K2
+#define RDMA_NUM_STATISTIC_COUNTERS_BB			MAX_NUM_VPORTS_BB
+
+#define RDMA_TASK_TYPE (PROTOCOLID_ROCE)
+
+/* SRQ identifier: SRQ index plus an opaque_fid value */
+struct rdma_srq_id
+{
+	__le16 srq_idx /* SRQ index */;
+	__le16 opaque_fid;
+};
+
+/* SRQ producer values: one for SGEs and one for WQEs */
+struct rdma_srq_producers
+{
+	__le32 sge_prod /* Current produced sge in SRQ */;
+	__le32 wqe_prod /* Current produced WQE to SRQ */;
+};
+
+#endif /* __RDMA_COMMON__ */
diff --git a/providers/qedr/roce_common.h b/providers/qedr/roce_common.h
new file mode 100644
index 0000000..b01c2ad
--- /dev/null
+++ b/providers/qedr/roce_common.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __ROCE_COMMON__
+#define __ROCE_COMMON__
+/************************************************************************/
+/* Add include to common rdma target for both eCore and protocol rdma driver */
+/************************************************************************/
+#include "rdma_common.h"
+/************************/
+/* ROCE FW CONSTANTS */
+/************************/
+
+#define ROCE_REQ_MAX_INLINE_DATA_SIZE (256)	//max size of inline data in a single request
+#define ROCE_REQ_MAX_SINGLE_SQ_WQE_SIZE	(288)	//Maximum size of a single SQ WQE (rdma wqe and inline data): 256 of inline data + 32
+
+#define ROCE_MAX_QPS				(32*1024)
+#define ROCE_DCQCN_NP_MAX_QPS  (64)	/* notification point max QPs */
+#define ROCE_DCQCN_RP_MAX_QPS  (64)		/* reaction point max QPs */
+
+#endif /* __ROCE_COMMON__ */
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH rdma-core 2/6] libqedr: verbs
From: Ram Amrani @ 2016-10-20  9:49 UTC (permalink / raw)
  To: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA
  Cc: Ariel.Elior-YGCgFSpz5w/QT0dZR+AlfA,
	Michal.Kalderon-YGCgFSpz5w/QT0dZR+AlfA, Ram Amrani, Ram Amrani
In-Reply-To: <1476956952-17388-1-git-send-email-Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>

From: Ram Amrani <Ram.Amrani-74tsMCuadCbQT0dZR+AlfA@public.gmane.org>

Introduce the verbs: create, modify, query and destroy for QPs, CQs, etc.

Signed-off-by: Ram Amrani <Ram.Amrani-YGCgFSpz5w/QT0dZR+AlfA@public.gmane.org>
---
 providers/qedr/qelr_verbs.c | 1948 +++++++++++++++++++++++++++++++++++++++++++
 providers/qedr/qelr_verbs.h |   83 ++
 2 files changed, 2031 insertions(+)
 create mode 100644 providers/qedr/qelr_verbs.c
 create mode 100644 providers/qedr/qelr_verbs.h

diff --git a/providers/qedr/qelr_verbs.c b/providers/qedr/qelr_verbs.c
new file mode 100644
index 0000000..496493a
--- /dev/null
+++ b/providers/qedr/qelr_verbs.c
@@ -0,0 +1,1948 @@
+/*
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <config.h>
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+#include <errno.h>
+#include <pthread.h>
+#include <malloc.h>
+#include <sys/mman.h>
+#include <netinet/in.h>
+#include <unistd.h>
+
+#include "qelr.h"
+#include "qelr_abi.h"
+#include "qelr_chain.h"
+#include "qelr_verbs.h"
+
+#define PTR_LO(x) ((uint32_t)(((uint64_t)(x)) & 0xffffffff)) /* low 32 bits of x after widening to 64 bits */
+#define PTR_HI(x) ((uint32_t)(((uint64_t)(x)) >> 32)) /* high 32 bits of x after widening to 64 bits */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <execinfo.h>
+
+/* Fast path debug prints: expand to nothing here; the commented-out variant below forwards to DP_VERBOSE */
+#define FP_DP_VERBOSE(...)
+/* #define FP_DP_VERBOSE(...)	DP_VERBOSE(__VA_ARGS__) */
+
+#define QELR_SQE_ELEMENT_SIZE	(sizeof(struct rdma_sq_sge)) /* size of one struct rdma_sq_sge */
+#define QELR_RQE_ELEMENT_SIZE	(sizeof(struct rdma_rq_sge)) /* size of one struct rdma_rq_sge */
+#define QELR_CQE_SIZE		(sizeof(union rdma_cqe)) /* size of one CQE; used as the CQ chain element size */
+
+/*
+ * Step the software consumer of @info: increment wqe_cons and move cons
+ * to the next slot, wrapping around at max_wr.
+ */
+static void qelr_inc_sw_cons_u16(struct qelr_qp_hwq_info *info)
+{
+	info->wqe_cons += 1;
+	info->cons = (1 + info->cons) % info->max_wr;
+}
+
+/*
+ * Step the software producer of @info to the next slot, wrapping around
+ * at max_wr.
+ */
+static void qelr_inc_sw_prod_u16(struct qelr_qp_hwq_info *info)
+{
+	info->prod = (1 + info->prod) % info->max_wr;
+}
+
+/*
+ * Query the device attributes and format the firmware version string.
+ *
+ * @context: device context to query
+ * @attr:    output attributes; zeroed first, then filled by
+ *           ibv_cmd_query_device()
+ *
+ * The raw firmware version is split into four 8-bit components, most
+ * significant first (major.minor.revision.eng), and written to
+ * attr->fw_ver.
+ *
+ * Returns the status of ibv_cmd_query_device(). On failure the raw version
+ * is not decoded, since the command did not provide it.
+ */
+int qelr_query_device(struct ibv_context *context,
+		      struct ibv_device_attr *attr)
+{
+	struct ibv_query_device cmd;
+	uint64_t fw_ver;
+	unsigned int major, minor, revision, eng;
+	int status;
+
+	bzero(attr, sizeof(*attr));
+	status = ibv_cmd_query_device(context, attr, &fw_ver, &cmd,
+				      sizeof(cmd));
+	/* fw_ver is only meaningful when the command succeeded */
+	if (status)
+		return status;
+
+	major = (fw_ver >> 24) & 0xff;
+	minor = (fw_ver >> 16) & 0xff;
+	revision = (fw_ver >> 8) & 0xff;
+	eng = fw_ver & 0xff;
+
+	/* the components are unsigned, so use %u rather than %d */
+	snprintf(attr->fw_ver, sizeof(attr->fw_ver),
+		 "%u.%u.%u.%u", major, minor, revision, eng);
+
+	return 0;
+}
+
+/*
+ * Query the attributes of @port on @context into @attr.
+ *
+ * Thin wrapper: returns whatever ibv_cmd_query_port() returns.
+ */
+int qelr_query_port(struct ibv_context *context, uint8_t port,
+		    struct ibv_port_attr *attr)
+{
+	struct ibv_query_port cmd;
+
+	return ibv_cmd_query_port(context, port, attr, &cmd, sizeof(cmd));
+}
+
+/*
+ * Allocate a protection domain on @context.
+ *
+ * A zeroed qelr_pd is created, ibv_cmd_alloc_pd() is issued with a zeroed
+ * command, and the pd_id from the response is stored in the new object.
+ *
+ * Returns the embedded ibv_pd on success. Returns NULL if the memory
+ * allocation or the command fails; in the latter case the qelr_pd is freed.
+ */
+struct ibv_pd *qelr_alloc_pd(struct ibv_context *context)
+{
+	struct qelr_devctx *cxt = get_qelr_ctx(context);
+	struct qelr_alloc_pd_resp resp;
+	struct qelr_alloc_pd_req cmd;
+	struct qelr_pd *pd;
+	int rc;
+
+	pd = calloc(1, sizeof(*pd));
+	if (pd == NULL)
+		return NULL;
+
+	memset(&cmd, 0, sizeof(cmd));
+
+	rc = ibv_cmd_alloc_pd(context, &pd->ibv_pd, &cmd.cmd, sizeof(cmd),
+			      &resp.ibv_resp, sizeof(resp));
+	if (rc != 0) {
+		free(pd);
+		return NULL;
+	}
+
+	pd->pd_id = resp.pd_id;
+
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_INIT, "Allocated pd: %d\n", pd->pd_id);
+
+	return &pd->ibv_pd;
+}
+
+/*
+ * Deallocate the protection domain @ibpd.
+ *
+ * The qelr_pd is freed, and the deallocation logged, only after
+ * ibv_cmd_dealloc_pd() has succeeded, so the log never reports a
+ * deallocation that did not happen.
+ *
+ * Returns 0 on success, or the non-zero status of ibv_cmd_dealloc_pd(),
+ * in which case the PD is left untouched.
+ */
+int qelr_dealloc_pd(struct ibv_pd *ibpd)
+{
+	struct qelr_pd *pd = get_qelr_pd(ibpd);
+	struct qelr_devctx *cxt = get_qelr_ctx(ibpd->context);
+	int rc;
+
+	rc = ibv_cmd_dealloc_pd(ibpd);
+	if (rc)
+		return rc;
+
+	/* log before free(): pd->pd_id is still readable here */
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_INIT, "Deallocated pd: %d\n",
+		   pd->pd_id);
+
+	free(pd);
+
+	return 0;
+}
+
+/*
+ * Register the memory region [@addr, @addr + @len) on @ibpd with the given
+ * @access flags.
+ *
+ * A zeroed qelr_mr is created and ibv_cmd_reg_mr() is issued with @addr
+ * used as the HCA virtual address as well.
+ *
+ * Returns the embedded ibv_mr on success. Returns NULL if the memory
+ * allocation or the command fails; in the latter case the qelr_mr is freed.
+ */
+struct ibv_mr *qelr_reg_mr(struct ibv_pd *ibpd, void *addr,
+			   size_t len, int access)
+{
+	struct qelr_pd *pd = get_qelr_pd(ibpd);
+	struct qelr_devctx *cxt = get_qelr_ctx(ibpd->context);
+	uint64_t hca_va = (uintptr_t) addr;
+	struct qelr_reg_mr_resp resp;
+	struct ibv_reg_mr cmd;
+	struct qelr_mr *mr;
+	int rc;
+
+	mr = calloc(1, sizeof(*mr));
+	if (mr == NULL)
+		return NULL;
+
+	rc = ibv_cmd_reg_mr(ibpd, addr, len, hca_va, access, &mr->ibv_mr,
+			    &cmd, sizeof(cmd), &resp.ibv_resp, sizeof(resp));
+	if (rc != 0) {
+		free(mr);
+		return NULL;
+	}
+
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_MR,
+		   "MR Register %p completed succesfully pd_id=%d addr=%p len=%zu access=%d lkey=%x rkey=%x\n",
+		   mr, pd->pd_id, addr, len, access, mr->ibv_mr.lkey,
+		   mr->ibv_mr.rkey);
+
+	return &mr->ibv_mr;
+}
+
+/*
+ * Deregister the memory region @mr and release its memory.
+ *
+ * Returns 0 on success, or the non-zero status of ibv_cmd_dereg_mr(), in
+ * which case @mr is left untouched.
+ *
+ * NOTE(review): @mr is passed straight to free(); this assumes the ibv_mr
+ * is the first member of the qelr_mr allocated in qelr_reg_mr() - confirm.
+ */
+int qelr_dereg_mr(struct ibv_mr *mr)
+{
+	struct qelr_devctx *cxt = get_qelr_ctx(mr->context);
+	int rc;
+
+	rc = ibv_cmd_dereg_mr(mr);
+	if (rc)
+		return rc;
+
+	/* log first: the pointer value must not be used once it is freed */
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_MR,
+		   "MR DERegister %p completed succesfully\n", mr);
+
+	free(mr);
+
+	return 0;
+}
+
+/* Advance the CQ to its next element. When the element being left is the
+ * one recorded as toggle_cqe, the expected toggle bit is flipped first.
+ */
+static void consume_cqe(struct qelr_cq *cq)
+{
+	if (cq->toggle_cqe == cq->latest_cqe)
+		cq->chain_toggle ^= RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
+
+	cq->latest_cqe = qelr_chain_consume(&cq->chain);
+}
+
+/* Number of CQ elements to allocate for a requested CQ depth. */
+static inline int qelr_cq_entries(int entries)
+{
+	const int fw_extra = 1;	/* FW requires an extra entry */
+
+	return entries + fw_extra;
+}
+
+/* Create a completion queue with room for @cqe entries.
+ *
+ * @context:     device context the CQ belongs to
+ * @cqe:         requested depth; must be within 1..cxt->max_cqes
+ * @channel:     optional completion channel, forwarded to the kernel
+ * @comp_vector: completion vector, forwarded to the kernel
+ *
+ * Returns the embedded ibv_cq on success, NULL on any failure (invalid
+ * depth, allocation failure or kernel command failure).
+ */
+struct ibv_cq *qelr_create_cq(struct ibv_context *context, int cqe,
+			      struct ibv_comp_channel *channel,
+			      int comp_vector)
+{
+	struct qelr_devctx *cxt = get_qelr_ctx(context);
+	struct qelr_create_cq_resp resp;
+	struct qelr_create_cq_req cmd;
+	struct qelr_cq *cq;
+	int chain_size;
+	int rc;
+
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ,
+		   "create cq: context=%p, cqe=%d, channel=%p, comp_vector=%d\n",
+		   context, cqe, channel, comp_vector);
+
+	/* reject zero and negative depths as well as oversized ones */
+	if (cqe <= 0 || cqe > cxt->max_cqes) {
+		DP_ERR(cxt->dbg_fp,
+		       "create cq: failed. attempted to allocate %d cqes but valid range is 1...%d\n",
+		       cqe, cxt->max_cqes);
+		return NULL;
+	}
+
+	/* allocate CQ structure */
+	cq = calloc(1, sizeof(*cq));
+	if (!cq)
+		return NULL;
+
+	/* allocate CQ buffer */
+	chain_size = qelr_cq_entries(cqe) * QELR_CQE_SIZE;
+	rc = qelr_chain_alloc(&cq->chain, chain_size, cxt->kernel_page_size,
+			      QELR_CQE_SIZE);
+	if (rc)
+		goto err_0;
+
+	/* do not hand uninitialised stack bytes to the kernel */
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.addr = (uintptr_t) cq->chain.addr;
+	cmd.len = cq->chain.size;
+	rc = ibv_cmd_create_cq(context, cqe, channel, comp_vector,
+			       &cq->ibv_cq, &cmd.ibv_cmd, sizeof(cmd),
+			       &resp.ibv_resp, sizeof(resp));
+	if (rc) {
+		DP_ERR(cxt->dbg_fp, "create cq: failed with rc = %d\n", rc);
+		goto err_1;
+	}
+
+	/* map the doorbell and prepare its data */
+	cq->db.data.icid = htole16(resp.icid);
+	cq->db.data.params = DB_AGG_CMD_SET <<
+		RDMA_PWM_VAL32_DATA_AGG_CMD_SHIFT;
+	cq->db_addr = cxt->db_addr + resp.db_offset;
+
+	/* point to the very last element, passing this we will toggle */
+	cq->toggle_cqe = qelr_chain_get_last_elem(&cq->chain);
+	cq->chain_toggle = RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK;
+	/* must differ from toggle_cqe so the first consume does not toggle */
+	cq->latest_cqe = NULL;
+	consume_cqe(cq);
+
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ,
+		   "create cq: successfully created %p\n", cq);
+
+	return &cq->ibv_cq;
+
+err_1:
+	qelr_chain_free(&cq->chain);
+err_0:
+	free(cq);
+
+	return NULL;
+}
+
+/* Destroy a completion queue and release its chain and wrapper.
+ * Returns 0 on success, or the error from the kernel command, in which
+ * case nothing is freed.
+ */
+int qelr_destroy_cq(struct ibv_cq *ibv_cq)
+{
+	struct qelr_devctx *cxt = get_qelr_ctx(ibv_cq->context);
+	struct qelr_cq *cq = get_qelr_cq(ibv_cq);
+	int rc;
+
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ, "destroy cq: %p\n", cq);
+
+	rc = ibv_cmd_destroy_cq(ibv_cq);
+	if (rc) {
+		DP_ERR(cxt->dbg_fp,
+		       "destroy cq: failed to destroy %p, got %d.\n", cq, rc);
+		return rc;
+	}
+
+	/* Log before freeing: the pointer value must not be used once the
+	 * object has been released.
+	 */
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ,
+		   "destroy cq: successfully destroyed %p\n", cq);
+
+	qelr_chain_free(&cq->chain);
+	free(cq);
+
+	return 0;
+}
+
+/* Free the shadow RQ array (rqe_wr_id) of the QP. */
+static void qelr_free_rq(struct qelr_qp *qp)
+{
+	free(qp->rqe_wr_id);
+}
+
+/* Free the shadow SQ array (wqe_wr_id) of the QP. */
+static void qelr_free_sq(struct qelr_qp *qp)
+{
+	free(qp->wqe_wr_id);
+}
+
+/* Release the chain backing the send queue. */
+static void qelr_chain_free_sq(struct qelr_qp *qp)
+{
+	qelr_chain_free(&qp->sq.chain);
+}
+
+/* Release the chain backing the receive queue. */
+static void qelr_chain_free_rq(struct qelr_qp *qp)
+{
+	qelr_chain_free(&qp->rq.chain);
+}
+
+/* Size and allocate the send-queue chain.
+ *
+ * The requested max_send_wr is clamped to [1, cxt->max_send_wr]; the chain
+ * holds sges_per_send_wr elements of QELR_SQE_ELEMENT_SIZE bytes per WR.
+ * qp->sq.max_wr and qp->sq.max_sges are recorded regardless of the
+ * allocation result. Returns the result of qelr_chain_alloc().
+ */
+static inline int qelr_create_qp_buffers_sq(struct qelr_devctx *cxt,
+					    struct qelr_qp *qp,
+					    struct ibv_qp_init_attr *attrs)
+{
+	uint32_t max_send_wr, max_send_sges, max_send_buf;
+	int chain_size;
+	int rc;
+
+	/* SQ */
+	max_send_wr = attrs->cap.max_send_wr;
+	max_send_wr = max_t(uint32_t, max_send_wr, 1);
+	max_send_wr = min_t(uint32_t, max_send_wr, cxt->max_send_wr);
+	max_send_sges = max_send_wr * cxt->sges_per_send_wr;
+	max_send_buf = max_send_sges * QELR_SQE_ELEMENT_SIZE;
+
+	chain_size = max_send_buf;
+	rc = qelr_chain_alloc(&qp->sq.chain, chain_size, cxt->kernel_page_size,
+			      QELR_SQE_ELEMENT_SIZE);
+	if (rc)
+		DP_ERR(cxt->dbg_fp, "create qp: failed to map SQ, got %d\n",
+		       rc);
+
+	qp->sq.max_wr = max_send_wr;
+	qp->sq.max_sges = cxt->sges_per_send_wr;
+
+	return rc;
+}
+
+/* Size and allocate the receive-queue chain.
+ *
+ * The requested max_recv_wr is clamped to [1, cxt->max_recv_wr]; the chain
+ * holds sges_per_recv_wr elements of QELR_RQE_ELEMENT_SIZE bytes per WR.
+ * qp->rq.max_wr and qp->rq.max_sges are recorded regardless of the
+ * allocation result. Returns the result of qelr_chain_alloc().
+ */
+static inline int qelr_create_qp_buffers_rq(struct qelr_devctx *cxt,
+					    struct qelr_qp *qp,
+					    struct ibv_qp_init_attr *attrs)
+{
+	uint32_t max_recv_wr, max_recv_sges, max_recv_buf;
+	int chain_size;
+	int rc;
+
+	/* RQ */
+	max_recv_wr = attrs->cap.max_recv_wr;
+	max_recv_wr = max_t(uint32_t, max_recv_wr, 1);
+	max_recv_wr = min_t(uint32_t, max_recv_wr, cxt->max_recv_wr);
+	max_recv_sges = max_recv_wr * cxt->sges_per_recv_wr;
+	max_recv_buf = max_recv_sges * QELR_RQE_ELEMENT_SIZE;
+
+	chain_size = max_recv_buf;
+	rc = qelr_chain_alloc(&qp->rq.chain, chain_size, cxt->kernel_page_size,
+			      QELR_RQE_ELEMENT_SIZE);
+	if (rc)
+		DP_ERR(cxt->dbg_fp, "create qp: failed to map RQ, got %d\n",
+		       rc);
+
+	/* single point of truth for the limits, mirroring the SQ helper */
+	qp->rq.max_wr = max_recv_wr;
+	qp->rq.max_sges = cxt->sges_per_recv_wr;
+
+	return rc;
+}
+
+/* Allocate both queue chains of a QP. If the RQ allocation fails the SQ
+ * chain that was just allocated is released again. Returns 0 or the error
+ * of the failing step.
+ */
+static inline int qelr_create_qp_buffers(struct qelr_devctx *cxt,
+					 struct qelr_qp *qp,
+					 struct ibv_qp_init_attr *attrs)
+{
+	int rc = qelr_create_qp_buffers_sq(cxt, qp, attrs);
+
+	if (rc)
+		return rc;
+
+	rc = qelr_create_qp_buffers_rq(cxt, qp, attrs);
+	if (!rc)
+		return 0;
+
+	/* undo the SQ half */
+	qelr_chain_free_sq(qp);
+	return rc;
+}
+
+/* Fill in the SQ doorbell/icid fields from the create-QP response and
+ * allocate the shadow SQ (one entry per WR). Returns 0 or -ENOMEM.
+ */
+static inline int qelr_configure_qp_sq(struct qelr_devctx *cxt,
+				       struct qelr_qp *qp,
+				       struct ibv_qp_init_attr *attrs,
+				       struct qelr_create_qp_resp *resp)
+{
+	/* doorbell addresses */
+	qp->sq.db = cxt->db_addr + resp->sq_db_offset;
+	qp->sq.edpm_db = cxt->db_addr;
+
+	/* connection id, also pre-loaded into the doorbell data */
+	qp->sq.icid = resp->sq_icid;
+	qp->sq.db_data.data.icid = htole16(resp->sq_icid);
+	qp->sq.prod = 0;
+
+	/* shadow SQ */
+	qp->wqe_wr_id = calloc(qp->sq.max_wr, sizeof(*qp->wqe_wr_id));
+	if (qp->wqe_wr_id)
+		return 0;
+
+	DP_ERR(cxt->dbg_fp,
+	       "create qp: failed shdow SQ memory allocation\n");
+	return -ENOMEM;
+}
+
+/* Fill in the RQ doorbell/icid fields from the create-QP response and
+ * allocate the shadow RQ (one entry per WR). Returns 0 or -ENOMEM.
+ */
+static inline int qelr_configure_qp_rq(struct qelr_devctx *cxt,
+				       struct qelr_qp *qp,
+				       struct ibv_qp_init_attr *attrs,
+				       struct qelr_create_qp_resp *resp)
+{
+	/* doorbell address */
+	qp->rq.db = cxt->db_addr + resp->rq_db_offset;
+
+	/* connection id, also pre-loaded into the doorbell data */
+	qp->rq.icid = resp->rq_icid;
+	qp->rq.db_data.data.icid = htole16(resp->rq_icid);
+	qp->rq.prod = 0;
+
+	/* shadow RQ */
+	qp->rqe_wr_id = calloc(qp->rq.max_wr, sizeof(*qp->rqe_wr_id));
+	if (qp->rqe_wr_id)
+		return 0;
+
+	DP_ERR(cxt->dbg_fp,
+	       "create qp: failed shdow RQ memory allocation\n");
+	return -ENOMEM;
+}
+
+/* Initialise the software state of a freshly created QP from the kernel
+ * response: lock, id, initial state and both queues. If the RQ half fails
+ * the shadow SQ allocated by the SQ half is freed. Returns 0 or the error
+ * of the failing half.
+ */
+static inline int qelr_configure_qp(struct qelr_devctx *cxt, struct qelr_qp *qp,
+				    struct ibv_qp_init_attr *attrs,
+				    struct qelr_create_qp_resp *resp)
+{
+	int rc;
+
+	/* general */
+	pthread_spin_init(&qp->q_lock, PTHREAD_PROCESS_PRIVATE);
+	qp->state = QELR_QPS_RST;
+	qp->qp_id = resp->qp_id;
+	qp->atomic_supported = resp->atomic_supported;
+	qp->sq_sig_all = attrs->sq_sig_all;
+
+	rc = qelr_configure_qp_sq(cxt, qp, attrs, resp);
+	if (rc)
+		return rc;
+
+	rc = qelr_configure_qp_rq(cxt, qp, attrs, resp);
+	if (!rc)
+		return 0;
+
+	qelr_free_sq(qp);
+	return rc;
+}
+
+/* Dump the caller-supplied QP init attributes to the debug log. */
+static inline void qelr_print_qp_init_attr(
+		struct qelr_devctx *cxt,
+		struct ibv_qp_init_attr *attr)
+{
+	struct ibv_qp_cap *cap = &attr->cap;
+
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_QP,
+		   "create qp: send_cq=%p, recv_cq=%p, srq=%p, max_inline_data=%d, max_recv_sge=%d, max_recv_wr=%d, max_send_sge=%d, max_send_wr=%d, qp_type=%d, sq_sig_all=%d\n",
+		   attr->send_cq, attr->recv_cq, attr->srq,
+		   cap->max_inline_data, cap->max_recv_sge,
+		   cap->max_recv_wr, cap->max_send_sge,
+		   cap->max_send_wr, attr->qp_type, attr->sq_sig_all);
+}
+
+/* Describe the SQ chain (length and user address) in the create-QP request. */
+static inline void
+qelr_create_qp_configure_sq_req(struct qelr_qp *qp,
+				struct qelr_create_qp_req *req)
+{
+	req->sq_len = qp->sq.chain.size;
+	req->sq_addr = (uintptr_t)qp->sq.chain.addr;
+}
+
+/* Describe the RQ chain (length and user address) in the create-QP request. */
+static inline void
+qelr_create_qp_configure_rq_req(struct qelr_qp *qp,
+				struct qelr_create_qp_req *req)
+{
+	req->rq_len = qp->rq.chain.size;
+	req->rq_addr = (uintptr_t)qp->rq.chain.addr;
+}
+
+/* Build the create-QP request: zero it, describe both queue chains and
+ * store the user-space QP pointer, split into high and low halves, as the
+ * QP handle.
+ */
+static inline void
+qelr_create_qp_configure_req(struct qelr_qp *qp,
+			     struct qelr_create_qp_req *req)
+{
+	memset(req, 0, sizeof(*req));
+
+	qelr_create_qp_configure_sq_req(qp, req);
+	qelr_create_qp_configure_rq_req(qp, req);
+
+	req->qp_handle_lo = PTR_LO(qp);
+	req->qp_handle_hi = PTR_HI(qp);
+}
+
+/* Create a queue pair on @pd according to @attrs.
+ *
+ * Steps: allocate the wrapper, allocate the SQ/RQ chains, issue the kernel
+ * create-QP command, then configure the software state from the response.
+ * Returns the embedded ibv_qp on success, NULL on any failure; everything
+ * acquired up to the failing step is released through the err labels.
+ */
+struct ibv_qp *qelr_create_qp(struct ibv_pd *pd,
+			      struct ibv_qp_init_attr *attrs)
+{
+	struct qelr_devctx *cxt = get_qelr_ctx(pd->context);
+	struct qelr_create_qp_resp resp;
+	struct qelr_create_qp_req req;
+	struct qelr_qp *qp;
+	int rc;
+
+	qelr_print_qp_init_attr(cxt, attrs);
+
+	qp = calloc(1, sizeof(*qp));
+	if (!qp)
+		return NULL;
+
+	rc = qelr_create_qp_buffers(cxt, qp, attrs);
+	if (rc)
+		goto err0;
+
+	qelr_create_qp_configure_req(qp, &req);
+
+	rc = ibv_cmd_create_qp(pd, &qp->ibv_qp, attrs, &req.ibv_qp, sizeof(req),
+			       &resp.ibv_resp, sizeof(resp));
+	if (rc) {
+		DP_ERR(cxt->dbg_fp,
+		       "create qp: failed on ibv_cmd_create_qp with %d\n", rc);
+		goto err1;
+	}
+
+	rc = qelr_configure_qp(cxt, qp, attrs, &resp);
+	if (rc)
+		goto err2;
+
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_QP,
+		   "create qp: successfully created %p. handle_hi=%x handle_lo=%x\n",
+		   qp, req.qp_handle_hi, req.qp_handle_lo);
+
+	return &qp->ibv_qp;
+
+	/* err2: the kernel QP exists and must be destroyed; a failure here is
+	 * only logged and the local unwind continues.
+	 */
+err2:
+	rc = ibv_cmd_destroy_qp(&qp->ibv_qp);
+	if (rc)
+		DP_ERR(cxt->dbg_fp, "create qp: fatal fault. rc=%d\n", rc);
+	/* err1: both chains were allocated by qelr_create_qp_buffers() */
+err1:
+	qelr_chain_free_sq(qp);
+	qelr_chain_free_rq(qp);
+	/* err0: only the wrapper itself is held */
+err0:
+	free(qp);
+
+	return NULL;
+}
+
+/* Dump an address-handle attribute block to the debug log.
+ *
+ * The arguments are passed in the same order as the labels in the format
+ * string (sgid_index before hop_limit). The two 64-bit GID halves are cast
+ * to unsigned long long so the conversion specifier matches on every ABI.
+ */
+static void qelr_print_ah_attr(struct qelr_devctx *cxt, struct ibv_ah_attr *attr)
+{
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_QP,
+		   "grh.dgid=[%llx:%llx], grh.flow_label=%d, grh.sgid_index=%d, grh.hop_limit=%d, grh.traffic_class=%d, dlid=%d, sl=%d, src_path_bits=%d, static_rate = %d, port_num=%d\n",
+		   (unsigned long long)attr->grh.dgid.global.interface_id,
+		   (unsigned long long)attr->grh.dgid.global.subnet_prefix,
+		   attr->grh.flow_label, attr->grh.sgid_index,
+		   attr->grh.hop_limit, attr->grh.traffic_class, attr->dlid,
+		   attr->sl, attr->src_path_bits,
+		   attr->static_rate, attr->port_num);
+}
+
+/* Dump a full ibv_qp_attr to the debug log: first the scalar fields in one
+ * tab-separated line, then the primary and the alternate address-handle
+ * attributes. Every field is printed regardless of any attribute mask.
+ */
+static void qelr_print_qp_attr(struct qelr_devctx *cxt, struct ibv_qp_attr *attr)
+{
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_QP,
+		   "\tqp_state=%d\tcur_qp_state=%d\tpath_mtu=%d\tpath_mig_state=%d\tqkey=%d\trq_psn=%d\tsq_psn=%d\tdest_qp_num=%d\tqp_access_flags=%d\tmax_inline_data=%d\tmax_recv_sge=%d\tmax_recv_wr=%d\tmax_send_sge=%d\tmax_send_wr=%d\tpkey_index=%d\talt_pkey_index=%d\ten_sqd_async_notify=%d\tsq_draining=%d\tmax_rd_atomic=%d\tmax_dest_rd_atomic=%d\tmin_rnr_timer=%d\tport_num=%d\ttimeout=%d\tretry_cnt=%d\trnr_retry=%d\talt_port_num=%d\talt_timeout=%d\n",
+		   attr->qp_state, attr->cur_qp_state, attr->path_mtu,
+		   attr->path_mig_state, attr->qkey, attr->rq_psn, attr->sq_psn,
+		   attr->dest_qp_num, attr->qp_access_flags,
+		   attr->cap.max_inline_data, attr->cap.max_recv_sge,
+		   attr->cap.max_recv_wr, attr->cap.max_send_sge,
+		   attr->cap.max_send_wr, attr->pkey_index,
+		   attr->alt_pkey_index, attr->en_sqd_async_notify,
+		   attr->sq_draining, attr->max_rd_atomic,
+		   attr->max_dest_rd_atomic, attr->min_rnr_timer,
+		   attr->port_num, attr->timeout, attr->retry_cnt,
+		   attr->rnr_retry, attr->alt_port_num, attr->alt_timeout);
+
+	qelr_print_ah_attr(cxt, &attr->ah_attr);
+	qelr_print_ah_attr(cxt, &attr->alt_ah_attr);
+}
+
+/* Query QP attributes through the kernel command and dump the resulting
+ * attribute block to the debug log. The dump happens whether or not the
+ * command succeeded. Returns the command's return code.
+ */
+int qelr_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr,
+		    int attr_mask, struct ibv_qp_init_attr *init_attr)
+{
+	struct qelr_devctx *cxt = get_qelr_ctx(qp->context);
+	struct ibv_query_qp cmd;
+	int rc;
+
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_QP, "QP Query %p, attr_mask=0x%x\n",
+		   get_qelr_qp(qp), attr_mask);
+
+	rc = ibv_cmd_query_qp(qp, attr, attr_mask, init_attr,
+			      &cmd, sizeof(cmd));
+
+	qelr_print_qp_attr(cxt, attr);
+
+	return rc;
+}
+
+/* Map a verbs QP state to the driver's own state enum. IBV_QPS_ERR and any
+ * state without an explicit mapping translate to QELR_QPS_ERR.
+ */
+static enum qelr_qp_state get_qelr_qp_state(enum ibv_qp_state qps)
+{
+	enum qelr_qp_state state = QELR_QPS_ERR;
+
+	switch (qps) {
+	case IBV_QPS_RESET:
+		state = QELR_QPS_RST;
+		break;
+	case IBV_QPS_INIT:
+		state = QELR_QPS_INIT;
+		break;
+	case IBV_QPS_RTR:
+		state = QELR_QPS_RTR;
+		break;
+	case IBV_QPS_RTS:
+		state = QELR_QPS_RTS;
+		break;
+	case IBV_QPS_SQD:
+		state = QELR_QPS_SQD;
+		break;
+	case IBV_QPS_SQE:
+		state = QELR_QPS_SQE;
+		break;
+	case IBV_QPS_ERR:
+	default:
+		break;
+	}
+
+	return state;
+}
+
+/* Return a HW queue to its initial state: reset the chain and clear the
+ * producer/consumer indices and the doorbell value.
+ */
+static void qelr_reset_qp_hwq_info(struct qelr_qp_hwq_info *q)
+{
+	qelr_chain_reset(&q->chain);
+
+	q->db_data.data.value = 0;
+	q->wqe_cons = 0;
+	q->cons = 0;
+	q->prod = 0;
+}
+
+/* Apply a QP state transition to the software state, under q_lock.
+ *
+ * Accepted transitions (anything else yields -EINVAL and leaves the state
+ * unchanged; a transition to the current state is a no-op returning 0):
+ *   RST  -> INIT            (queues and prev_wqe_size are reset)
+ *   INIT -> RTR | ERR       (RTR rings the RQ doorbell)
+ *   RTR  -> RTS | ERR
+ *   RTS  -> SQD | SQE | ERR
+ *   SQD  -> RTS | SQE | ERR
+ *   SQE  -> RTS | ERR
+ *   ERR  -> RST
+ */
+static int qelr_update_qp_state(struct qelr_qp *qp,
+				enum ibv_qp_state new_ib_state)
+{
+	enum qelr_qp_state new_state = get_qelr_qp_state(new_ib_state);
+	int status = -EINVAL;
+
+	pthread_spin_lock(&qp->q_lock);
+
+	if (new_state == qp->state) {
+		status = 0;
+		goto unlock;
+	}
+
+	switch (qp->state) {
+	case QELR_QPS_RST:
+		if (new_state == QELR_QPS_INIT) {
+			qp->prev_wqe_size = 0;
+			qelr_reset_qp_hwq_info(&qp->sq);
+			qelr_reset_qp_hwq_info(&qp->rq);
+			status = 0;
+		}
+		break;
+	case QELR_QPS_INIT:
+		if (new_state == QELR_QPS_RTR) {
+			/* Update doorbell (in case post_recv was done before
+			 * move to RTR)
+			 */
+			wmb();
+			writel(qp->rq.db_data.raw, qp->rq.db);
+			wc_wmb();
+			status = 0;
+		} else if (new_state == QELR_QPS_ERR) {
+			status = 0;
+		}
+		break;
+	case QELR_QPS_RTR:
+		if (new_state == QELR_QPS_RTS || new_state == QELR_QPS_ERR)
+			status = 0;
+		break;
+	case QELR_QPS_RTS:
+		if (new_state == QELR_QPS_SQD || new_state == QELR_QPS_SQE ||
+		    new_state == QELR_QPS_ERR)
+			status = 0;
+		break;
+	case QELR_QPS_SQD:
+		if (new_state == QELR_QPS_RTS || new_state == QELR_QPS_SQE ||
+		    new_state == QELR_QPS_ERR)
+			status = 0;
+		break;
+	case QELR_QPS_SQE:
+		if (new_state == QELR_QPS_RTS || new_state == QELR_QPS_ERR)
+			status = 0;
+		break;
+	case QELR_QPS_ERR:
+		if (new_state == QELR_QPS_RST)
+			status = 0;
+		break;
+	default:
+		break;
+	}
+
+	if (!status)
+		qp->state = new_state;
+
+unlock:
+	pthread_spin_unlock(&qp->q_lock);
+
+	return status;
+}
+
+/* Modify a QP through the kernel command and, when the command succeeded
+ * and the mask contains IBV_QP_STATE, mirror the state change in the
+ * software QP. The result of the software transition is not propagated.
+ * Returns the kernel command's return code.
+ */
+int qelr_modify_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
+		     int attr_mask)
+{
+	struct qelr_devctx *cxt = get_qelr_ctx(ibqp->context);
+	struct qelr_qp *qp = get_qelr_qp(ibqp);
+	struct ibv_modify_qp cmd;
+	int rc;
+
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_QP, "QP Modify %p, attr_mask=0x%x\n",
+		   qp, attr_mask);
+
+	qelr_print_qp_attr(cxt, attr);
+
+	rc = ibv_cmd_modify_qp(ibqp, attr, attr_mask, &cmd, sizeof(cmd));
+	if (rc || !(attr_mask & IBV_QP_STATE))
+		return rc;
+
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_QP, "QP Modify state %d->%d\n",
+		   qp->state, attr->qp_state);
+	qelr_update_qp_state(qp, attr->qp_state);
+
+	return rc;
+}
+
+/* Destroy a queue pair: issue the kernel command, then release the shadow
+ * queues, both chains and the wrapper.
+ * Returns 0 on success, or the kernel command's error, in which case
+ * nothing is freed.
+ */
+int qelr_destroy_qp(struct ibv_qp *ibqp)
+{
+	struct qelr_devctx *cxt = get_qelr_ctx(ibqp->context);
+	struct qelr_qp *qp = get_qelr_qp(ibqp);
+	int rc = 0;
+
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_QP, "destroy qp: %p\n", qp);
+
+	rc = ibv_cmd_destroy_qp(ibqp);
+	if (rc) {
+		DP_ERR(cxt->dbg_fp,
+		       "destroy qp: failed to destroy %p, got %d.\n", qp, rc);
+		return rc;
+	}
+
+	/* Log before freeing: the pointer value must not be used once the
+	 * object has been released.
+	 */
+	DP_VERBOSE(cxt->dbg_fp, QELR_MSG_QP,
+		   "destroy qp: successfully destroyed %p\n", qp);
+
+	qelr_free_sq(qp);
+	qelr_free_rq(qp);
+	qelr_chain_free_sq(qp);
+	qelr_chain_free_rq(qp);
+	free(qp);
+
+	return 0;
+}
+
+/* Sum of the length fields of the first num_sge entries of sg_list. */
+static int sge_data_len(struct ibv_sge *sg_list, int num_sge)
+{
+	int total = 0;
+	int idx = 0;
+
+	while (idx < num_sge) {
+		total += sg_list[idx].length;
+		idx++;
+	}
+
+	return total;
+}
+
+/* Apply htobe64(htole64()) in place to each 64-bit word of one WQE element
+ * (ROCE_WQE_ELEM_SIZE bytes) starting at p.
+ */
+static void swap_wqe_data64(uint64_t *p)
+{
+	uint64_t *end = p + ROCE_WQE_ELEM_SIZE / sizeof(uint64_t);
+
+	while (p < end) {
+		*p = htobe64(htole64(*p));
+		p++;
+	}
+}
+
+/* Reset the per-WR EDPM bookkeeping of the QP.
+ *
+ * Clears qp->edpm, points rdma_ext at the start of the DPM payload buffer
+ * and marks the WR as an EDPM candidate only when qelr_chain_is_full()
+ * reports true for the SQ chain.
+ * NOTE(review): presumably "full" means every chain element is free, i.e.
+ * no WQE is outstanding - confirm against the chain implementation.
+ * The cxt parameter is not used here.
+ */
+static void qelr_init_edpm_info(struct qelr_qp *qp, struct qelr_devctx *cxt)
+{
+	memset(&qp->edpm, 0, sizeof(qp->edpm));
+
+	/* rdma_ext aliases the first bytes of dpm_payload */
+	qp->edpm.rdma_ext = (struct qelr_rdma_ext *)&qp->edpm.dpm_payload;
+	if (qelr_chain_is_full(&qp->sq.chain))
+		qp->edpm.is_edpm = 1;
+}
+
+/* Opcode values placed in the EDPM doorbell message. */
+#define QELR_IB_OPCODE_SEND_ONLY                         0x04
+#define QELR_IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE          0x05
+#define QELR_IB_OPCODE_RDMA_WRITE_ONLY                   0x0a
+#define QELR_IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE    0x0b
+/* True when the opcode carries immediate data. The argument is
+ * parenthesised so that compound expressions are compared as a whole; it
+ * is evaluated up to twice, so it must be free of side effects.
+ */
+#define QELR_IS_IMM(opcode) \
+	(((opcode) == QELR_IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE) || \
+	 ((opcode) == QELR_IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE))
+
+/* Fill the parameter word of the EDPM doorbell message for the current WR.
+ *
+ * Does nothing unless the WR is an EDPM candidate. The WQE size is @length
+ * plus 4 bytes when @opcode carries immediate data; the DPM size adds the
+ * doorbell header and is expressed in 64-bit units, rounded up.
+ */
+static inline void qelr_edpm_set_msg_data(struct qelr_qp *qp,
+					  uint8_t opcode,
+					  uint16_t length,
+					  uint8_t se,
+					  uint8_t comp)
+{
+	uint32_t wqe_size;
+	uint32_t dpm_size;
+
+	if (!qp->edpm.is_edpm)
+		return;
+
+	wqe_size = length + (QELR_IS_IMM(opcode) ? sizeof(uint32_t) : 0);
+	dpm_size = wqe_size + sizeof(struct db_roce_dpm_data);
+
+	SET_FIELD(qp->edpm.msg.data.params.params,
+		  DB_ROCE_DPM_PARAMS_SIZE,
+		  (dpm_size + sizeof(uint64_t) - 1) / sizeof(uint64_t));
+
+	SET_FIELD(qp->edpm.msg.data.params.params,
+		  DB_ROCE_DPM_PARAMS_DPM_TYPE, DPM_ROCE);
+
+	SET_FIELD(qp->edpm.msg.data.params.params,
+		  DB_ROCE_DPM_PARAMS_OPCODE, opcode);
+
+	SET_FIELD(qp->edpm.msg.data.params.params,
+		  DB_ROCE_DPM_PARAMS_WQE_SIZE, wqe_size);
+
+	SET_FIELD(qp->edpm.msg.data.params.params,
+		  DB_ROCE_DPM_PARAMS_COMPLETION_FLG, comp ? 1 : 0);
+
+	SET_FIELD(qp->edpm.msg.data.params.params,
+		  DB_ROCE_DPM_PARAMS_S_FLG, se ? 1 : 0);
+}
+
+/* Append the 32-bit invalidate-key / immediate value to the DPM payload
+ * and account for it in both the payload offset and size. No-op unless the
+ * WR is an EDPM candidate.
+ */
+static inline void qelr_edpm_set_inv_imm(struct qelr_qp *qp,
+					 uint32_t inv_key_or_imm_data)
+{
+	const uint32_t nbytes = sizeof(inv_key_or_imm_data);
+
+	if (!qp->edpm.is_edpm)
+		return;
+
+	memcpy(&qp->edpm.dpm_payload[qp->edpm.dpm_payload_offset],
+	       &inv_key_or_imm_data, nbytes);
+
+	qp->edpm.dpm_payload_offset += nbytes;
+	qp->edpm.dpm_payload_size += nbytes;
+}
+
+/* Store the remote address and rkey (network byte order) in the RDMA
+ * extension header of the DPM payload and account for the header in the
+ * payload offset and size. No-op unless the WR is an EDPM candidate.
+ */
+static inline void qelr_edpm_set_rdma_ext(struct qelr_qp *qp,
+					  uint64_t remote_addr,
+					  uint32_t rkey)
+{
+	struct qelr_rdma_ext *ext;
+
+	if (!qp->edpm.is_edpm)
+		return;
+
+	ext = qp->edpm.rdma_ext;
+	ext->remote_va = htonll(remote_addr);
+	ext->remote_key = htonl(rkey);
+
+	qp->edpm.dpm_payload_offset += sizeof(*ext);
+	qp->edpm.dpm_payload_size += sizeof(*ext);
+}
+
+/* Append @length bytes from @buf to the DPM payload at the current offset.
+ *
+ * No-op unless the WR is an EDPM candidate. Only dpm_payload_offset is
+ * advanced here; dpm_payload_size is not touched by this helper.
+ * NOTE(review): no bound check against the size of dpm_payload is done
+ * here - presumably the caller's inline-size limit guarantees the data
+ * fits; confirm against the buffer's declaration.
+ */
+static inline void qelr_edpm_set_payload(struct qelr_qp *qp, char *buf,
+					 uint32_t length)
+{
+	if (!qp->edpm.is_edpm)
+		return;
+
+	memcpy(&qp->edpm.dpm_payload[qp->edpm.dpm_payload_offset],
+	       buf,
+	       length);
+
+	qp->edpm.dpm_payload_offset += length;
+}
+
+/* Smaller of two values. Each argument may be evaluated twice, so pass
+ * only side-effect-free expressions.
+ */
+#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+
+/* Copy the WR's scatter list inline into the SQ chain (and, for EDPM
+ * candidates, into the DPM payload).
+ *
+ * @qp:       queue pair whose SQ chain receives the data
+ * @wqe_size: incremented once per chain element produced
+ * @wr:       work request whose sg_list is copied
+ * @bad_wr:   set to @wr when the total length exceeds
+ *            ROCE_REQ_MAX_INLINE_DATA_SIZE
+ * @bits:     flags byte of the WQE; @bit is OR-ed in when data is copied
+ * @bit:      inline flag to set
+ *
+ * Returns the number of bytes copied; 0 for an empty list or on error
+ * (the error case is distinguished by *bad_wr).
+ */
+static uint32_t qelr_prepare_sq_inline_data(struct qelr_qp *qp,
+					    uint8_t *wqe_size,
+					    struct ibv_send_wr *wr,
+					    struct ibv_send_wr **bad_wr,
+					    uint8_t *bits, uint8_t bit)
+{
+	int i, seg_siz;
+	char *seg_prt, *wqe;
+	uint32_t data_size = sge_data_len(wr->sg_list, wr->num_sge);
+
+	if (data_size > ROCE_REQ_MAX_INLINE_DATA_SIZE) {
+		DP_ERR(stderr, "Too much inline data in WR: %d\n", data_size);
+		*bad_wr = wr;
+		return 0;
+	}
+
+	if (!data_size)
+		return data_size;
+
+	/* set the bit */
+	*bits |= bit;
+
+	seg_prt = NULL;
+	wqe = NULL;
+	seg_siz = 0;
+
+	/* copy data inline */
+	for (i = 0; i < wr->num_sge; i++) {
+		uint32_t len = wr->sg_list[i].length;
+		/* char * keeps the pointer arithmetic below standard C; the
+		 * uintptr_t step makes the integer-to-pointer cast well
+		 * defined when pointers are narrower than 64 bits
+		 */
+		char *src = (char *)(uintptr_t)wr->sg_list[i].addr;
+
+		qelr_edpm_set_payload(qp, src, len);
+
+		while (len > 0) {
+			uint32_t cur;
+
+			/* new segment required */
+			if (!seg_siz) {
+				wqe = (char *)qelr_chain_produce(&qp->sq.chain);
+				seg_prt = wqe;
+				seg_siz = sizeof(struct rdma_sq_common_wqe);
+				(*wqe_size)++;
+			}
+
+			/* calculate currently allowed length */
+			cur = MIN(len, seg_siz);
+
+			memcpy(seg_prt, src, cur);
+
+			/* update segment variables */
+			seg_prt += cur;
+			seg_siz -= cur;
+			/* update sge variables */
+			src += cur;
+			len -= cur;
+
+			/* swap fully-completed segments */
+			if (!seg_siz)
+				swap_wqe_data64((uint64_t *)wqe);
+		}
+	}
+
+	/* swap last not completed segment */
+	if (seg_siz)
+		swap_wqe_data64((uint64_t *)wqe);
+
+	if (qp->edpm.is_edpm) {
+		qp->edpm.dpm_payload_size += data_size;
+
+		/* rdma_ext aliases the start of dpm_payload. Only RDMA WRITE
+		 * requests reserve that header; for any other opcode writing
+		 * dma_length would overwrite payload bytes already copied.
+		 */
+		if (wr->opcode == IBV_WR_RDMA_WRITE ||
+		    wr->opcode == IBV_WR_RDMA_WRITE_WITH_IMM)
+			qp->edpm.rdma_ext->dma_length = htonl(data_size);
+	}
+
+	return data_size;
+}
+
+/* Produce one SQ chain element per scatter/gather entry of the WR, filling
+ * in address, lkey and length (little endian). When wqe_size is non-NULL
+ * it is increased by the number of entries. Returns the total byte length.
+ */
+static uint32_t qelr_prepare_sq_sges(struct qelr_qp *qp,
+				     uint8_t *wqe_size,
+				     struct ibv_send_wr *wr)
+{
+	uint32_t total = 0;
+	int idx;
+
+	for (idx = 0; idx < wr->num_sge; idx++) {
+		struct ibv_sge *src = &wr->sg_list[idx];
+		struct rdma_sq_sge *sge = qelr_chain_produce(&qp->sq.chain);
+
+		TYPEPTR_ADDR_SET(sge, addr, src->addr);
+		sge->l_key = htole32(src->lkey);
+		sge->length = htole32(src->length);
+		total += src->length;
+	}
+
+	if (wqe_size)
+		*wqe_size += wr->num_sge;
+
+	return total;
+}
+
+/* Fill the second RDMA WQE element (rkey, remote VA) and attach the WR's
+ * data, inline when IBV_SEND_INLINE is set and as SGEs otherwise. The SGE
+ * path clears the EDPM candidate flag. Returns the data length in bytes.
+ */
+static uint32_t qelr_prepare_sq_rdma_data(struct qelr_qp *qp,
+					  struct rdma_sq_rdma_wqe_1st *rwqe,
+					  struct rdma_sq_rdma_wqe_2nd *rwqe2,
+					  struct ibv_send_wr *wr,
+					  struct ibv_send_wr **bad_wr)
+{
+	uint8_t inline_flag = 0;
+
+	memset(rwqe2, 0, sizeof(*rwqe2));
+	rwqe2->r_key = htole32(wr->wr.rdma.rkey);
+	TYPEPTR_ADDR_SET(rwqe2, remote_va, wr->wr.rdma.remote_addr);
+
+	if (!(wr->send_flags & IBV_SEND_INLINE)) {
+		qp->edpm.is_edpm = 0;
+		return qelr_prepare_sq_sges(qp, &rwqe->wqe_size, wr);
+	}
+
+	SET_FIELD2(inline_flag, RDMA_SQ_RDMA_WQE_1ST_INLINE_FLG, 1);
+	return qelr_prepare_sq_inline_data(qp, &rwqe->wqe_size, wr,
+					   bad_wr, &rwqe->flags, inline_flag);
+}
+
+/* Clear the second SEND WQE element and attach the WR's data, inline when
+ * IBV_SEND_INLINE is set and as SGEs otherwise. The SGE path clears the
+ * EDPM candidate flag. Returns the data length in bytes.
+ */
+static uint32_t qelr_prepare_sq_send_data(struct qelr_qp *qp,
+					  struct rdma_sq_send_wqe_1st *swqe,
+					  struct rdma_sq_send_wqe_2st *swqe2,
+					  struct ibv_send_wr *wr,
+					  struct ibv_send_wr **bad_wr)
+{
+	uint8_t inline_flag = 0;
+
+	memset(swqe2, 0, sizeof(*swqe2));
+
+	if (!(wr->send_flags & IBV_SEND_INLINE)) {
+		qp->edpm.is_edpm = 0;
+		return qelr_prepare_sq_sges(qp, &swqe->wqe_size, wr);
+	}
+
+	SET_FIELD2(inline_flag, RDMA_SQ_SEND_WQE_INLINE_FLG, 1);
+	return qelr_prepare_sq_inline_data(qp, &swqe->wqe_size, wr,
+					   bad_wr, &swqe->flags, inline_flag);
+}
+
+/* Translate a send WR opcode into the completion opcode reported for it.
+ * Opcodes without an explicit mapping are reported as IBV_WC_SEND.
+ */
+static enum ibv_wc_opcode qelr_ibv_to_wc_opcode(enum ibv_wr_opcode opcode)
+{
+	enum ibv_wc_opcode wc_opcode = IBV_WC_SEND;
+
+	switch (opcode) {
+	case IBV_WR_RDMA_WRITE:
+	case IBV_WR_RDMA_WRITE_WITH_IMM:
+		wc_opcode = IBV_WC_RDMA_WRITE;
+		break;
+	case IBV_WR_RDMA_READ:
+		wc_opcode = IBV_WC_RDMA_READ;
+		break;
+	case IBV_WR_ATOMIC_CMP_AND_SWP:
+		wc_opcode = IBV_WC_COMP_SWAP;
+		break;
+	case IBV_WR_ATOMIC_FETCH_AND_ADD:
+		wc_opcode = IBV_WC_FETCH_ADD;
+		break;
+	case IBV_WR_SEND:
+	case IBV_WR_SEND_WITH_IMM:
+	default:
+		break;
+	}
+
+	return wc_opcode;
+}
+
+/* Ring the EDPM doorbell for the current WR: write the 64-bit doorbell
+ * message followed by the DPM payload, 64 bits at a time, to the EDPM
+ * doorbell address. No-op unless the WR is an EDPM candidate.
+ */
+static void doorbell_edpm_qp(struct qelr_qp *qp)
+{
+	uint32_t offset = 0;
+	uint64_t data;
+	uint64_t *dpm_payload = (uint64_t *)qp->edpm.dpm_payload;
+	uint32_t num_dwords;
+	int bytes = 0;
+
+	if (!qp->edpm.is_edpm)
+		return;
+
+	wmb();
+
+	/* the message header carries the SQ icid and current producer value */
+	qp->edpm.msg.data.icid = qp->sq.db_data.data.icid;
+	qp->edpm.msg.data.prod_val = qp->sq.db_data.data.value;
+
+	writeq(qp->edpm.msg.raw, qp->sq.edpm_db);
+
+	bytes += sizeof(uint64_t);
+
+	/* despite its name this counts 64-bit words, rounded up */
+	num_dwords = (qp->edpm.dpm_payload_size + sizeof(uint64_t) - 1) /
+		sizeof(uint64_t);
+
+	while (offset < num_dwords) {
+		data = dpm_payload[offset];
+
+		/* payload words follow directly after the message header */
+		writeq(data,
+		       qp->sq.edpm_db + sizeof(qp->edpm.msg.data) + offset *
+		       sizeof(uint64_t));
+
+		bytes += sizeof(uint64_t);
+		/* Need to place a barrier after every 64 bytes */
+		if (bytes == 64) {
+			wc_wmb();
+			bytes = 0;
+		}
+		offset++;
+	}
+
+	wc_wmb();
+}
+
+/* Post a list of send work requests to the QP's send queue.
+ *
+ * @ib_qp:  queue pair to post to; must be in RTS or SQD state
+ * @wr:     linked list of work requests
+ * @bad_wr: set to NULL on entry and to the first WR that could not be
+ *          posted on failure
+ *
+ * Each WR is translated into SQ chain elements under q_lock. WRs flagged
+ * as EDPM candidates are rung through doorbell_edpm_qp() inside the loop;
+ * otherwise the regular SQ doorbell is written once after the loop.
+ * Returns 0 on success, -EINVAL for a wrong QP state or a rejected WR
+ * (a full queue or too many SGEs sets -ENOMEM first, but the rejected-WR
+ * handling then overwrites it with -EINVAL).
+ */
+int qelr_post_send(struct ibv_qp *ib_qp, struct ibv_send_wr *wr,
+		   struct ibv_send_wr **bad_wr)
+{
+	int status = 0;
+	struct qelr_qp *qp = get_qelr_qp(ib_qp);
+	struct qelr_devctx *cxt = get_qelr_ctx(ib_qp->context);
+	uint8_t se, comp, fence;
+	uint16_t db_val;
+	*bad_wr = NULL;
+
+	pthread_spin_lock(&qp->q_lock);
+
+	if (qp->state != QELR_QPS_RTS && qp->state != QELR_QPS_SQD) {
+		pthread_spin_unlock(&qp->q_lock);
+		*bad_wr = wr;
+		return -EINVAL;
+	}
+
+	while (wr) {
+		struct rdma_sq_common_wqe *wqe;
+		struct rdma_sq_send_wqe_1st *swqe;
+		struct rdma_sq_send_wqe_2st *swqe2;
+		struct rdma_sq_rdma_wqe_1st *rwqe;
+		struct rdma_sq_rdma_wqe_2nd *rwqe2;
+		struct rdma_sq_atomic_wqe_1st *awqe1;
+		struct rdma_sq_atomic_wqe_2nd *awqe2;
+		struct rdma_sq_atomic_wqe_3rd *awqe3;
+
+		/* refuse the WR unless a maximum-size WQE still fits */
+		if ((qelr_chain_get_elem_left_u32(&qp->sq.chain) <
+					QELR_MAX_SQ_WQE_SIZE) ||
+		     (wr->num_sge > qp->sq.max_sges)) {
+			status = -ENOMEM;
+			*bad_wr = wr;
+			break;
+		}
+
+		qelr_init_edpm_info(qp, cxt);
+
+		wqe = qelr_chain_produce(&qp->sq.chain);
+
+		comp = (!!(wr->send_flags & IBV_SEND_SIGNALED)) ||
+				(!!qp->sq_sig_all);
+		qp->wqe_wr_id[qp->sq.prod].signaled = comp;
+
+		/* common fields */
+		wqe->flags = 0;
+		se = !!(wr->send_flags & IBV_SEND_SOLICITED);
+		fence = !!(wr->send_flags & IBV_SEND_FENCE);
+		SET_FIELD2(wqe->flags, RDMA_SQ_COMMON_WQE_SE_FLG, se);
+		SET_FIELD2(wqe->flags, RDMA_SQ_COMMON_WQE_COMP_FLG, comp);
+		SET_FIELD2(wqe->flags, RDMA_SQ_COMMON_WQE_RD_FENCE_FLG, fence);
+		wqe->prev_wqe_size = qp->prev_wqe_size;
+
+		qp->wqe_wr_id[qp->sq.prod].opcode =
+		qelr_ibv_to_wc_opcode(wr->opcode);
+
+		/* NOTE(review): in the cases below the length field holds the
+		 * htole32()-converted value and is passed on as-is to
+		 * qelr_edpm_set_msg_data(), bytes_len and the log - that
+		 * matches host order only on little-endian hosts.
+		 */
+		switch (wr->opcode) {
+		case IBV_WR_SEND_WITH_IMM:
+			wqe->req_type = RDMA_SQ_REQ_TYPE_SEND_WITH_IMM;
+			swqe = (struct rdma_sq_send_wqe_1st *)wqe;
+
+			swqe->wqe_size = 2;
+			swqe2 = (struct rdma_sq_send_wqe_2st *)
+					qelr_chain_produce(&qp->sq.chain);
+			swqe->inv_key_or_imm_data =
+					htonl(htole32(wr->imm_data));
+			qelr_edpm_set_inv_imm(qp, swqe->inv_key_or_imm_data);
+			swqe->length = htole32(
+					qelr_prepare_sq_send_data(qp, swqe,
+								  swqe2, wr,
+								  bad_wr));
+			qelr_edpm_set_msg_data(qp,
+					       QELR_IB_OPCODE_SEND_ONLY_WITH_IMMEDIATE,
+					       swqe->length,
+					       se, comp);
+			qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
+			qp->prev_wqe_size = swqe->wqe_size;
+			qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
+			FP_DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ,
+				      "SEND w/ IMM length = %d imm data=%x\n",
+				      swqe->length, wr->imm_data);
+			break;
+
+		case IBV_WR_SEND:
+			wqe->req_type = RDMA_SQ_REQ_TYPE_SEND;
+			swqe = (struct rdma_sq_send_wqe_1st *)wqe;
+
+			swqe->wqe_size = 2;
+			swqe2 = (struct rdma_sq_send_wqe_2st *)
+					qelr_chain_produce(&qp->sq.chain);
+			swqe->length = htole32(
+					qelr_prepare_sq_send_data(qp, swqe,
+								  swqe2, wr,
+								  bad_wr));
+			qelr_edpm_set_msg_data(qp, QELR_IB_OPCODE_SEND_ONLY,
+					       swqe->length,
+					       se, comp);
+			qp->wqe_wr_id[qp->sq.prod].wqe_size = swqe->wqe_size;
+			qp->prev_wqe_size = swqe->wqe_size;
+			qp->wqe_wr_id[qp->sq.prod].bytes_len = swqe->length;
+			FP_DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ,
+				      "SEND w/o IMM length = %d\n",
+				      swqe->length);
+			break;
+
+		case IBV_WR_RDMA_WRITE_WITH_IMM:
+			wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR_WITH_IMM;
+			rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
+
+			rwqe->wqe_size = 2;
+			rwqe->imm_data = htonl(htole32(wr->imm_data));
+			qelr_edpm_set_rdma_ext(qp, wr->wr.rdma.remote_addr,
+					       wr->wr.rdma.rkey);
+			qelr_edpm_set_inv_imm(qp, rwqe->imm_data);
+			rwqe2 = (struct rdma_sq_rdma_wqe_2nd *)
+					qelr_chain_produce(&qp->sq.chain);
+			rwqe->length = htole32(
+					qelr_prepare_sq_rdma_data(qp, rwqe,
+								  rwqe2, wr,
+								  bad_wr));
+			qelr_edpm_set_msg_data(qp,
+					       QELR_IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE,
+					       rwqe->length + sizeof(*qp->edpm.rdma_ext),
+					       se, comp);
+			qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
+			qp->prev_wqe_size = rwqe->wqe_size;
+			qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
+			FP_DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ,
+				      "RDMA WRITE w/ IMM length = %d imm data=%x\n",
+				      rwqe->length, rwqe->imm_data);
+			break;
+
+		case IBV_WR_RDMA_WRITE:
+			wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_WR;
+			rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
+
+			rwqe->wqe_size = 2;
+			qelr_edpm_set_rdma_ext(qp, wr->wr.rdma.remote_addr,
+					       wr->wr.rdma.rkey);
+			rwqe2 = (struct rdma_sq_rdma_wqe_2nd *)
+					qelr_chain_produce(&qp->sq.chain);
+			rwqe->length = htole32(
+				qelr_prepare_sq_rdma_data(qp, rwqe, rwqe2, wr,
+							  bad_wr));
+			qelr_edpm_set_msg_data(qp,
+					       QELR_IB_OPCODE_RDMA_WRITE_ONLY,
+					       rwqe->length + sizeof(*qp->edpm.rdma_ext),
+					       se, comp);
+			qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
+			qp->prev_wqe_size = rwqe->wqe_size;
+			qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
+			FP_DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ,
+				      "RDMA WRITE w/o IMM length = %d\n",
+				      rwqe->length);
+			break;
+
+		case IBV_WR_RDMA_READ:
+			wqe->req_type = RDMA_SQ_REQ_TYPE_RDMA_RD;
+			rwqe = (struct rdma_sq_rdma_wqe_1st *)wqe;
+
+			rwqe->wqe_size = 2;
+			rwqe2 = (struct rdma_sq_rdma_wqe_2nd *)
+					qelr_chain_produce(&qp->sq.chain);
+			rwqe->length = htole32(
+					qelr_prepare_sq_rdma_data(qp, rwqe,
+								  rwqe2, wr,
+								  bad_wr));
+
+			qp->wqe_wr_id[qp->sq.prod].wqe_size = rwqe->wqe_size;
+			qp->prev_wqe_size = rwqe->wqe_size;
+			qp->wqe_wr_id[qp->sq.prod].bytes_len = rwqe->length;
+			FP_DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ,
+				      "RDMA READ length = %d\n", rwqe->length);
+			break;
+
+		case IBV_WR_ATOMIC_CMP_AND_SWP:
+		case IBV_WR_ATOMIC_FETCH_AND_ADD:
+			FP_DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ, "ATOMIC\n");
+			if (!qp->atomic_supported) {
+				DP_ERR(cxt->dbg_fp,
+				       "Atomic not supported on this machine\n");
+				status = -EINVAL;
+				*bad_wr = wr;
+				/* leaves the switch; *bad_wr triggers the
+				 * rollback below
+				 */
+				break;
+			}
+			awqe1 = (struct rdma_sq_atomic_wqe_1st *)wqe;
+			awqe1->wqe_size = 4;
+
+			awqe2 = (struct rdma_sq_atomic_wqe_2nd *)
+					qelr_chain_produce(&qp->sq.chain);
+			TYPEPTR_ADDR_SET(awqe2, remote_va,
+					 wr->wr.atomic.remote_addr);
+			awqe2->r_key = htole32(wr->wr.atomic.rkey);
+
+			awqe3 = (struct rdma_sq_atomic_wqe_3rd *)
+				qelr_chain_produce(&qp->sq.chain);
+
+			if (wr->opcode == IBV_WR_ATOMIC_FETCH_AND_ADD) {
+				wqe->req_type = RDMA_SQ_REQ_TYPE_ATOMIC_ADD;
+				TYPEPTR_ADDR_SET(awqe3, swap_data,
+						 wr->wr.atomic.compare_add);
+			} else {
+				wqe->req_type =
+					RDMA_SQ_REQ_TYPE_ATOMIC_CMP_AND_SWAP;
+				TYPEPTR_ADDR_SET(awqe3, swap_data,
+						 wr->wr.atomic.swap);
+				TYPEPTR_ADDR_SET(awqe3, cmp_data,
+						 wr->wr.atomic.compare_add);
+			}
+
+			/* wqe_size stays at the fixed value set above */
+			qelr_prepare_sq_sges(qp, NULL, wr);
+
+			qp->wqe_wr_id[qp->sq.prod].wqe_size = awqe1->wqe_size;
+			qp->prev_wqe_size = awqe1->wqe_size;
+
+			break;
+
+		default:
+			*bad_wr = wr;
+			break;
+		}
+
+		if (*bad_wr) {
+			/* restore prod to its position before this WR was
+			 * processed
+			 */
+			qelr_chain_set_prod(&qp->sq.chain,
+					    le16toh(qp->sq.db_data.data.value),
+					    wqe);
+			/* restore prev_wqe_size */
+			qp->prev_wqe_size = wqe->prev_wqe_size;
+			status = -EINVAL;
+			DP_ERR(cxt->dbg_fp, "POST SEND FAILED\n");
+			break; /* out of the loop */
+		}
+
+		qp->wqe_wr_id[qp->sq.prod].wr_id = wr->wr_id;
+
+		qelr_inc_sw_prod_u16(&qp->sq);
+
+		db_val = le16toh(qp->sq.db_data.data.value) + 1;
+		qp->sq.db_data.data.value = htole16(db_val);
+
+		wr = wr->next;
+
+		/* Doorbell */
+		doorbell_edpm_qp(qp);
+	}
+
+	/* regular doorbell, skipped when the last processed WR was an EDPM
+	 * candidate
+	 */
+	if (!qp->edpm.is_edpm) {
+		wmb();
+
+		writel(qp->sq.db_data.raw, qp->sq.db);
+
+		wc_wmb();
+	}
+
+	pthread_spin_unlock(&qp->q_lock);
+
+	return status;
+}
+
+/* Post a linked list of receive work requests on the QP's receive queue.
+ *
+ * Every WR becomes one RQ chain element per SGE (or a single zero-length
+ * element when the WR has no SGEs), and the RQ doorbell is written once per
+ * WR. Posting stops at the first WR that cannot be queued and *bad_wr is set
+ * to it.
+ *
+ * Returns 0 on success, -EINVAL when the QP is in the RST or ERR state, and
+ * -ENOMEM when the RQ chain has fewer than QELR_MAX_RQ_WQE_SIZE free elements
+ * or the WR has more SGEs than the RQ allows.
+ */
+int qelr_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
+		   struct ibv_recv_wr **bad_wr)
+{
+	struct qelr_qp *qp = get_qelr_qp(ibqp);
+	struct qelr_devctx *cxt = get_qelr_ctx(ibqp->context);
+	uint16_t db_val;
+	int rc = 0;
+
+	pthread_spin_lock(&qp->q_lock);
+
+	/* nothing may be posted while the QP is in reset or error */
+	if (qp->state == QELR_QPS_RST || qp->state == QELR_QPS_ERR) {
+		pthread_spin_unlock(&qp->q_lock);
+		*bad_wr = wr;
+		return -EINVAL;
+	}
+
+	for (; wr; wr = wr->next) {
+		/* number of RQ chain elements produced for this WR */
+		int nelems;
+
+		if (qelr_chain_get_elem_left_u32(&qp->rq.chain) <
+		    QELR_MAX_RQ_WQE_SIZE || wr->num_sge > qp->rq.max_sges) {
+			DP_ERR(cxt->dbg_fp,
+			       "Can't post WR  (%d < %d) || (%d > %d)\n",
+			       qelr_chain_get_elem_left_u32(&qp->rq.chain),
+			       QELR_MAX_RQ_WQE_SIZE, wr->num_sge,
+			       qp->rq.max_sges);
+			*bad_wr = wr;
+			rc = -ENOMEM;
+			break;
+		}
+
+		FP_DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ,
+			      "RQ WR: SGEs: %d with wr_id[%d] = %lx\n",
+			      wr->num_sge, qp->rq.prod, wr->wr_id);
+
+		for (nelems = 0; nelems < wr->num_sge; nelems++) {
+			struct ibv_sge *sge = &wr->sg_list[nelems];
+			struct rdma_rq_sge *rqe;
+			uint32_t flags = 0;
+
+			/* only the first element carries the SGE count */
+			if (nelems == 0)
+				SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES,
+					  wr->num_sge);
+
+			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, sge->lkey);
+			rqe = qelr_chain_produce(&qp->rq.chain);
+			RQ_SGE_SET(rqe, sge->addr, sge->length, flags);
+			FP_DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ,
+				      "[%d]: len %d key %x addr %x:%x\n",
+				      nelems, rqe->length, rqe->flags,
+				      rqe->addr.hi, rqe->addr.lo);
+		}
+
+		/* A WR without SGEs still has to occupy one RQ element: the
+		 * FW requires between 1 and 4 SGEs, and an RDMA write with
+		 * immediate consumes an RQ entry. Post a single zero-length
+		 * SGE in that case.
+		 */
+		if (wr->num_sge == 0) {
+			struct rdma_rq_sge *rqe;
+			uint32_t flags = 0;
+
+			SET_FIELD(flags, RDMA_RQ_SGE_L_KEY, 0);
+			SET_FIELD(flags, RDMA_RQ_SGE_NUM_SGES, 1);
+
+			rqe = qelr_chain_produce(&qp->rq.chain);
+			RQ_SGE_SET(rqe, 0, 0, flags);
+			nelems = 1;
+		}
+
+		/* remember what to report / release when this WR completes */
+		qp->rqe_wr_id[qp->rq.prod].wr_id = wr->wr_id;
+		qp->rqe_wr_id[qp->rq.prod].wqe_size = nelems;
+
+		qelr_inc_sw_prod_u16(&qp->rq);
+
+		/* barrier sits between filling the RQ elements and the
+		 * doorbell update below
+		 */
+		wmb();
+
+		db_val = le16toh(qp->rq.db_data.data.value) + 1;
+		qp->rq.db_data.data.value = htole16(db_val);
+
+		writel(qp->rq.db_data.raw, qp->rq.db);
+
+		wc_wmb();
+	}
+
+	FP_DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ, "POST: Elements in RespQ: %d\n",
+		      qelr_chain_get_elem_left_u32(&qp->rq.chain));
+	pthread_spin_unlock(&qp->q_lock);
+
+	return rc;
+}
+
+/* A CQE is considered valid when the toggle bit in its flags equals the
+ * toggle value currently recorded for the CQ chain.
+ */
+static int is_valid_cqe(struct qelr_cq *cq, union rdma_cqe *cqe)
+{
+	return cq->chain_toggle ==
+	       (cqe->req.flags & RDMA_CQE_REQUESTER_TOGGLE_BIT_MASK);
+}
+
+/* Read the CQE type field out of the CQE flags (accessed through the
+ * requester view of the union).
+ */
+static enum rdma_cqe_type cqe_get_type(union rdma_cqe *cqe)
+{
+	return GET_FIELD(cqe->req.flags, RDMA_CQE_REQUESTER_TYPE);
+}
+
+/* Recover the QP pointer stored in the CQE's qp_handle (hi/lo halves).
+ *
+ * Returns the pointer rebuilt from the two halves; the caller checks it for
+ * NULL.
+ */
+static struct qelr_qp *cqe_get_qp(union rdma_cqe *cqe)
+{
+	struct rdma_cqe_requester *resp_cqe = &cqe->req;
+	uint64_t handle = HILO_U64(resp_cqe->qp_handle.hi,
+				   resp_cqe->qp_handle.lo);
+
+	/* Convert through uintptr_t: casting a 64-bit integer directly to a
+	 * pointer is an integer/pointer size mismatch on 32-bit builds.
+	 */
+	return (struct qelr_qp *)(uintptr_t)handle;
+}
+
+/* Produce work completions for send-queue WQEs until the SQ consumer reaches
+ * hw_cons or num_entries completions have been written.
+ *
+ * @qp:          QP whose send queue is being drained
+ * @cq:          CQ being polled (not used here)
+ * @num_entries: maximum number of WCs that may be written to @wc
+ * @wc:          output array of work completions
+ * @hw_cons:     SQ consumer value to stop at
+ * @status:      completion status to report in every WC
+ * @force:       when non-zero, report a WC even for unsignaled WQEs
+ *
+ * Unsignaled WQEs are consumed without producing a WC unless @force is set.
+ * Returns the number of WCs written.
+ */
+static int process_req(struct qelr_qp *qp, struct qelr_cq *cq, int num_entries,
+		       struct ibv_wc *wc, uint16_t hw_cons,
+		       enum ibv_wc_status status, int force)
+{
+	struct qelr_devctx *cxt = get_qelr_ctx(qp->ibv_qp.context);
+	uint16_t cnt = 0;
+
+	while (num_entries && qp->sq.wqe_cons != hw_cons) {
+		if (!qp->wqe_wr_id[qp->sq.cons].signaled && !force) {
+			/* skip WC */
+			goto next_cqe;
+		}
+
+		/* fill WC */
+		wc->status = status;
+		/* vendor_err is one of the fields applications may read on
+		 * error completions, so never leave it uninitialised
+		 */
+		wc->vendor_err = 0;
+		wc->wc_flags = 0;
+		wc->qp_num = qp->qp_id;
+
+		/* common section */
+		wc->wr_id = qp->wqe_wr_id[qp->sq.cons].wr_id;
+		wc->opcode = qp->wqe_wr_id[qp->sq.cons].opcode;
+
+		switch (wc->opcode) {
+		case IBV_WC_RDMA_WRITE:
+			wc->byte_len = qp->wqe_wr_id[qp->sq.cons].bytes_len;
+			DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ,
+				   "POLL REQ CQ: IBV_WC_RDMA_WRITE byte_len=%d\n",
+				   qp->wqe_wr_id[qp->sq.cons].bytes_len);
+			break;
+		case IBV_WC_COMP_SWAP:
+		case IBV_WC_FETCH_ADD:
+			/* atomics are reported with a fixed length of 8 */
+			wc->byte_len = 8;
+			break;
+		case IBV_WC_RDMA_READ:
+		case IBV_WC_SEND:
+		case IBV_WC_BIND_MW:
+			DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ,
+				   "POLL REQ CQ: IBV_WC_RDMA_READ / IBV_WC_SEND\n");
+			break;
+		default:
+			break;
+		}
+
+		num_entries--;
+		wc++;
+		cnt++;
+next_cqe:
+		/* release every chain element this WQE occupied */
+		while (qp->wqe_wr_id[qp->sq.cons].wqe_size--)
+			qelr_chain_consume(&qp->sq.chain);
+		qelr_inc_sw_cons_u16(&qp->sq);
+	}
+
+	return cnt;
+}
+
+/* Translate one requester CQE into work completions.
+ *
+ * For OK and flushed CQEs every WQE up to the CQE's SQ consumer is reported
+ * with the matching status. For any other status the QP is moved to the
+ * error state, the WQEs before the consumer are reported as successful, and,
+ * if a WC slot remains, the WQE at the consumer is reported with the mapped
+ * error status.
+ *
+ * Returns the number of WCs written to @wc (at most @num_entries).
+ */
+static int qelr_poll_cq_req(struct qelr_qp *qp, struct qelr_cq *cq,
+			    int num_entries, struct ibv_wc *wc,
+			    struct rdma_cqe_requester *req)
+{
+	struct qelr_devctx *cxt = get_qelr_ctx(qp->ibv_qp.context);
+	/* Convert the consumer index once, the same way
+	 * try_consume_req_cqe() does, so both agree on big-endian hosts.
+	 */
+	uint16_t sq_cons = le16toh(req->sq_cons);
+	int cnt = 0;
+
+	switch (req->status) {
+	case RDMA_CQE_REQ_STS_OK:
+		cnt = process_req(qp, cq, num_entries, wc, sq_cons,
+				  IBV_WC_SUCCESS, 0);
+		break;
+	case RDMA_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR:
+		DP_ERR(cxt->dbg_fp,
+		       "Error: POLL CQ with ROCE_CQE_REQ_STS_WORK_REQUEST_FLUSHED_ERR. QP icid=0x%x\n",
+		       qp->sq.icid);
+		cnt = process_req(qp, cq, num_entries, wc, sq_cons,
+				  IBV_WC_WR_FLUSH_ERR, 0);
+		break;
+	default: /* other errors case */
+		/* process all WQE before the consumer */
+		qp->state = QELR_QPS_ERR;
+		cnt = process_req(qp, cq, num_entries, wc,
+				  (uint16_t)(sq_cons - 1), IBV_WC_SUCCESS, 0);
+		wc += cnt;
+		/* if we have extra WC fill it with actual error info */
+		if (cnt < num_entries) {
+			enum ibv_wc_status wc_status;
+
+			switch (req->status) {
+			case RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR:
+				DP_ERR(cxt->dbg_fp,
+				       "Error: POLL CQ with RDMA_CQE_REQ_STS_BAD_RESPONSE_ERR. QP icid=0x%x\n",
+				       qp->sq.icid);
+				wc_status = IBV_WC_BAD_RESP_ERR;
+				break;
+			case RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR:
+				DP_ERR(cxt->dbg_fp,
+				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_LENGTH_ERR. QP icid=0x%x\n",
+				       qp->sq.icid);
+				wc_status = IBV_WC_LOC_LEN_ERR;
+				break;
+			case RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR:
+				DP_ERR(cxt->dbg_fp,
+				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_QP_OPERATION_ERR. QP icid=0x%x\n",
+				       qp->sq.icid);
+				wc_status = IBV_WC_LOC_QP_OP_ERR;
+				break;
+			case RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR:
+				DP_ERR(cxt->dbg_fp,
+				       "Error: POLL CQ with RDMA_CQE_REQ_STS_LOCAL_PROTECTION_ERR. QP icid=0x%x\n",
+				       qp->sq.icid);
+				wc_status = IBV_WC_LOC_PROT_ERR;
+				break;
+			case RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR:
+				DP_ERR(cxt->dbg_fp,
+				       "Error: POLL CQ with RDMA_CQE_REQ_STS_MEMORY_MGT_OPERATION_ERR. QP icid=0x%x\n",
+				       qp->sq.icid);
+				wc_status = IBV_WC_MW_BIND_ERR;
+				break;
+			case RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR:
+				DP_ERR(cxt->dbg_fp,
+				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_INVALID_REQUEST_ERR. QP icid=0x%x\n",
+				       qp->sq.icid);
+				wc_status = IBV_WC_REM_INV_REQ_ERR;
+				break;
+			case RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR:
+				DP_ERR(cxt->dbg_fp,
+				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_ACCESS_ERR. QP icid=0x%x\n",
+				       qp->sq.icid);
+				wc_status = IBV_WC_REM_ACCESS_ERR;
+				break;
+			case RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR:
+				DP_ERR(cxt->dbg_fp,
+				       "Error: POLL CQ with RDMA_CQE_REQ_STS_REMOTE_OPERATION_ERR. QP icid=0x%x\n",
+				       qp->sq.icid);
+				wc_status = IBV_WC_REM_OP_ERR;
+				break;
+			case RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR:
+				DP_ERR(cxt->dbg_fp,
+				       "Error: POLL CQ with RDMA_CQE_REQ_STS_RNR_NAK_RETRY_CNT_ERR. QP icid=0x%x\n",
+				       qp->sq.icid);
+				wc_status = IBV_WC_RNR_RETRY_EXC_ERR;
+				break;
+			case RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR:
+				DP_ERR(cxt->dbg_fp,
+				       "RDMA_CQE_REQ_STS_TRANSPORT_RETRY_CNT_ERR. QP icid=0x%x\n",
+				       qp->sq.icid);
+				wc_status = IBV_WC_RETRY_EXC_ERR;
+				break;
+			default:
+				DP_ERR(cxt->dbg_fp,
+				       "IBV_WC_GENERAL_ERR. QP icid=0x%x\n",
+					qp->sq.icid);
+				wc_status = IBV_WC_GENERAL_ERR;
+			}
+
+			cnt += process_req(qp, cq, 1, wc, sq_cons,
+					   wc_status, 1 /* force use of WC */);
+		}
+	}
+
+	return cnt;
+}
+
+/* Fill one work completion from a (non-flush) responder CQE.
+ *
+ * @qp:    QP the CQE belongs to
+ * @cq:    CQ being polled (not used here)
+ * @wc:    work completion to fill
+ * @resp:  responder CQE
+ * @wr_id: wr_id of the receive WR this CQE completes
+ *
+ * The CQE status is mapped to an ibv_wc_status; an unrecognised status is
+ * reported as IBV_WC_GENERAL_ERR. On success byte_len, and when present the
+ * immediate data, are filled in as well. wr_id, qp_num, status and
+ * vendor_err are set for every status.
+ */
+static void __process_resp_one(struct qelr_qp *qp, struct qelr_cq *cq,
+			       struct ibv_wc *wc,
+			       struct rdma_cqe_responder *resp, uint64_t wr_id)
+{
+	struct qelr_devctx *cxt = get_qelr_ctx(qp->ibv_qp.context);
+	enum ibv_wc_status wc_status = IBV_WC_SUCCESS;
+	uint8_t flags;
+
+	wc->opcode = IBV_WC_RECV;
+	/* start from clean flags: IBV_WC_WITH_IMM is OR-ed in below and the
+	 * caller's WC array is not required to be zeroed
+	 */
+	wc->wc_flags = 0;
+
+	FP_DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ, "\n");
+
+	switch (resp->status) {
+	case RDMA_CQE_RESP_STS_LOCAL_ACCESS_ERR:
+		wc_status = IBV_WC_LOC_ACCESS_ERR;
+		break;
+	case RDMA_CQE_RESP_STS_LOCAL_LENGTH_ERR:
+		wc_status = IBV_WC_LOC_LEN_ERR;
+		break;
+	case RDMA_CQE_RESP_STS_LOCAL_QP_OPERATION_ERR:
+		wc_status = IBV_WC_LOC_QP_OP_ERR;
+		break;
+	case RDMA_CQE_RESP_STS_LOCAL_PROTECTION_ERR:
+		wc_status = IBV_WC_LOC_PROT_ERR;
+		break;
+	case RDMA_CQE_RESP_STS_MEMORY_MGT_OPERATION_ERR:
+		wc_status = IBV_WC_MW_BIND_ERR;
+		break;
+	case RDMA_CQE_RESP_STS_REMOTE_INVALID_REQUEST_ERR:
+		wc_status = IBV_WC_REM_INV_RD_REQ_ERR;
+		break;
+	case RDMA_CQE_RESP_STS_OK:
+		wc_status = IBV_WC_SUCCESS;
+		wc->byte_len = le32toh(resp->length);
+
+		flags = resp->flags & QELR_RESP_RDMA_IMM;
+
+		switch (flags) {
+		case QELR_RESP_RDMA_IMM:
+			/* update opcode */
+			wc->opcode = IBV_WC_RECV_RDMA_WITH_IMM;
+			/* fall through to set imm data */
+		case QELR_RESP_IMM:
+			wc->imm_data =
+				ntohl(le32toh(resp->imm_data_or_inv_r_Key));
+			wc->wc_flags |= IBV_WC_WITH_IMM;
+			FP_DP_VERBOSE(cxt->dbg_fp, QELR_MSG_CQ,
+				      "POLL CQ RQ2: RESP_RDMA_IMM imm_data = %x resp_len=%d\n",
+				      wc->imm_data, wc->byte_len);
+			break;
+		case QELR_RESP_RDMA:
+			DP_ERR(cxt->dbg_fp, "Invalid flags detected\n");
+			break;
+		default:
+			/* valid configuration, but nothing to do here */
+			break;
+		}
+		break;
+	default:
+		/* record the error in wc_status; writing wc->status here
+		 * would be overwritten by the common assignment below
+		 */
+		wc_status = IBV_WC_GENERAL_ERR;
+		DP_ERR(cxt->dbg_fp, "Invalid CQE status detected\n");
+	}
+
+	/* fill WC: these fields are reported for every status so that a
+	 * failed receive can still be matched to its WR
+	 */
+	wc->status = wc_status;
+	wc->vendor_err = 0;
+	wc->wr_id = wr_id;
+	wc->qp_num = qp->qp_id;
+}
+
+/* Complete the receive WR at the RQ consumer: fill @wc from @resp, release
+ * the chain elements that WR occupied and advance the RQ consumer.
+ *
+ * Always returns 1 (one WC written).
+ */
+static int process_resp_one(struct qelr_qp *qp, struct qelr_cq *cq,
+			    struct ibv_wc *wc, struct rdma_cqe_responder *resp)
+{
+	__process_resp_one(qp, cq, wc, resp,
+			   qp->rqe_wr_id[qp->rq.cons].wr_id);
+
+	/* one chain element was produced per SGE when the WR was posted */
+	while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
+		qelr_chain_consume(&qp->rq.chain);
+
+	qelr_inc_sw_cons_u16(&qp->rq);
+
+	return 1;
+}
+
+/* Report flushed receive WRs.
+ *
+ * Writes one IBV_WC_WR_FLUSH_ERR completion per outstanding receive WR until
+ * the RQ consumer reaches @hw_cons or @num_entries completions have been
+ * written, releasing each WR's chain elements as it goes.
+ *
+ * Returns the number of WCs written.
+ */
+static int process_resp_flush(struct qelr_qp *qp, struct qelr_cq *cq,
+			      int num_entries, struct ibv_wc *wc,
+			      uint16_t hw_cons)
+{
+	uint16_t cnt = 0;
+
+	while (num_entries && qp->rq.wqe_cons != hw_cons) {
+		/* fill WC */
+		wc->status = IBV_WC_WR_FLUSH_ERR;
+		/* vendor_err may be read by the application on error
+		 * completions; do not hand back uninitialised memory
+		 */
+		wc->vendor_err = 0;
+		wc->wc_flags = 0;
+		wc->qp_num = qp->qp_id;
+		wc->byte_len = 0;
+		wc->wr_id = qp->rqe_wr_id[qp->rq.cons].wr_id;
+		num_entries--;
+		wc++;
+		cnt++;
+		while (qp->rqe_wr_id[qp->rq.cons].wqe_size--)
+			qelr_chain_consume(&qp->rq.chain);
+		qelr_inc_sw_cons_u16(&qp->rq);
+	}
+
+	return cnt;
+}
+
+/* Return the CQE the CQ currently points at, i.e. the next one that needs
+ * processing.
+ */
+static union rdma_cqe *get_cqe(struct qelr_cq *cq)
+{
+	union rdma_cqe *latest = cq->latest_cqe;
+
+	return latest;
+}
+
+/* Consume the requester CQE once the SQ consumer has caught up with the
+ * consumer index the CQE reports, and flag that a CQ doorbell is needed.
+ */
+static void try_consume_req_cqe(struct qelr_cq *cq, struct qelr_qp *qp,
+				struct rdma_cqe_requester *req, int *update)
+{
+	/* WQEs covered by this CQE are still pending: keep the CQE */
+	if (le16toh(req->sq_cons) != qp->sq.wqe_cons)
+		return;
+
+	consume_cqe(cq);
+	*update |= 1;
+}
+
+/* Flush-only counterpart for responder CQEs (resp->rq_cons is valid only
+ * then): consume the CQE once the RQ consumer has reached the index the CQE
+ * reports, and flag that a CQ doorbell is needed.
+ */
+static void try_consume_resp_cqe(struct qelr_cq *cq, struct qelr_qp *qp,
+				 struct rdma_cqe_responder *resp, int *update)
+{
+	/* receive WRs covered by this CQE are still pending: keep the CQE */
+	if (le16toh(resp->rq_cons) != qp->rq.wqe_cons)
+		return;
+
+	consume_cqe(cq);
+	*update |= 1;
+}
+
+/* Handle one responder CQE.
+ *
+ * A flushed CQE may cover several receive WRs: they are reported through
+ * process_resp_flush() and the CQE is consumed only once all of them have
+ * been reported. Any other CQE completes exactly one receive WR and is
+ * consumed immediately.
+ *
+ * *update is set when a CQE was consumed. Returns the number of WCs written.
+ */
+static int qelr_poll_cq_resp(struct qelr_qp *qp, struct qelr_cq *cq,
+			     int num_entries, struct ibv_wc *wc,
+			     struct rdma_cqe_responder *resp, int *update)
+{
+	int cnt;
+
+	if (resp->status == RDMA_CQE_RESP_STS_WORK_REQUEST_FLUSHED_ERR) {
+		/* convert rq_cons the same way try_consume_resp_cqe() does,
+		 * so both compare the same value on big-endian hosts
+		 */
+		cnt = process_resp_flush(qp, cq, num_entries, wc,
+					 le16toh(resp->rq_cons));
+		try_consume_resp_cqe(cq, qp, resp, update);
+	} else {
+		cnt = process_resp_one(qp, cq, wc, resp);
+		consume_cqe(cq);
+		*update |= 1;
+	}
+
+	return cnt;
+}
+
+/* Ring the CQ doorbell with consumer index @cons and aggregation/arm flags
+ * @flags.
+ */
+static void doorbell_cq(struct qelr_cq *cq, uint32_t cons, uint8_t flags)
+{
+	/* barrier ahead of the doorbell record update and write */
+	wmb();
+
+	/* build the doorbell record: consumer value is stored little-endian */
+	cq->db.data.value = htole32(cons);
+	cq->db.data.agg_flags = flags;
+
+	writeq(cq->db.raw, cq->db_addr);
+
+	wc_wmb();
+}
+
+/* Poll a CQ for up to @num_entries work completions.
+ *
+ * Valid CQEs are dispatched by type to the requester or responder handler,
+ * which write WCs into @wc and consume the CQE when it is fully processed.
+ * Polling stops when @num_entries WCs were written, no valid CQE is left, or
+ * a CQE cannot be interpreted (NULL QP handle or unknown CQE type). If any
+ * CQE was consumed the CQ doorbell is rung with the last consumed index.
+ *
+ * Returns the number of WCs written.
+ */
+int qelr_poll_cq(struct ibv_cq *ibcq, int num_entries, struct ibv_wc *wc)
+{
+	struct qelr_cq *cq = get_qelr_cq(ibcq);
+	int done = 0;
+	union rdma_cqe *cqe = get_cqe(cq);
+	int update = 0;
+	uint32_t db_cons;
+
+	while (num_entries && is_valid_cqe(cq, cqe)) {
+		enum rdma_cqe_type type;
+		int cnt = 0;
+		struct qelr_qp *qp;
+
+		/* prevent speculative reads of any field of CQE */
+		rmb();
+
+		qp = cqe_get_qp(cqe);
+		if (!qp) {
+			DP_ERR(stderr,
+			       "Error: CQE QP pointer is NULL. CQE=%p\n", cqe);
+			break;
+		}
+
+		type = cqe_get_type(cqe);
+		if (type == RDMA_CQE_TYPE_REQUESTER) {
+			cnt = qelr_poll_cq_req(qp, cq, num_entries, wc,
+					       &cqe->req);
+			try_consume_req_cqe(cq, qp, &cqe->req, &update);
+		} else if (type == RDMA_CQE_TYPE_RESPONDER_RQ) {
+			cnt = qelr_poll_cq_resp(qp, cq, num_entries, wc,
+						&cqe->resp, &update);
+		} else {
+			/* An unknown CQE is never consumed and produces no
+			 * WC, so continuing would re-read the same CQE
+			 * forever. Report it like the NULL-QP case and stop
+			 * polling.
+			 */
+			DP_ERR(stderr, "Error: invalid CQE type = %d\n",
+			       type);
+			break;
+		}
+
+		num_entries -= cnt;
+		wc += cnt;
+		done += cnt;
+
+		cqe = get_cqe(cq);
+	}
+
+	db_cons = qelr_chain_get_cons_idx_u32(&cq->chain) - 1;
+	if (update) {
+		/* doorbell notifies about latest VALID entry,
+		 * but chain already point to the next INVALID one
+		 */
+		doorbell_cq(cq, db_cons, cq->arm_flags);
+		FP_DP_VERBOSE(stderr, QELR_MSG_CQ, "doorbell_cq cons=%x\n",
+			      db_cons);
+	}
+
+	return done;
+}
+
+/* CQ event hook: the armed notification has fired, so the stored arm flags
+ * are cleared.
+ */
+void qelr_cq_event(struct ibv_cq *ibcq)
+{
+	get_qelr_cq(ibcq)->arm_flags = 0;
+}
+
+/* Arm the CQ for a completion notification.
+ *
+ * @solicited selects the solicited-only arm command; otherwise the CQ is
+ * armed for any completion. The chosen flags are stored in the CQ and the
+ * doorbell is rung with the last consumed index. Always returns 0.
+ */
+int qelr_arm_cq(struct ibv_cq *ibcq, int solicited)
+{
+	struct qelr_cq *cq = get_qelr_cq(ibcq);
+	uint32_t db_cons = qelr_chain_get_cons_idx_u32(&cq->chain) - 1;
+
+	FP_DP_VERBOSE(get_qelr_ctx(ibcq->context)->dbg_fp, QELR_MSG_CQ,
+		      "Arm CQ cons=%x solicited=%d\n", db_cons, solicited);
+
+	if (solicited)
+		cq->arm_flags = DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD;
+	else
+		cq->arm_flags = DQ_UCM_ROCE_CQ_ARM_CF_CMD;
+
+	doorbell_cq(cq, db_cons, cq->arm_flags);
+
+	return 0;
+}
+
+/* Async event hook. Not implemented yet: it only resolves the CQ or QP the
+ * event refers to (when it refers to one) and prints a notice to stderr.
+ */
+void qelr_async_event(struct ibv_async_event *event)
+{
+	struct qelr_qp *qp = NULL;
+	struct qelr_cq *cq = NULL;
+
+	switch (event->event_type) {
+	case IBV_EVENT_QP_FATAL:
+	case IBV_EVENT_QP_REQ_ERR:
+	case IBV_EVENT_QP_ACCESS_ERR:
+	case IBV_EVENT_PATH_MIG_ERR:
+		qp = get_qelr_qp(event->element.qp);
+		break;
+	case IBV_EVENT_CQ_ERR:
+		cq = get_qelr_cq(event->element.cq);
+		break;
+	default:
+		/* every other event is reported without an object */
+		break;
+	}
+
+	fprintf(stderr, "qelr_async_event not implemented yet cq=%p qp=%p\n",
+		cq, qp);
+}
diff --git a/providers/qedr/qelr_verbs.h b/providers/qedr/qelr_verbs.h
new file mode 100644
index 0000000..f10b76b
--- /dev/null
+++ b/providers/qedr/qelr_verbs.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 2015-2016  QLogic Corporation
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and /or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __QELR_VERBS_H__
+#define __QELR_VERBS_H__
+
+#include <inttypes.h>
+#include <stddef.h>
+#include <endian.h>
+
+#include <infiniband/driver.h>
+#include <infiniband/arch.h>
+
+/* Provider entry points. Parameter names are spelled out in every prototype
+ * so the header can be read without the definitions.
+ */
+struct ibv_device *qelr_driver_init(const char *sysfs_path, int abi_version);
+
+int qelr_query_device(struct ibv_context *ctx, struct ibv_device_attr *attr);
+int qelr_query_port(struct ibv_context *ctx, uint8_t port,
+		    struct ibv_port_attr *attr);
+
+struct ibv_pd *qelr_alloc_pd(struct ibv_context *ctx);
+int qelr_dealloc_pd(struct ibv_pd *pd);
+
+struct ibv_mr *qelr_reg_mr(struct ibv_pd *pd, void *addr, size_t length,
+			   int ibv_access_flags);
+int qelr_dereg_mr(struct ibv_mr *mr);
+
+struct ibv_cq *qelr_create_cq(struct ibv_context *ctx, int cqe,
+			      struct ibv_comp_channel *channel,
+			      int comp_vector);
+int qelr_destroy_cq(struct ibv_cq *ibcq);
+int qelr_poll_cq(struct ibv_cq *ibcq, int num_entries, struct ibv_wc *wc);
+void qelr_cq_event(struct ibv_cq *ibcq);
+int qelr_arm_cq(struct ibv_cq *ibcq, int solicited);
+
+int qelr_query_srq(struct ibv_srq *ibv_srq, struct ibv_srq_attr *attr);
+int qelr_modify_srq(struct ibv_srq *ibv_srq, struct ibv_srq_attr *attr,
+		    int attr_mask);
+struct ibv_srq *qelr_create_srq(struct ibv_pd *pd,
+				struct ibv_srq_init_attr *init_attr);
+int qelr_destroy_srq(struct ibv_srq *ibv_srq);
+int qelr_post_srq_recv(struct ibv_srq *ibv_srq, struct ibv_recv_wr *wr,
+		       struct ibv_recv_wr **bad_wr);
+
+struct ibv_qp *qelr_create_qp(struct ibv_pd *pd,
+			      struct ibv_qp_init_attr *init_attr);
+int qelr_modify_qp(struct ibv_qp *ibqp, struct ibv_qp_attr *attr,
+		   int ibv_qp_attr_mask);
+int qelr_query_qp(struct ibv_qp *qp, struct ibv_qp_attr *attr, int attr_mask,
+		  struct ibv_qp_init_attr *init_attr);
+int qelr_destroy_qp(struct ibv_qp *ibqp);
+
+int qelr_post_send(struct ibv_qp *ibqp, struct ibv_send_wr *wr,
+		   struct ibv_send_wr **bad_wr);
+int qelr_post_recv(struct ibv_qp *ibqp, struct ibv_recv_wr *wr,
+		   struct ibv_recv_wr **bad_wr);
+
+void qelr_async_event(struct ibv_async_event *event);
+#endif /* __QELR_VERBS_H__ */
-- 
2.7.4

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox