* [PATCH 08/12] dapl-2.0: scm: new cm_ep linking broke UD mode over socket cm
From: Davis, Arlin R @ 2010-05-19 18:11 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, ofw_list
Add EP locking around modify_qp for EP state.
Add new dapli_ep_check() for debugging EP linking and connect state.
Clean up extra CRs.
Change socket errno usage to the dapl_socket_errno() abstraction.
Signed-off-by: Arlin Davis <arlin.r.davis-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
dapl/openib_scm/cm.c | 177 ++++++++++++++++++++++++++++++++++++--------------
1 files changed, 128 insertions(+), 49 deletions(-)
diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
index 6958b67..b6ffbe9 100644
--- a/dapl/openib_scm/cm.c
+++ b/dapl/openib_scm/cm.c
@@ -60,6 +60,48 @@
#include "dapl_ep_util.h"
#include "dapl_osd.h"
+#ifdef DAPL_DBG
+/* Check for EP linking to IA and proper connect state */
+void dapli_ep_check(DAPL_EP *ep)
+{
+ DAPL_IA *ia_ptr = ep->header.owner_ia;
+ DAPL_EP *ep_ptr, *next_ep_ptr;
+ int found = 0;
+
+ dapl_os_lock(&ia_ptr->header.lock);
+ ep_ptr = (dapl_llist_is_empty (&ia_ptr->ep_list_head)
+ ? NULL : dapl_llist_peek_head (&ia_ptr->ep_list_head));
+
+ while (ep_ptr != NULL) {
+ next_ep_ptr =
+ dapl_llist_next_entry(&ia_ptr->ep_list_head,
+ &ep_ptr->header.ia_list_entry);
+ if (ep == ep_ptr) {
+ found++;
+ if ((ep->cr_ptr && ep->param.ep_state
+ != DAT_EP_STATE_COMPLETION_PENDING) ||
+ (!ep->cr_ptr && ep->param.ep_state
+ != DAT_EP_STATE_ACTIVE_CONNECTION_PENDING))
+ goto err;
+ else
+ goto match;
+ }
+ ep_ptr = next_ep_ptr;
+ }
+err:
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " dapli_ep_check ERR: %s %s ep=%p state=%d magic=0x%x\n",
+ ep->cr_ptr ? "PASSIVE":"ACTIVE",
+ found ? "WRONG_STATE":"NOT_FOUND" ,
+ ep, ep->param.ep_state, ep->header.magic);
+match:
+ dapl_os_unlock(&ia_ptr->header.lock);
+ return;
+}
+#else
+#define dapli_ep_check(ep)
+#endif
+
#if defined(_WIN32) || defined(_WIN64)
enum DAPL_FD_EVENTS {
DAPL_FD_READ = 0x1,
@@ -311,13 +353,13 @@ void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr)
void dapls_cm_release(dp_ib_cm_handle_t cm_ptr)
{
dapl_os_lock(&cm_ptr->lock);
- cm_ptr->ref_count--;
- if (cm_ptr->ref_count) {
- dapl_os_unlock(&cm_ptr->lock);
- return;
- }
- dapl_os_unlock(&cm_ptr->lock);
- dapli_cm_dealloc(cm_ptr);
+ cm_ptr->ref_count--;
+ if (cm_ptr->ref_count) {
+ dapl_os_unlock(&cm_ptr->lock);
+ return;
+ }
+ dapl_os_unlock(&cm_ptr->lock);
+ dapli_cm_dealloc(cm_ptr);
}
static dp_ib_cm_handle_t dapli_cm_alloc(DAPL_EP *ep_ptr)
@@ -416,7 +458,9 @@ DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr)
dapl_os_unlock(&cm_ptr->lock);
/* send disc date, close socket, schedule destroy */
+ dapl_os_lock(&cm_ptr->ep->header.lock);
dapls_modify_qp_state(cm_ptr->ep->qp_handle, IBV_QPS_ERR, 0,0,0);
+ dapl_os_unlock(&cm_ptr->ep->header.lock);
send(cm_ptr->socket, (char *)&disc_data, sizeof(disc_data), 0);
/* disconnect events for RC's only */
@@ -452,7 +496,7 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
dapl_log(DAPL_DBG_TYPE_ERR,
" CONN_PENDING: %s ERR %s -> %s %d\n",
err == -1 ? "POLL" : "SOCKOPT",
- err == -1 ? strerror(errno) : strerror(err),
+ err == -1 ? strerror(dapl_socket_errno()) : strerror(err),
inet_ntoa(((struct sockaddr_in *)
&cm_ptr->addr)->sin_addr),
ntohs(((struct sockaddr_in *)
@@ -475,9 +519,10 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
}
if (len != (exp + ntohs(cm_ptr->msg.p_size))) {
+ int err = dapl_socket_errno();
dapl_log(DAPL_DBG_TYPE_ERR,
- " CONN_PENDING len ERR %s, wcnt=%d(%d) -> %s\n",
- strerror(errno), len,
+ " CONN_PENDING len ERR 0x%x %s, wcnt=%d(%d) -> %s\n",
+ err, strerror(err), len,
exp + ntohs(cm_ptr->msg.p_size),
inet_ntoa(((struct sockaddr_in *)
ep_ptr->param.
@@ -530,16 +575,19 @@ dapli_socket_connect(DAPL_EP * ep_ptr,
/* create, connect, sockopt, and exchange QP information */
if ((cm_ptr->socket =
socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == DAPL_INVALID_SOCKET) {
+ int err = dapl_socket_errno();
dapl_log(DAPL_DBG_TYPE_ERR,
- " connect: socket create ERR %s\n", strerror(errno));
+ " connect: socket create ERR 0x%x %s\n",
+ err, strerror(err));
goto bail;
}
ret = dapl_config_socket(cm_ptr->socket);
if (ret < 0) {
dapl_log(DAPL_DBG_TYPE_ERR,
- " connect: config socket %d ERR %d %s\n",
- cm_ptr->socket, ret, strerror(dapl_socket_errno()));
+ " connect: config socket %d RET %d ERR 0x%x %s\n",
+ cm_ptr->socket, ret,
+ dapl_socket_errno(), strerror(dapl_socket_errno()));
dat_ret = DAT_INTERNAL_ERROR;
goto bail;
}
@@ -556,6 +604,10 @@ dapli_socket_connect(DAPL_EP * ep_ptr,
ret = dapl_connect_socket(cm_ptr->socket, (struct sockaddr *)&cm_ptr->addr,
sizeof(cm_ptr->addr));
if (ret && ret != EAGAIN) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " connect: dapl_connect_socket RET %d ERR 0x%x %s\n",
+ ret, dapl_socket_errno(),
+ strerror(dapl_socket_errno()));
dat_ret = DAT_INVALID_ADDRESS;
goto bail;
}
@@ -572,9 +624,10 @@ dapli_socket_connect(DAPL_EP * ep_ptr,
/* get local address information from socket */
sl = sizeof(cm_ptr->msg.daddr.so);
if (getsockname(cm_ptr->socket, (struct sockaddr *)&cm_ptr->msg.daddr.so, &sl)) {
+ int err = dapl_socket_errno();
dapl_log(DAPL_DBG_TYPE_ERR,
- " connect getsockname ERROR: %s -> %s r_qual %d\n",
- strerror(errno),
+ " connect getsockname ERROR: 0x%x %s -> %s r_qual %d\n",
+ err, strerror(err),
inet_ntoa(((struct sockaddr_in *)r_addr)->sin_addr),
(unsigned int)r_qual);;
}
@@ -604,8 +657,7 @@ dapli_socket_connect(DAPL_EP * ep_ptr,
return DAT_SUCCESS;
bail:
dapl_log(DAPL_DBG_TYPE_ERR,
- " connect ERROR: %s -> %s r_qual %d\n",
- strerror(errno),
+ " connect ERROR: -> %s r_qual %d\n",
inet_ntoa(((struct sockaddr_in *)r_addr)->sin_addr),
(unsigned int)r_qual);
@@ -629,9 +681,10 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
len = recv(cm_ptr->socket, (char *)&cm_ptr->msg, exp, 0);
if (len != exp || ntohs(cm_ptr->msg.ver) != DCM_VER) {
+ int err = dapl_socket_errno();
dapl_log(DAPL_DBG_TYPE_WARN,
- " CONN_RTU read: sk %d ERR %s, rcnt=%d, v=%d -> %s PORT L-%x R-%x PID L-%x R-%x\n",
- cm_ptr->socket, strerror(errno), len, ntohs(cm_ptr->msg.ver),
+ " CONN_RTU read: sk %d ERR 0x%x, rcnt=%d, v=%d -> %s PORT L-%x R-%x PID L-%x R-%x\n",
+ cm_ptr->socket, err, len, ntohs(cm_ptr->msg.ver),
inet_ntoa(((struct sockaddr_in *)&cm_ptr->addr)->sin_addr),
ntohs(((struct sockaddr_in *)&cm_ptr->msg.daddr.so)->sin_port),
ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port),
@@ -639,7 +692,7 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
ntohs(*(uint16_t*)&cm_ptr->msg.resv[2]));
/* Retry; corner case where server tcp stack resets under load */
- if (dapl_socket_errno() == ECONNRESET) {
+ if (err == ECONNRESET) {
closesocket(cm_ptr->socket);
cm_ptr->socket = DAPL_INVALID_SOCKET;
dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr,
@@ -692,9 +745,10 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
if (exp) {
len = recv(cm_ptr->socket, cm_ptr->msg.p_data, exp, 0);
if (len != exp) {
+ int err = dapl_socket_errno();
dapl_log(DAPL_DBG_TYPE_ERR,
- " CONN_RTU read pdata: ERR %s, rcnt=%d -> %s\n",
- strerror(errno), len,
+ " CONN_RTU read pdata: ERR 0x%x %s, rcnt=%d -> %s\n",
+ err, strerror(err), len,
inet_ntoa(((struct sockaddr_in *)
ep_ptr->param.
remote_ia_address_ptr)->sin_addr));
@@ -721,6 +775,7 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
}
/* modify QP to RTR and then to RTS with remote info */
+ dapl_os_lock(&ep_ptr->header.lock);
if (dapls_modify_qp_state(ep_ptr->qp_handle,
IBV_QPS_RTR,
cm_ptr->msg.saddr.ib.qpn,
@@ -736,6 +791,7 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
&cm_ptr->msg.daddr.so)->sin_addr),
ntohs(((struct sockaddr_in *)
&cm_ptr->msg.daddr.so)->sin_port));
+ dapl_os_unlock(&ep_ptr->header.lock);
goto bail;
}
if (dapls_modify_qp_state(ep_ptr->qp_handle,
@@ -753,16 +809,20 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
&cm_ptr->msg.daddr.so)->sin_addr),
ntohs(((struct sockaddr_in *)
&cm_ptr->msg.daddr.so)->sin_port));
+ dapl_os_unlock(&ep_ptr->header.lock);
goto bail;
}
+ dapl_os_unlock(&ep_ptr->header.lock);
dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect_rtu: send RTU\n");
/* complete handshake after final QP state change, Just ver+op */
cm_ptr->state = DCM_CONNECTED;
cm_ptr->msg.op = ntohs(DCM_RTU);
if (send(cm_ptr->socket, (char *)&cm_ptr->msg, 4, 0) == -1) {
+ int err = dapl_socket_errno();
dapl_log(DAPL_DBG_TYPE_ERR,
- " CONN_RTU: write error = %s\n", strerror(errno));
+ " CONN_RTU: write ERR = 0x%x %s\n",
+ err, strerror(err));
goto bail;
}
/* post the event with private data */
@@ -821,6 +881,7 @@ ud_bail:
} else
#endif
{
+ dapli_ep_check(cm_ptr->ep);
dapl_evd_connection_callback(cm_ptr, event, cm_ptr->msg.p_data,
DCM_MAX_PDATA_SIZE, ep_ptr);
}
@@ -848,7 +909,7 @@ dapli_socket_listen(DAPL_IA * ia_ptr, DAT_CONN_QUAL serviceID, DAPL_SP * sp_ptr)
struct sockaddr_in addr;
ib_cm_srvc_handle_t cm_ptr = NULL;
DAT_RETURN dat_status = DAT_SUCCESS;
- int opt = 1;
+ int opt = 1;
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" setup listen(ia_ptr %p ServiceID %d sp_ptr %p)\n",
@@ -864,23 +925,26 @@ dapli_socket_listen(DAPL_IA * ia_ptr, DAT_CONN_QUAL serviceID, DAPL_SP * sp_ptr)
/* bind, listen, set sockopt, accept, exchange data */
if ((cm_ptr->socket =
socket(AF_INET, SOCK_STREAM, IPPROTO_TCP)) == DAPL_INVALID_SOCKET) {
- dapl_log(DAPL_DBG_TYPE_ERR, " ERR: listen socket create: %s\n",
- strerror(errno));
+ int err = dapl_socket_errno();
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " listen: socket create: ERR 0x%x %s\n",
+ err, strerror(err));
dat_status = DAT_INSUFFICIENT_RESOURCES;
goto bail;
}
- setsockopt(cm_ptr->socket, SOL_SOCKET, SO_REUSEADDR, (char*)&opt, sizeof(opt));
+ setsockopt(cm_ptr->socket, SOL_SOCKET, SO_REUSEADDR, (char*)&opt, sizeof(opt));
addr.sin_port = htons(serviceID + 1000);
addr.sin_family = AF_INET;
addr.sin_addr = ((struct sockaddr_in *) &ia_ptr->hca_ptr->hca_address)->sin_addr;
if ((bind(cm_ptr->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0)
|| (listen(cm_ptr->socket, 128) < 0)) {
+ int err = dapl_socket_errno();
dapl_log(DAPL_DBG_TYPE_CM,
- " listen: ERROR %s on port %d\n",
- strerror(errno), serviceID + 1000);
- if (dapl_socket_errno() == EADDRINUSE)
+ " listen: ERROR 0x%x %s on port %d\n",
+ err, strerror(err), serviceID + 1000);
+ if (err == EADDRINUSE)
dat_status = DAT_CONN_QUAL_IN_USE;
else
dat_status = DAT_CONN_QUAL_UNAVAILABLE;
@@ -933,9 +997,10 @@ static void dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
&acm_ptr->msg.daddr.so,
(socklen_t *) &len);
if (acm_ptr->socket == DAPL_INVALID_SOCKET) {
+ int err = dapl_socket_errno();
dapl_log(DAPL_DBG_TYPE_ERR,
- " ACCEPT: ERR %s on FD %d l_cr %p\n",
- strerror(errno), cm_ptr->socket, cm_ptr);
+ " ACCEPT: ERR 0x%x %s on FD %d l_cr %p\n",
+ err, strerror(err), cm_ptr->socket, cm_ptr);
dapls_cm_release(acm_ptr);
return;
}
@@ -948,11 +1013,14 @@ static void dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
/* no delay for small packets */
ret = setsockopt(acm_ptr->socket, IPPROTO_TCP, TCP_NODELAY,
(char *)&opt, sizeof(opt));
- if (ret)
+ if (ret) {
+ int err = dapl_socket_errno();
dapl_log(DAPL_DBG_TYPE_ERR,
- " ACCEPT: NODELAY setsockopt: 0x%x 0x%x %s\n",
- ret, dapl_socket_errno(), strerror(dapl_socket_errno()));
-
+ " ACCEPT: NODELAY setsockopt:"
+ " RET %d ERR 0x%x %s\n",
+ ret, err, strerror(err));
+ }
+
/* get local address information from socket */
sl = sizeof(acm_ptr->addr);
getsockname(acm_ptr->socket, (struct sockaddr *)&acm_ptr->addr, &sl);
@@ -975,9 +1043,10 @@ static void dapli_socket_accept_data(ib_cm_srvc_handle_t acm_ptr)
/* read in DST QP info, IA address. check for private data */
len = recv(acm_ptr->socket, (char *)&acm_ptr->msg, exp, 0);
if (len != exp || ntohs(acm_ptr->msg.ver) != DCM_VER) {
+ int err = dapl_socket_errno();
dapl_log(DAPL_DBG_TYPE_ERR,
- " ACCEPT read: ERR %s, rcnt=%d, ver=%d\n",
- strerror(errno), len, ntohs(acm_ptr->msg.ver));
+ " ACCEPT read: ERR 0x%x %s, rcnt=%d, ver=%d\n",
+ err, strerror(err), len, ntohs(acm_ptr->msg.ver));
goto bail;
}
@@ -996,9 +1065,10 @@ static void dapli_socket_accept_data(ib_cm_srvc_handle_t acm_ptr)
if (exp) {
len = recv(acm_ptr->socket, acm_ptr->msg.p_data, exp, 0);
if (len != exp) {
+ int err = dapl_socket_errno();
dapl_log(DAPL_DBG_TYPE_ERR,
- " accept read pdata: ERR %s, rcnt=%d\n",
- strerror(errno), len);
+ " accept read pdata: ERR 0x%x %s, rcnt=%d\n",
+ err, strerror(err), len);
goto bail;
}
p_data = acm_ptr->msg.p_data;
@@ -1092,6 +1162,7 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
#endif
/* modify QP to RTR and then to RTS with remote info already read */
+ dapl_os_lock(&ep_ptr->header.lock);
if (dapls_modify_qp_state(ep_ptr->qp_handle,
IBV_QPS_RTR,
cm_ptr->msg.saddr.ib.qpn,
@@ -1102,6 +1173,7 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
strerror(errno),
inet_ntoa(((struct sockaddr_in *)
&cm_ptr->msg.daddr.so)->sin_addr));
+ dapl_os_unlock(&ep_ptr->header.lock);
goto bail;
}
if (dapls_modify_qp_state(ep_ptr->qp_handle,
@@ -1114,8 +1186,10 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
strerror(errno),
inet_ntoa(((struct sockaddr_in *)
&cm_ptr->msg.daddr.so)->sin_addr));
+ dapl_os_unlock(&ep_ptr->header.lock);
goto bail;
}
+ dapl_os_unlock(&ep_ptr->header.lock);
/* save remote address information */
dapl_os_memcpy(&ep_ptr->remote_ia_address,
@@ -1143,6 +1217,10 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
cm_ptr->hca = ia_ptr->hca_ptr;
cm_ptr->state = DCM_ACCEPTED;
+ /* Link CM to EP, already queued on work thread */
+ dapl_ep_link_cm(ep_ptr, cm_ptr);
+ cm_ptr->ep = ep_ptr;
+
local.p_size = htons(p_size);
iov[0].iov_base = (void *)&local;
iov[0].iov_len = exp;
@@ -1155,11 +1233,14 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
len = writev(cm_ptr->socket, iov, 1);
if (len != (p_size + exp)) {
+ int err = dapl_socket_errno();
dapl_log(DAPL_DBG_TYPE_ERR,
- " ACCEPT_USR: ERR %s, wcnt=%d -> %s\n",
- strerror(errno), len,
+ " ACCEPT_USR: ERR 0x%x %s, wcnt=%d -> %s\n",
+ err, strerror(err), len,
inet_ntoa(((struct sockaddr_in *)
&cm_ptr->msg.daddr.so)->sin_addr));
+ dapl_ep_unlink_cm(ep_ptr, cm_ptr);
+ cm_ptr->ep = NULL;
goto bail;
}
@@ -1176,9 +1257,6 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: accepted!\n");
- /* Link CM to EP, already queued on work thread */
- dapl_ep_link_cm(ep_ptr, cm_ptr);
- cm_ptr->ep = ep_ptr;
return DAT_SUCCESS;
bail:
/* schedule cleanup from workq */
@@ -1260,6 +1338,7 @@ ud_bail:
} else
#endif
{
+ dapli_ep_check(cm_ptr->ep);
dapls_cr_callback(cm_ptr, event, NULL, 0, cm_ptr->sp);
}
return;
@@ -1336,9 +1415,6 @@ dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
return DAT_SUCCESS;
}
- /* RC. Transition to error state to flush queue */
- dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0, 0, 0);
-
return (dapli_socket_disconnect(cm_ptr));
}
@@ -1367,7 +1443,10 @@ dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,
IN const ib_cm_events_t ib_cm_event)
{
if (ib_cm_event == IB_CME_TIMEOUT) {
- dp_ib_cm_handle_t cm_ptr = dapl_get_cm_from_ep(ep_ptr);
+ dp_ib_cm_handle_t cm_ptr;
+
+ if ((cm_ptr = dapl_get_cm_from_ep(ep_ptr)) == NULL)
+ return;
dapl_log(DAPL_DBG_TYPE_WARN,
"dapls_ib_disc_clean: CONN_TIMEOUT ep %p cm %p %s\n",
--
1.5.2.5
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH 07/12] dapl-2.0: common: dat_ep_connect should not set timer UD endpoints
From: Davis, Arlin R @ 2010-05-19 18:11 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, ofw_list
Connect for the UD type is simply AH resolution and doesn't
need to be timed. The common code is not designed to handle
multiple timed events on connect requests, so just skip
the timer for UD AH requests.
Signed-off-by: Arlin Davis <arlin.r.davis-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
dapl/common/dapl_ep_connect.c | 3 ++-
1 files changed, 2 insertions(+), 1 deletions(-)
diff --git a/dapl/common/dapl_ep_connect.c b/dapl/common/dapl_ep_connect.c
index 1f193ae..9b5829e 100755
--- a/dapl/common/dapl_ep_connect.c
+++ b/dapl/common/dapl_ep_connect.c
@@ -327,7 +327,8 @@ dapl_ep_connect(IN DAT_EP_HANDLE ep_handle,
dapl_os_lock(&ep_ptr->header.lock);
if (ep_ptr->param.ep_state ==
DAT_EP_STATE_ACTIVE_CONNECTION_PENDING
- && timeout != DAT_TIMEOUT_INFINITE) {
+ && timeout != DAT_TIMEOUT_INFINITE &&
+ ep_ptr->param.ep_attr.service_type == DAT_SERVICE_TYPE_RC) {
ep_ptr->cxn_timer =
(DAPL_OS_TIMER *)
dapl_os_alloc(sizeof(DAPL_OS_TIMER));
--
1.5.2.5
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH 06/12] dapl-2.0: ucm: fix error path during accept_usr reply failure
From: Davis, Arlin R @ 2010-05-19 18:11 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, ofw_list
If accept_usr fails when sending the reply, the EP was
being linked to the CM instead of being properly unlinked.
Signed-off-by: Arlin Davis <arlin.r.davis-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
dapl/openib_ucm/cm.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c
index 2cab529..85c8b4b 100644
--- a/dapl/openib_ucm/cm.c
+++ b/dapl/openib_ucm/cm.c
@@ -1454,7 +1454,7 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
dapl_os_unlock(&cm->lock);
if (ucm_reply(cm)) {
- dapl_ep_link_cm(ep, cm);
+ dapl_ep_unlink_cm(ep, cm);
goto bail;
}
dapl_dbg_log(DAPL_DBG_TYPE_CM, " PASSIVE: accepted!\n");
--
1.5.2.5
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH 05/12] dapl-2.0: ibal: changes for EP to CM linking and synchronization
From: Davis, Arlin R @ 2010-05-19 18:11 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, ofw_list
Windows IBAL changes to allocate and manage CM objects
and to link them to the EP. This will ensure the CM
IBAL objects and cm_ids are not destroyed before the EP.
Remove the Windows-only ibal_cm_handle from the EP structure.
Signed-off-by: Arlin Davis <arlin.r.davis-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
dapl/common/dapl_cr_util.c | 2 +-
dapl/common/dapl_ep_util.c | 7 -
dapl/ibal/dapl_ibal_cm.c | 274 ++++++++++++++++++++++++++++++--------------
dapl/ibal/dapl_ibal_qp.c | 14 ++-
dapl/ibal/dapl_ibal_util.h | 23 ++++-
dapl/ibal/udapl.rc | 4 +-
dapl/include/dapl.h | 1 -
7 files changed, 224 insertions(+), 101 deletions(-)
diff --git a/dapl/common/dapl_cr_util.c b/dapl/common/dapl_cr_util.c
index 39b61ad..5970fa0 100644
--- a/dapl/common/dapl_cr_util.c
+++ b/dapl/common/dapl_cr_util.c
@@ -81,7 +81,7 @@ DAPL_CR *dapls_cr_alloc(DAPL_IA * ia_ptr)
/*
* dapls_cr_free
*
- * Free the passed in EP structure.
+ * Free the passed in CR structure.
*
* Input:
* entry point pointer
diff --git a/dapl/common/dapl_ep_util.c b/dapl/common/dapl_ep_util.c
index daad78d..9aff242 100644
--- a/dapl/common/dapl_ep_util.c
+++ b/dapl/common/dapl_ep_util.c
@@ -214,13 +214,6 @@ void dapl_ep_dealloc(IN DAPL_EP * ep_ptr)
if (NULL != ep_ptr->cxn_timer) {
dapl_os_free(ep_ptr->cxn_timer, sizeof(DAPL_OS_TIMER));
}
-#if defined(_WIN32) || defined(_WIN64)
- if (ep_ptr->ibal_cm_handle) {
- dapl_os_free(ep_ptr->ibal_cm_handle,
- sizeof(*ep_ptr->ibal_cm_handle));
- ep_ptr->ibal_cm_handle = NULL;
- }
-#endif
#ifdef DAPL_COUNTERS
dapl_os_free(ep_ptr->cntrs, sizeof(DAT_UINT64) * DCNT_EP_ALL_COUNTERS);
diff --git a/dapl/ibal/dapl_ibal_cm.c b/dapl/ibal/dapl_ibal_cm.c
index c51faf8..e3c12ff 100644
--- a/dapl/ibal/dapl_ibal_cm.c
+++ b/dapl/ibal/dapl_ibal_cm.c
@@ -94,7 +94,7 @@ void dapli_print_private_data( char *prefix, const uint8_t *pd, int len )
if ( !pd || len <= 0 )
return;
- dapl_log ( DAPL_DBG_TYPE_CM, "--> %s: private_data:\n ",prefix);
+ dapl_log ( DAPL_DBG_TYPE_CM, "--> %s: private_data(len %d)\n ",prefix,len);
if (len > IB_MAX_REP_PDATA_SIZE)
{
@@ -107,13 +107,70 @@ void dapli_print_private_data( char *prefix, const uint8_t *pd, int len )
for ( i = 0 ; i < len; i++ )
{
dapl_log ( DAPL_DBG_TYPE_CM, "%2x ", pd[i]);
- if ( ((i+1) % 20) == 0 )
+ if ( ((i+1) % 5) == 0 )
dapl_log ( DAPL_DBG_TYPE_CM, "\n ");
}
dapl_log ( DAPL_DBG_TYPE_CM, "\n");
}
#endif
+/* EP-CM linking support */
+dp_ib_cm_handle_t ibal_cm_alloc(void)
+{
+ dp_ib_cm_handle_t cm_ptr;
+
+ /* Allocate CM, init lock, and initialize */
+ if ((cm_ptr = dapl_os_alloc(sizeof(*cm_ptr))) == NULL)
+ return NULL;
+
+ (void)dapl_os_memzero(cm_ptr, sizeof(*cm_ptr));
+ cm_ptr->ref_count = 1;
+
+ if (dapl_os_lock_init(&cm_ptr->lock)) {
+ dapl_os_free(cm_ptr, sizeof(*cm_ptr));
+ return NULL;
+ }
+
+ dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&cm_ptr->list_entry);
+
+ return cm_ptr;
+}
+
+/* free CM object resources */
+static void ibal_cm_dealloc(dp_ib_cm_handle_t cm_ptr)
+{
+ dapl_os_assert(!cm_ptr->ref_count);
+ dapl_os_lock_destroy(&cm_ptr->lock);
+ dapl_os_free(cm_ptr, sizeof(*cm_ptr));
+}
+
+void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr)
+{
+ dapl_os_lock(&cm_ptr->lock);
+ cm_ptr->ref_count++;
+ dapl_os_unlock(&cm_ptr->lock);
+}
+
+void dapls_cm_release(dp_ib_cm_handle_t cm_ptr)
+{
+ dapl_os_lock(&cm_ptr->lock);
+ cm_ptr->ref_count--;
+ if (cm_ptr->ref_count) {
+ dapl_os_unlock(&cm_ptr->lock);
+ return;
+ }
+ dapl_os_unlock(&cm_ptr->lock);
+ ibal_cm_dealloc(cm_ptr);
+}
+
+/* blocking: called from user thread dapl_ep_free() only */
+void dapls_cm_free(dp_ib_cm_handle_t cm_ptr)
+{
+ dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr);
+
+ /* final reference, alloc */
+ dapls_cm_release(cm_ptr);
+}
static void
dapli_ib_cm_apr_cb (
@@ -147,6 +204,7 @@ dapli_ib_cm_dreq_cb (
ib_cm_drep_t cm_drep;
DAPL_EP *ep_ptr;
int bail=10;
+ dp_ib_cm_handle_t cm_ptr;
dapl_os_assert (p_cm_dreq_rec);
@@ -168,6 +226,14 @@ dapli_ib_cm_dreq_cb (
DAPL_MAGIC_EP );
return;
}
+ cm_ptr = dapl_get_cm_from_ep(ep_ptr);
+ if (!cm_ptr)
+ {
+ dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+ "--> %s: !CM_PTR on EP %p\n", __FUNCTION__, ep_ptr);
+ return;
+ }
+ dapl_os_assert(cm_ptr->ib_cm.h_qp == p_cm_dreq_rec->h_cm_dreq.h_qp);
dapl_dbg_log (DAPL_DBG_TYPE_CM,
"--> %s() EP %p, %s sent_discreq %s\n",
@@ -210,10 +276,8 @@ dapli_ib_cm_dreq_cb (
if (ep_ptr->cr_ptr)
{
- dapl_os_assert(ep_ptr->ibal_cm_handle->cid
- == p_cm_dreq_rec->h_cm_dreq.cid);
/* passive side */
- dapls_cr_callback ( ep_ptr->cm_handle,
+ dapls_cr_callback ( cm_ptr,
IB_CME_DISCONNECTED,
(void * __ptr64) p_cm_dreq_rec->p_dreq_pdata,
IB_DREQ_PDATA_SIZE,
@@ -223,7 +287,7 @@ dapli_ib_cm_dreq_cb (
{
/* active side */
dapl_evd_connection_callback (
- (dp_ib_cm_handle_t) &p_cm_dreq_rec->h_cm_dreq,
+ cm_ptr,
IB_CME_DISCONNECTED,
(void * __ptr64)
p_cm_dreq_rec->p_dreq_pdata,
@@ -242,6 +306,7 @@ dapli_ib_cm_drep_cb (
IN ib_cm_drep_rec_t *p_cm_drep_rec )
{
DAPL_EP *ep_ptr;
+ dp_ib_cm_handle_t cm_ptr;
dapl_os_assert (p_cm_drep_rec != NULL);
@@ -260,11 +325,19 @@ dapli_ib_cm_drep_cb (
"--> %s: BAD EP Handle EP=%lx\n", __FUNCTION__,ep_ptr);
return;
}
+ cm_ptr = dapl_get_cm_from_ep(ep_ptr);
+ if (!cm_ptr)
+ {
+ dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+ "--> %s: !CM_PTR on EP %p\n", __FUNCTION__, ep_ptr);
+ return;
+ }
+ dapl_os_assert(cm_ptr->ib_cm.h_qp == p_cm_drep_rec->h_qp);
dapl_dbg_log (DAPL_DBG_TYPE_CM,
"--> DiCDpcb: EP %p state %s cm_hdl %p\n",ep_ptr,
dapl_get_ep_state_str(ep_ptr->param.ep_state),
- ep_ptr->cm_handle);
+ cm_ptr);
if ( ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED )
{
@@ -274,17 +347,10 @@ dapli_ib_cm_drep_cb (
return;
}
- if (ep_ptr->cm_handle == DAPL_IB_INVALID_HANDLE )
- {
- dapl_dbg_log (DAPL_DBG_TYPE_CM,
- "--> %s: Invalid EP->CM handle?\n", __FUNCTION__);
- return;
- }
-
if (ep_ptr->cr_ptr)
{
/* passive connection side */
- dapls_cr_callback ( ep_ptr->cm_handle,
+ dapls_cr_callback ( cm_ptr,
IB_CME_DISCONNECTED,
(void * __ptr64) p_cm_drep_rec->p_drep_pdata,
IB_DREP_PDATA_SIZE,
@@ -294,7 +360,7 @@ dapli_ib_cm_drep_cb (
{
/* active connection side */
dapl_evd_connection_callback (
- ep_ptr->cm_handle,
+ cm_ptr,
IB_CME_DISCONNECTED,
(void * __ptr64) p_cm_drep_rec->p_drep_pdata,
IB_DREP_PDATA_SIZE,
@@ -316,6 +382,7 @@ dapli_ib_cm_rep_cb (
DAPL_PRIVATE *prd_ptr;
DAPL_EP *ep_ptr;
dapl_ibal_ca_t *p_ca;
+ dp_ib_cm_handle_t cm_ptr;
dapl_os_assert (p_cm_rep_rec != NULL);
@@ -327,8 +394,17 @@ dapli_ib_cm_rep_cb (
__FUNCTION__, ep_ptr);
return;
}
+ cm_ptr = dapl_get_cm_from_ep(ep_ptr);
+ if (!cm_ptr)
+ {
+ dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+ "--> %s: !CM_PTR on EP %p\n", __FUNCTION__, ep_ptr);
+ return;
+ }
+ dapl_os_assert(cm_ptr->ib_cm.h_qp == p_cm_rep_rec->h_cm_rep.h_qp);
+
dapl_dbg_log (DAPL_DBG_TYPE_CM,
- "--> DiCRpcb: EP = %lx local_max_rdma_read_in %d\n",
+ "--> DiCRpcb: EP %lx local_max_rdma_read_in %d\n",
ep_ptr, p_cm_rep_rec->resp_res);
p_ca = (dapl_ibal_ca_t *)
@@ -372,7 +448,7 @@ dapli_ib_cm_rep_cb (
#endif
dapl_evd_connection_callback (
- (dp_ib_cm_handle_t)&p_cm_rep_rec->h_cm_rep,
+ cm_ptr,
cm_cb_op,
(void *) prd_ptr,
IB_REP_PDATA_SIZE,
@@ -386,6 +462,7 @@ dapli_ib_cm_rej_cb (
{
DAPL_EP *ep_ptr;
ib_cm_events_t cm_event;
+ dp_ib_cm_handle_t cm_ptr;
dapl_os_assert (p_cm_rej_rec);
@@ -397,6 +474,14 @@ dapli_ib_cm_rej_cb (
__FUNCTION__, ep_ptr);
return;
}
+ cm_ptr = dapl_get_cm_from_ep(ep_ptr);
+ if (!cm_ptr)
+ {
+ dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+ "--> %s: !CM_PTR on EP %p\n", __FUNCTION__, ep_ptr);
+ return;
+ }
+ dapl_os_assert(cm_ptr->ib_cm.h_qp == p_cm_rej_rec->h_qp);
dapl_dbg_log (DAPL_DBG_TYPE_CM,
"--> DiCRjcb: EP = %lx QP = %lx rej reason = 0x%x\n",
@@ -456,7 +541,7 @@ dapli_ib_cm_rej_cb (
if (ep_ptr->cr_ptr)
{
- dapls_cr_callback ( ep_ptr->cm_handle,
+ dapls_cr_callback ( cm_ptr,
cm_event,
(void * __ptr64) p_cm_rej_rec->p_rej_pdata,
IB_REJ_PDATA_SIZE,
@@ -465,7 +550,7 @@ dapli_ib_cm_rej_cb (
else
{
dapl_evd_connection_callback (
- ep_ptr->cm_handle,
+ cm_ptr,
cm_event,
(void * __ptr64) p_cm_rej_rec->p_rej_pdata,
IB_REJ_PDATA_SIZE,
@@ -481,12 +566,7 @@ dapli_ib_cm_req_cb ( IN ib_cm_req_rec_t *p_cm_req_rec )
{
DAPL_SP *sp_ptr;
DAT_SOCK_ADDR6 dest_ia_addr;
- dp_ib_cm_handle_t cm_handle;
-
- struct ibal_cr_data {
- ib_cm_handle_t cm_hdl;
- DAT_SOCK_ADDR6 dst_ip_addr;
- } *crd;
+ dp_ib_cm_handle_t cm_ptr;
dapl_os_assert (p_cm_req_rec);
@@ -517,8 +597,8 @@ dapli_ib_cm_req_cb ( IN ib_cm_req_rec_t *p_cm_req_rec )
* EP struct deallocation is where this memory is released or prior in the
* error case.
*/
- crd = dapl_os_alloc ( sizeof(struct ibal_cr_data) );
- if ( !crd )
+ cm_ptr = ibal_cm_alloc();
+ if (!cm_ptr)
{
dapl_dbg_log ( DAPL_DBG_TYPE_ERR,
"%s: FAILED to alloc IB CM handle storage?\n",
@@ -526,9 +606,6 @@ dapli_ib_cm_req_cb ( IN ib_cm_req_rec_t *p_cm_req_rec )
return;
}
- cm_handle = &crd->cm_hdl;
- dapl_os_memzero ( (void*)crd, sizeof(*crd) );
-
/*
* Save the cm_srvc_handle to avoid the race condition between
* the return of the ib_cm_listen and the notification of a conn req
@@ -577,6 +654,16 @@ dapli_ib_cm_req_cb ( IN ib_cm_req_rec_t *p_cm_req_rec )
#endif /* NO_NAME_SERVICE */
+ /* preserve CR cm handle data */
+ dapl_os_memcpy( (void*)&cm_ptr->ib_cm,
+ (void*)&p_cm_req_rec->h_cm_req,
+ sizeof(ib_cm_handle_t));
+
+ /* preserve remote IP address */
+ dapl_os_memcpy( (void*)&cm_ptr->dst_ip_addr,
+ (void*)&dest_ia_addr,
+ sizeof(dest_ia_addr));
+
#if defined(DAPL_DBG)
{
char ipa[20];
@@ -584,23 +671,13 @@ dapli_ib_cm_req_cb ( IN ib_cm_req_rec_t *p_cm_req_rec )
//rval = ((struct sockaddr_in *) (&dest_ia_addr))->sin_addr.s_addr;
dapl_dbg_log (DAPL_DBG_TYPE_CM|DAPL_DBG_TYPE_CALLBACK,
- "%s: query SA for RemoteAddr: %s\n",
- __FUNCTION__,
- dapli_get_ip_addr_str( (DAT_SOCK_ADDR6*)
- &dest_ia_addr, ipa) );
+ "%s: query SA (CM %lx)->dst_ip_addr: %s\n",
+ __FUNCTION__,cm_ptr,
+ dapli_get_ip_addr_str(
+ (DAT_SOCK_ADDR6*) &cm_ptr->dst_ip_addr, ipa) );
}
#endif
- /* preserve CR cm handle data */
- dapl_os_memcpy( (void*)cm_handle,
- (void*)&p_cm_req_rec->h_cm_req,
- sizeof(ib_cm_handle_t) );
-
- /* preserve remote IP address */
- dapl_os_memcpy( (void*)&crd->dst_ip_addr,
- (void*)&dest_ia_addr,
- sizeof(dest_ia_addr) );
-
/* FIXME - Vu
* We have NOT used/saved the primary and alternative path record
* ie. p_cm_req_rec->p_primary_path and p_cm_req_rec->p_alt_path
@@ -614,10 +691,10 @@ dapli_ib_cm_req_cb ( IN ib_cm_req_rec_t *p_cm_req_rec )
__FUNCTION__, sp_ptr, p_cm_req_rec->resp_res,
p_cm_req_rec->p_req_pdata);
- dapls_cr_callback ( cm_handle,
+ dapls_cr_callback ( cm_ptr,
IB_CME_CONNECTION_REQUEST_PENDING,
(void * __ptr64) p_cm_req_rec->p_req_pdata,
- IB_REQ_PDATA_SIZE,
+ IB_REQ_PDATA_SIZE,
(void * __ptr64) sp_ptr );
}
@@ -627,7 +704,7 @@ dapli_ib_cm_mra_cb (
IN ib_cm_mra_rec_t *p_cm_mra_rec )
{
UNUSED_PARAM( p_cm_mra_rec );
- dapl_dbg_log (DAPL_DBG_TYPE_CM | DAPL_DBG_TYPE_CALLBACK,
+ dapl_dbg_log (DAPL_DBG_TYPE_CM | DAPL_DBG_TYPE_CALLBACK,
"--> DiCMcb: CM callback MRA\n");
}
@@ -635,7 +712,8 @@ static void
dapli_ib_cm_rtu_cb (
IN ib_cm_rtu_rec_t *p_cm_rtu_rec )
{
- DAPL_EP *ep_ptr;
+ DAPL_EP *ep_ptr;
+ dp_ib_cm_handle_t cm_ptr;
dapl_os_assert (p_cm_rtu_rec != NULL);
@@ -647,9 +725,18 @@ dapli_ib_cm_rtu_cb (
__FUNCTION__, ep_ptr);
return;
}
+ cm_ptr = dapl_get_cm_from_ep(ep_ptr);
+ if (!cm_ptr)
+ {
+ dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+ "--> %s: !CM_PTR on EP %p\n", __FUNCTION__, ep_ptr);
+ return;
+ }
+ dapl_os_assert(cm_ptr->ib_cm.h_qp == p_cm_rtu_rec->h_qp);
dapl_dbg_log (DAPL_DBG_TYPE_CM | DAPL_DBG_TYPE_CALLBACK,
- "--> DiCRucb: EP %lx QP %lx\n", ep_ptr, ep_ptr->qp_handle);
+ "--> DiCRucb: EP %lx QP %lx CR %lx\n",
+ ep_ptr, ep_ptr->qp_handle, ep_ptr->cr_ptr);
if (ep_ptr->cr_ptr)
{
@@ -657,7 +744,8 @@ dapli_ib_cm_rtu_cb (
sp_ptr = ((DAPL_CR *) ep_ptr->cr_ptr)->sp_ptr;
- dapls_cr_callback ( ep_ptr->cm_handle,
+ /* passive connection side */
+ dapls_cr_callback ( cm_ptr,
IB_CME_CONNECTED,
(void * __ptr64) p_cm_rtu_rec->p_rtu_pdata,
IB_RTU_PDATA_SIZE,
@@ -667,7 +755,7 @@ dapli_ib_cm_rtu_cb (
else
{
dapl_evd_connection_callback (
- ep_ptr->cm_handle,
+ cm_ptr,
IB_CME_CONNECTED,
(void * __ptr64) p_cm_rtu_rec->p_rtu_pdata,
IB_RTU_PDATA_SIZE,
@@ -698,18 +786,21 @@ dapls_ib_cm_remote_addr (
{
DAPL_HEADER *header;
- void *vp;
+ dp_ib_cm_handle_t cm;
char ipa[20];
+ char *rtype;
header = (DAPL_HEADER *)dat_handle;
if (header->magic == DAPL_MAGIC_EP)
{
- vp = &((DAPL_EP *) dat_handle)->remote_ia_address;
+ cm = dapl_get_cm_from_ep((DAPL_EP *)dat_handle);
+ rtype = "EP";
}
else if (header->magic == DAPL_MAGIC_CR)
{
- vp = &((DAPL_CR *) dat_handle)->remote_ia_address;
+ cm = ((DAPL_CR *) dat_handle)->ib_cm_handle;
+ rtype = "CR";
}
else
{
@@ -719,10 +810,10 @@ dapls_ib_cm_remote_addr (
return DAT_INVALID_HANDLE;
}
- dapl_os_memcpy( remote_address, vp, sizeof(DAT_SOCK_ADDR6) );
+ dapl_os_memcpy( remote_address, &cm->dst_ip_addr, sizeof(DAT_SOCK_ADDR6) );
- dapl_dbg_log ( DAPL_DBG_TYPE_CM, "%s: returns %s\n",
- __FUNCTION__,
+ dapl_dbg_log ( DAPL_DBG_TYPE_CM, "%s: returns %s remote Addrs %s\n",
+ __FUNCTION__, rtype,
dapli_get_ip_addr_str((DAT_SOCK_ADDR6*)remote_address,ipa) );
return DAT_SUCCESS;
@@ -1011,7 +1102,8 @@ dapls_ib_disconnect ( IN DAPL_EP *ep_ptr,
IN DAT_CLOSE_FLAGS disconnect_flags )
{
ib_api_status_t ib_status = IB_SUCCESS;
- ib_cm_dreq_t cm_dreq;
+ ib_cm_dreq_t cm_dreq;
+ dp_ib_cm_handle_t cm_ptr;
dapl_os_assert(ep_ptr);
@@ -1021,14 +1113,14 @@ dapls_ib_disconnect ( IN DAPL_EP *ep_ptr,
"--> %s: BAD EP Magic EP=%lx\n", __FUNCTION__,ep_ptr);
return DAT_SUCCESS;
}
-
- if (ep_ptr->cm_handle == DAPL_IB_INVALID_HANDLE )
+ cm_ptr = dapl_get_cm_from_ep(ep_ptr);
+ if (!cm_ptr)
{
- dapl_dbg_log (DAPL_DBG_TYPE_CM,
- "--> %s: Invalid EP->CM handle, OK.\n", __FUNCTION__);
+ dapl_dbg_log (DAPL_DBG_TYPE_ERR,
+ "--> %s: !CM_PTR on EP %p\n", __FUNCTION__, ep_ptr);
return DAT_SUCCESS;
}
-
+
dapl_dbg_log (DAPL_DBG_TYPE_CM,
"--> %s() EP %p %s rx_drq %d tx_drq %d Close %s\n", __FUNCTION__,
ep_ptr, dapl_get_ep_state_str(ep_ptr->param.ep_state),
@@ -1314,12 +1406,12 @@ dapls_ib_reject_connection ( IN dp_ib_cm_handle_t ib_cm_handle,
dapli_print_private_data("DsRjC",private_data,private_data_size);
#endif
- ib_status = ib_cm_rej ( *ib_cm_handle, &cm_rej);
+ ib_status = ib_cm_rej(ib_cm_handle->ib_cm, &cm_rej);
if (ib_status != IB_SUCCESS)
{
dapl_dbg_log ( DAPL_DBG_TYPE_ERR,
- "--> DsRjC: cm_handle %lx reject failed %s\n",
+ "--> DsRjC: cm_handle %p reject failed %s\n",
ib_cm_handle, ib_get_err_str(ib_status) );
}
@@ -1390,6 +1482,7 @@ dapls_ib_accept_connection (
dapl_ibal_port_t *p_active_port;
ib_cm_rep_t cm_rep;
ib_qp_attr_t qpa;
+ dp_ib_cm_handle_t cm_ptr;
cr_ptr = (DAPL_CR *) cr_handle;
ep_ptr = (DAPL_EP *) ep_handle;
@@ -1426,28 +1519,36 @@ dapls_ib_accept_connection (
}
cr_ptr->param.local_ep_handle = ep_handle;
- ep_ptr->cm_handle = cr_ptr->ib_cm_handle;
+
/*
* assume ownership, in that once the EP is released the dynamic
* memory containing the IBAL CM handle (ib_cm_handle_t) struct will
* be released; see dapl_ep_dealloc().
*/
- ep_ptr->ibal_cm_handle = cr_ptr->ib_cm_handle;
-
+
+ /* EP-CM, save/release CR CM object, use EP CM object already linked */
+ cm_ptr = dapl_get_cm_from_ep(ep_ptr);
+ if (!cm_ptr) {
+ dapl_dbg_log ( DAPL_DBG_TYPE_ERR,
+ "--> DsIBAC: CM linking to EP %p not available\n",
+ ep_ptr);
+ return (DAT_INVALID_STATE);
+ }
+
/* set remote IP addr fields. IP addr data is deduced from Connection
* Request record (gid/lib) and stashed away for use here. DAPL 1.1
* had an interface for passing the IP info down, interface went away
* in 2.0?
*/
dapl_os_memcpy( (void*)&ep_ptr->remote_ia_address,
- (void*)(ep_ptr->cm_handle+1),
+ (void*)&cr_ptr->ib_cm_handle->dst_ip_addr,
sizeof(DAT_SOCK_ADDR6) );
dapl_os_memcpy( (void*)&cr_ptr->remote_ia_address,
(void*)&ep_ptr->remote_ia_address,
sizeof(DAT_SOCK_ADDR6) );
-#if defined(DAPL_DBG) && 0
+#if defined(DAPL_DBG)
{
char ipa[20];
@@ -1455,11 +1556,21 @@ dapls_ib_accept_connection (
"%s: EP(%lx) RemoteAddr: %s\n",
__FUNCTION__, ep_ptr,
dapli_get_ip_addr_str(
- (DAT_SOCK_ADDR6*)&ep_ptr->remote_ia_address, ipa));
+ (DAT_SOCK_ADDR6*)&ep_ptr->remote_ia_address, ipa) );
}
#endif
- ep_ptr->qp_state = IB_QPS_INIT;
+ dapl_os_memcpy( (void*)&cm_ptr->dst_ip_addr,
+ (void*)&cr_ptr->ib_cm_handle->dst_ip_addr,
+ sizeof(DAT_SOCK_ADDR6) );
+
+ /* get h_al and connection ID from CR CM object, h_qp already set */
+ cm_ptr->ib_cm.cid = cr_ptr->ib_cm_handle->ib_cm.cid;
+ cm_ptr->ib_cm.h_al = cr_ptr->ib_cm_handle->ib_cm.h_al;
+ dapls_cm_release(cr_ptr->ib_cm_handle);
+
+ cr_ptr->ib_cm_handle = cm_ptr; /* for dapli_get_sp_ep() upcall */
+
ep_ptr->cr_ptr = cr_ptr;
dapl_os_memzero ( (void*)&cm_rep, sizeof (ib_cm_rep_t) );
@@ -1526,19 +1637,13 @@ dapls_ib_accept_connection (
cm_rep.access_ctrl, cm_rep.init_depth,cm_rep.qp_type,
dapls_cb_pending(&ep_ptr->req_buffer));
- ib_status = ib_cm_rep ( *ep_ptr->cm_handle, &cm_rep );
+ ib_status = ib_cm_rep ( cm_ptr->ib_cm, &cm_rep );
if (ib_status != IB_SUCCESS)
{
dapl_dbg_log ( DAPL_DBG_TYPE_ERR,
"--> DsIBAC: EP %lx QP %lx CR reply failed '%s'\n",
ep_ptr, ep_ptr->qp_handle, ib_get_err_str(ib_status) );
-
- /* errors not perculated back to CR callback which allocated the
- * memory, free it here on error.
- */
- dapl_os_free ( (void*)ep_ptr->ibal_cm_handle, sizeof(ib_cm_handle_t) );
- ep_ptr->ibal_cm_handle = NULL;
}
return ( dapl_ib_status_convert ( ib_status ) );
@@ -1589,11 +1694,6 @@ dapls_ib_disconnect_clean (
ep_ptr->sent_discreq = DAT_FALSE;
ep_ptr->recv_discreq = DAT_FALSE;
- if ( ep_ptr->ibal_cm_handle )
- {
- dapl_os_free ( (void*)ep_ptr->ibal_cm_handle, sizeof(ib_cm_handle_t) );
- }
- ep_ptr->ibal_cm_handle = NULL;
/*
* Query the QP to get the current state */
@@ -1653,7 +1753,7 @@ dapls_ib_cr_handoff (
cr_ptr = (DAPL_CR *) cr_handle;
- if (cr_ptr->ib_cm_handle->cid == 0xFFFFFFFF)
+ if (cr_ptr->ib_cm_handle->ib_cm.cid == 0xFFFFFFFF)
{
dapl_dbg_log ( DAPL_DBG_TYPE_ERR,
"--> DsCH: CR = %lx invalid cm handle\n", cr_ptr);
@@ -1667,7 +1767,7 @@ dapls_ib_cr_handoff (
return DAT_INVALID_PARAMETER;
}
- ib_status = ib_cm_handoff (*cr_ptr->ib_cm_handle, handoff_serv_id);
+ ib_status = ib_cm_handoff (cr_ptr->ib_cm_handle->ib_cm, handoff_serv_id);
if (ib_status != IB_SUCCESS)
{
diff --git a/dapl/ibal/dapl_ibal_qp.c b/dapl/ibal/dapl_ibal_qp.c
index cc8c394..f52f5da 100644
--- a/dapl/ibal/dapl_ibal_qp.c
+++ b/dapl/ibal/dapl_ibal_qp.c
@@ -24,6 +24,7 @@
#include "dapl_adapter_util.h"
#include "dapl_evd_util.h"
#include "dapl_ibal_util.h"
+#include "dapl_ep_util.h"
#define DAPL_IBAL_QKEY 0
#define DAPL_IBAL_START_PSN 0
@@ -133,7 +134,8 @@ dapls_ib_qp_alloc (
ib_cq_handle_t cq_send;
dapl_ibal_ca_t *p_ca;
dapl_ibal_port_t *p_active_port;
- ib_qp_attr_t qp_attr;
+ ib_qp_attr_t qp_attr;
+ dp_ib_cm_handle_t cm_ptr;
attr = &ep_ptr->param.ep_attr;
@@ -241,6 +243,16 @@ dapls_ib_qp_alloc (
ib_get_err_str(ib_status));
return (DAT_INSUFFICIENT_RESOURCES);
}
+ /* EP-CM linking */
+ cm_ptr = ibal_cm_alloc();
+ if (!cm_ptr)
+ {
+ dapl_dbg_log (DAPL_DBG_TYPE_ERR, "--> DsQA: Create CM failed\n");
+ return (DAT_INSUFFICIENT_RESOURCES);
+ }
+ cm_ptr->ib_cm.h_qp = ep_ptr->qp_handle;
+ cm_ptr->ep = ep_ptr;
+ dapl_ep_link_cm(ep_ptr, cm_ptr);
dapl_dbg_log (DAPL_DBG_TYPE_EP, "--> DsQA: EP=%p, tEVD=%p, rEVD=%p QP=%p\n",
ep_ptr, ep_ptr->param.request_evd_handle,
diff --git a/dapl/ibal/dapl_ibal_util.h b/dapl/ibal/dapl_ibal_util.h
index a73d6c1..e28bdbe 100644
--- a/dapl/ibal/dapl_ibal_util.h
+++ b/dapl/ibal/dapl_ibal_util.h
@@ -39,8 +39,28 @@
typedef struct ib_cm_handle *dp_ib_cm_handle_t;
typedef struct ib_cm_handle *ib_cm_srvc_handle_t;
#else
-typedef ib_cm_handle_t *dp_ib_cm_handle_t;
+
+/* EP-CM linking requires list_entry, protected ref counting */
+typedef struct dapl_ibal_cm
+{
+ struct dapl_llist_entry list_entry;
+ DAPL_OS_LOCK lock;
+ int ref_count;
+ DAT_SOCK_ADDR6 dst_ip_addr;
+ ib_cm_handle_t ib_cm; /* h_al, h_qp, cid */
+ DAPL_EP *ep;
+
+} dapl_ibal_cm_t;
+
+typedef dapl_ibal_cm_t *dp_ib_cm_handle_t;
typedef ib_listen_handle_t ib_cm_srvc_handle_t;
+
+/* EP-CM linking prototypes */
+extern void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr);
+extern void dapls_cm_release(dp_ib_cm_handle_t cm_ptr);
+extern void dapls_cm_free(dp_ib_cm_handle_t cm_ptr);
+extern dp_ib_cm_handle_t ibal_cm_alloc(void);
+
#endif
typedef ib_net64_t IB_HCA_NAME;
@@ -543,7 +563,6 @@ dapli_find_evd_cb_by_context(
extern IB_HCA_NAME
dapl_ib_convert_name(IN char *name);
-
STATIC _INLINE_ int32_t
dapli_ibal_convert_privileges (IN DAT_MEM_PRIV_FLAGS privileges )
{
diff --git a/dapl/ibal/udapl.rc b/dapl/ibal/udapl.rc
index 7c2505a..e5fc1ac 100644
--- a/dapl/ibal/udapl.rc
+++ b/dapl/ibal/udapl.rc
@@ -36,11 +36,11 @@
#define VER_FILESUBTYPE VFT2_UNKNOWN
#if DBG
-#define VER_FILEDESCRIPTION_STR "Direct Access Provider Library v2.0 (Debug)"
+#define VER_FILEDESCRIPTION_STR "Direct Access Provider Library v2.0 (IBAL) (Debug)"
#define VER_INTERNALNAME_STR "dapl2d.dll"
#define VER_ORIGINALFILENAME_STR "dapl2d.dll"
#else
-#define VER_FILEDESCRIPTION_STR "Direct Access Provider Library v2.0"
+#define VER_FILEDESCRIPTION_STR "Direct Access Provider Library v2.0 (IBAL)"
#define VER_INTERNALNAME_STR "dapl2.dll"
#define VER_ORIGINALFILENAME_STR "dapl2.dll"
#endif
diff --git a/dapl/include/dapl.h b/dapl/include/dapl.h
index 208113b..8dab61e 100755
--- a/dapl/include/dapl.h
+++ b/dapl/include/dapl.h
@@ -472,7 +472,6 @@ struct dapl_ep
#if defined(_WIN32) || defined(_WIN64)
DAT_BOOLEAN recv_discreq;
DAT_BOOLEAN sent_discreq;
- dp_ib_cm_handle_t ibal_cm_handle;
#endif
#ifdef DAPL_COUNTERS
void *cntrs;
--
1.5.2.5
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH 04/12] dapl-2.0: scm: add support for canceling conn request that times out.
From: Davis, Arlin R @ 2010-05-19 18:11 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, ofw_list
Print a warning message during timeout.
Signed-off-by: Arlin Davis <arlin.r.davis-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
dapl/common/dapl_ep_connect.c | 2 ++
dapl/openib_scm/cm.c | 11 ++++++++++-
2 files changed, 12 insertions(+), 1 deletions(-)
diff --git a/dapl/common/dapl_ep_connect.c b/dapl/common/dapl_ep_connect.c
index 4e5e112..1f193ae 100755
--- a/dapl/common/dapl_ep_connect.c
+++ b/dapl/common/dapl_ep_connect.c
@@ -334,6 +334,8 @@ dapl_ep_connect(IN DAT_EP_HANDLE ep_handle,
dapls_timer_set(ep_ptr->cxn_timer,
dapls_ep_timeout, ep_ptr, timeout);
+
+ dapl_log(DAPL_DBG_TYPE_WARN, " dapl_ep_connect timeout = %d us\n", timeout);
}
dapl_os_unlock(&ep_ptr->header.lock);
}
diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
index 957066a..6958b67 100644
--- a/dapl/openib_scm/cm.c
+++ b/dapl/openib_scm/cm.c
@@ -1366,7 +1366,16 @@ dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,
IN DAT_BOOLEAN active,
IN const ib_cm_events_t ib_cm_event)
{
- /* nothing to cleanup */
+ if (ib_cm_event == IB_CME_TIMEOUT) {
+ dp_ib_cm_handle_t cm_ptr = dapl_get_cm_from_ep(ep_ptr);
+
+ dapl_log(DAPL_DBG_TYPE_WARN,
+ "dapls_ib_disc_clean: CONN_TIMEOUT ep %p cm %p %s\n",
+ ep_ptr, cm_ptr, dapl_cm_state_str(cm_ptr->state));
+
+ /* schedule release of socket and local resources */
+ dapli_cm_free(cm_ptr);
+ }
}
/*
--
1.5.2.5
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH 03/12] dapl-2.0: scm, cma, ucm: consolidate dat event/provider event translation
From: Davis, Arlin R @ 2010-05-19 18:11 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, ofw_list
Signed-off-by: Arlin Davis <arlin.r.davis-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
dapl/openib_cma/cm.c | 108 -------------------------------------------
dapl/openib_common/util.c | 107 +++++++++++++++++++++++++++++++++++++++++++
dapl/openib_scm/cm.c | 112 ++-------------------------------------------
dapl/openib_ucm/cm.c | 99 ---------------------------------------
4 files changed, 111 insertions(+), 315 deletions(-)
diff --git a/dapl/openib_cma/cm.c b/dapl/openib_cma/cm.c
index a85e6ae..1e846aa 100644
--- a/dapl/openib_cma/cm.c
+++ b/dapl/openib_cma/cm.c
@@ -1051,114 +1051,6 @@ int dapls_ib_private_data_size(IN DAPL_HCA * hca_ptr)
return RDMA_MAX_PRIVATE_DATA;
}
-/*
- * Map all CMA event codes to the DAT equivelent.
- */
-#define DAPL_IB_EVENT_CNT 13
-
-static struct ib_cm_event_map {
- const ib_cm_events_t ib_cm_event;
- DAT_EVENT_NUMBER dat_event_num;
-} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
- /* 00 */ {
- IB_CME_CONNECTED, DAT_CONNECTION_EVENT_ESTABLISHED},
- /* 01 */ {
- IB_CME_DISCONNECTED, DAT_CONNECTION_EVENT_DISCONNECTED},
- /* 02 */ {
- IB_CME_DISCONNECTED_ON_LINK_DOWN,
- DAT_CONNECTION_EVENT_DISCONNECTED},
- /* 03 */ {
- IB_CME_CONNECTION_REQUEST_PENDING, DAT_CONNECTION_REQUEST_EVENT},
- /* 04 */ {
- IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
- DAT_CONNECTION_REQUEST_EVENT},
- /* 05 */ {
- IB_CME_CONNECTION_REQUEST_ACKED, DAT_CONNECTION_REQUEST_EVENT},
- /* 06 */ {
- IB_CME_DESTINATION_REJECT,
- DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
- /* 07 */ {
- IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
- DAT_CONNECTION_EVENT_PEER_REJECTED},
- /* 08 */ {
- IB_CME_DESTINATION_UNREACHABLE, DAT_CONNECTION_EVENT_UNREACHABLE},
- /* 09 */ {
- IB_CME_TOO_MANY_CONNECTION_REQUESTS,
- DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
- /* 10 */ {
- IB_CME_LOCAL_FAILURE, DAT_CONNECTION_EVENT_BROKEN},
- /* 11 */ {
- IB_CME_BROKEN, DAT_CONNECTION_EVENT_BROKEN},
- /* 12 */ {
-IB_CME_TIMEOUT, DAT_CONNECTION_EVENT_TIMED_OUT},};
-
-/*
- * dapls_ib_get_cm_event
- *
- * Return a DAT connection event given a provider CM event.
- *
- * Input:
- * dat_event_num DAT event we need an equivelent CM event for
- *
- * Output:
- * none
- *
- * Returns:
- * ib_cm_event of translated DAPL value
- */
-DAT_EVENT_NUMBER
-dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,
- IN DAT_BOOLEAN active)
-{
- DAT_EVENT_NUMBER dat_event_num;
- int i;
-
- active = active;
-
- dat_event_num = 0;
- for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
- if (ib_cm_event == ib_cm_event_map[i].ib_cm_event) {
- dat_event_num = ib_cm_event_map[i].dat_event_num;
- break;
- }
- }
- dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
- "dapls_ib_get_dat_event: event(%s) ib=0x%x dat=0x%x\n",
- active ? "active" : "passive", ib_cm_event, dat_event_num);
-
- return dat_event_num;
-}
-
-/*
- * dapls_ib_get_dat_event
- *
- * Return a DAT connection event given a provider CM event.
- *
- * Input:
- * ib_cm_event event provided to the dapl callback routine
- * active switch indicating active or passive connection
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_EVENT_NUMBER of translated provider value
- */
-ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
-{
- ib_cm_events_t ib_cm_event;
- int i;
-
- ib_cm_event = 0;
- for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
- if (dat_event_num == ib_cm_event_map[i].dat_event_num) {
- ib_cm_event = ib_cm_event_map[i].ib_cm_event;
- break;
- }
- }
- return ib_cm_event;
-}
-
void dapli_cma_event_cb(void)
{
struct rdma_cm_event *event;
diff --git a/dapl/openib_common/util.c b/dapl/openib_common/util.c
index 3963e1f..704d85a 100644
--- a/dapl/openib_common/util.c
+++ b/dapl/openib_common/util.c
@@ -471,3 +471,110 @@ void dapls_query_provider_specific_attr(IN DAPL_IA * ia_ptr,
/* set MTU to actual settings */
ib_attrs[0].value = ia_ptr->hca_ptr->ib_trans.named_attr.value;
}
+
+/*
+ * Map all socket CM event codes to the DAT equivelent. Common to all providers
+ */
+#define DAPL_IB_EVENT_CNT 13
+
+static struct ib_cm_event_map {
+ const ib_cm_events_t ib_cm_event;
+ DAT_EVENT_NUMBER dat_event_num;
+} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
+/* 00 */ {IB_CME_CONNECTED,
+ DAT_CONNECTION_EVENT_ESTABLISHED},
+/* 01 */ {IB_CME_DISCONNECTED,
+ DAT_CONNECTION_EVENT_DISCONNECTED},
+/* 02 */ {IB_CME_DISCONNECTED_ON_LINK_DOWN,
+ DAT_CONNECTION_EVENT_DISCONNECTED},
+/* 03 */ {IB_CME_CONNECTION_REQUEST_PENDING,
+ DAT_CONNECTION_REQUEST_EVENT},
+/* 04 */ {IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
+ DAT_CONNECTION_REQUEST_EVENT},
+/* 05 */ {IB_CME_CONNECTION_REQUEST_ACKED,
+ DAT_CONNECTION_EVENT_ESTABLISHED},
+/* 06 */ {IB_CME_DESTINATION_REJECT,
+ DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
+/* 07 */ {IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
+ DAT_CONNECTION_EVENT_PEER_REJECTED},
+/* 08 */ {IB_CME_DESTINATION_UNREACHABLE,
+ DAT_CONNECTION_EVENT_UNREACHABLE},
+/* 09 */ {IB_CME_TOO_MANY_CONNECTION_REQUESTS,
+ DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
+/* 10 */ {IB_CME_BROKEN,
+ DAT_CONNECTION_EVENT_BROKEN},
+/* 11 */ {IB_CME_TIMEOUT,
+ DAT_CONNECTION_EVENT_TIMED_OUT},
+/* 12 */ {IB_CME_LOCAL_FAILURE, /* always last */
+ DAT_CONNECTION_EVENT_BROKEN}
+};
+
+/*
+ * dapls_ib_get_cm_event
+ *
+ * Return a DAT connection event given a provider CM event.
+ *
+ * Input:
+ * dat_event_num DAT event we need an equivelent CM event for
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * ib_cm_event of translated DAPL value
+ */
+DAT_EVENT_NUMBER
+dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,
+ IN DAT_BOOLEAN active)
+{
+ DAT_EVENT_NUMBER dat_event_num;
+ int i;
+
+ active = active;
+
+ if (ib_cm_event > IB_CME_LOCAL_FAILURE)
+ return (DAT_EVENT_NUMBER) 0;
+
+ dat_event_num = 0;
+ for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
+ if (ib_cm_event == ib_cm_event_map[i].ib_cm_event) {
+ dat_event_num = ib_cm_event_map[i].dat_event_num;
+ break;
+ }
+ }
+ dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
+ "dapls_ib_get_dat_event: event translate(%s) ib=0x%x dat=0x%x\n",
+ active ? "active" : "passive", ib_cm_event, dat_event_num);
+
+ return dat_event_num;
+}
+
+/*
+ * dapls_ib_get_dat_event
+ *
+ * Return a DAT connection event given a provider CM event.
+ *
+ * Input:
+ * ib_cm_event event provided to the dapl callback routine
+ * active switch indicating active or passive connection
+ *
+ * Output:
+ * none
+ *
+ * Returns:
+ * DAT_EVENT_NUMBER of translated provider value
+ */
+ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
+{
+ ib_cm_events_t ib_cm_event;
+ int i;
+
+ ib_cm_event = 0;
+ for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
+ if (dat_event_num == ib_cm_event_map[i].dat_event_num) {
+ ib_cm_event = ib_cm_event_map[i].ib_cm_event;
+ break;
+ }
+ }
+ return ib_cm_event;
+}
diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
index 39f8417..957066a 100644
--- a/dapl/openib_scm/cm.c
+++ b/dapl/openib_scm/cm.c
@@ -501,7 +501,7 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
bail:
/* mark CM object for cleanup */
- cm_ptr->state = DCM_FREE;
+ dapli_cm_free(cm_ptr);
dapl_evd_connection_callback(NULL, IB_CME_LOCAL_FAILURE, NULL, 0, ep_ptr);
}
@@ -816,9 +816,7 @@ ud_bail:
(DAT_PVOID *) &xevent);
/* cleanup and release from local list */
- dapl_os_lock(&cm_ptr->lock);
- cm_ptr->state = DCM_FREE;
- dapl_os_unlock(&cm_ptr->lock);
+ dapli_cm_free(cm_ptr);
} else
#endif
@@ -836,10 +834,9 @@ bail:
#endif
/* close socket, and post error event */
cm_ptr->state = DCM_REJECTED;
- closesocket(cm_ptr->socket);
- cm_ptr->socket = DAPL_INVALID_SOCKET;
dapl_evd_connection_callback(NULL, event, cm_ptr->msg.p_data,
DCM_MAX_PDATA_SIZE, ep_ptr);
+ dapli_cm_free(cm_ptr);
}
/*
@@ -1041,7 +1038,7 @@ static void dapli_socket_accept_data(ib_cm_srvc_handle_t acm_ptr)
return;
bail:
/* mark for destroy, active will see socket close as rej */
- acm_ptr->state = DCM_FREE;
+ dapli_cm_free(acm_ptr);
return;
}
@@ -1573,107 +1570,6 @@ int dapls_ib_private_data_size(
return DCM_MAX_PDATA_SIZE;
}
-/*
- * Map all socket CM event codes to the DAT equivelent.
- */
-#define DAPL_IB_EVENT_CNT 10
-
-static struct ib_cm_event_map {
- const ib_cm_events_t ib_cm_event;
- DAT_EVENT_NUMBER dat_event_num;
-} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
-/* 00 */ {IB_CME_CONNECTED,
- DAT_CONNECTION_EVENT_ESTABLISHED},
-/* 01 */ {IB_CME_DISCONNECTED,
- DAT_CONNECTION_EVENT_DISCONNECTED},
-/* 02 */ {IB_CME_DISCONNECTED_ON_LINK_DOWN,
- DAT_CONNECTION_EVENT_DISCONNECTED},
-/* 03 */ {IB_CME_CONNECTION_REQUEST_PENDING,
- DAT_CONNECTION_REQUEST_EVENT},
-/* 04 */ {IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
- DAT_CONNECTION_REQUEST_EVENT},
-/* 05 */ {IB_CME_DESTINATION_REJECT,
- DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
-/* 06 */ {IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
- DAT_CONNECTION_EVENT_PEER_REJECTED},
-/* 07 */ {IB_CME_DESTINATION_UNREACHABLE,
- DAT_CONNECTION_EVENT_UNREACHABLE},
-/* 08 */ {IB_CME_TOO_MANY_CONNECTION_REQUESTS,
- DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
-/* 09 */ {IB_CME_LOCAL_FAILURE,
- DAT_CONNECTION_EVENT_BROKEN}
-};
-
-/*
- * dapls_ib_get_cm_event
- *
- * Return a DAT connection event given a provider CM event.
- *
- * Input:
- * dat_event_num DAT event we need an equivelent CM event for
- *
- * Output:
- * none
- *
- * Returns:
- * ib_cm_event of translated DAPL value
- */
-DAT_EVENT_NUMBER
-dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,
- IN DAT_BOOLEAN active)
-{
- DAT_EVENT_NUMBER dat_event_num;
- int i;
-
- active = active;
-
- if (ib_cm_event > IB_CME_LOCAL_FAILURE)
- return (DAT_EVENT_NUMBER) 0;
-
- dat_event_num = 0;
- for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
- if (ib_cm_event == ib_cm_event_map[i].ib_cm_event) {
- dat_event_num = ib_cm_event_map[i].dat_event_num;
- break;
- }
- }
- dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
- "dapls_ib_get_dat_event: event translate(%s) ib=0x%x dat=0x%x\n",
- active ? "active" : "passive", ib_cm_event, dat_event_num);
-
- return dat_event_num;
-}
-
-/*
- * dapls_ib_get_dat_event
- *
- * Return a DAT connection event given a provider CM event.
- *
- * Input:
- * ib_cm_event event provided to the dapl callback routine
- * active switch indicating active or passive connection
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_EVENT_NUMBER of translated provider value
- */
-ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
-{
- ib_cm_events_t ib_cm_event;
- int i;
-
- ib_cm_event = 0;
- for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
- if (dat_event_num == ib_cm_event_map[i].dat_event_num) {
- ib_cm_event = ib_cm_event_map[i].ib_cm_event;
- break;
- }
- }
- return ib_cm_event;
-}
-
/* outbound/inbound CR processing thread to avoid blocking applications */
void cr_thread(void *arg)
{
diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c
index 6efa2f1..2cab529 100644
--- a/dapl/openib_ucm/cm.c
+++ b/dapl/openib_ucm/cm.c
@@ -1817,105 +1817,6 @@ int dapls_ib_private_data_size(
return DCM_MAX_PDATA_SIZE;
}
-/*
- * Map all socket CM event codes to the DAT equivelent.
- */
-#define DAPL_IB_EVENT_CNT 10
-
-static struct ib_cm_event_map {
- const ib_cm_events_t ib_cm_event;
- DAT_EVENT_NUMBER dat_event_num;
-} ib_cm_event_map[DAPL_IB_EVENT_CNT] = {
-/* 00 */ {IB_CME_CONNECTED,
- DAT_CONNECTION_EVENT_ESTABLISHED},
-/* 01 */ {IB_CME_DISCONNECTED,
- DAT_CONNECTION_EVENT_DISCONNECTED},
-/* 02 */ {IB_CME_DISCONNECTED_ON_LINK_DOWN,
- DAT_CONNECTION_EVENT_DISCONNECTED},
-/* 03 */ {IB_CME_CONNECTION_REQUEST_PENDING,
- DAT_CONNECTION_REQUEST_EVENT},
-/* 04 */ {IB_CME_CONNECTION_REQUEST_PENDING_PRIVATE_DATA,
- DAT_CONNECTION_REQUEST_EVENT},
-/* 05 */ {IB_CME_DESTINATION_REJECT,
- DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
-/* 06 */ {IB_CME_DESTINATION_REJECT_PRIVATE_DATA,
- DAT_CONNECTION_EVENT_PEER_REJECTED},
-/* 07 */ {IB_CME_DESTINATION_UNREACHABLE,
- DAT_CONNECTION_EVENT_UNREACHABLE},
-/* 08 */ {IB_CME_TOO_MANY_CONNECTION_REQUESTS,
- DAT_CONNECTION_EVENT_NON_PEER_REJECTED},
-/* 09 */ {IB_CME_LOCAL_FAILURE,
- DAT_CONNECTION_EVENT_BROKEN}
-};
-
-/*
- * dapls_ib_get_cm_event
- *
- * Return a DAT connection event given a provider CM event.
- *
- * Input:
- * dat_event_num DAT event we need an equivelent CM event for
- *
- * Output:
- * none
- *
- * Returns:
- * ib_cm_event of translated DAPL value
- */
-DAT_EVENT_NUMBER
-dapls_ib_get_dat_event(IN const ib_cm_events_t ib_cm_event,
- IN DAT_BOOLEAN active)
-{
- DAT_EVENT_NUMBER dat_event_num;
- int i;
-
- if (ib_cm_event > IB_CME_LOCAL_FAILURE)
- return (DAT_EVENT_NUMBER) 0;
-
- dat_event_num = 0;
- for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
- if (ib_cm_event == ib_cm_event_map[i].ib_cm_event) {
- dat_event_num = ib_cm_event_map[i].dat_event_num;
- break;
- }
- }
- dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
- "dapls_ib_get_dat_event: event translate(%s) ib=0x%x dat=0x%x\n",
- active ? "active" : "passive", ib_cm_event, dat_event_num);
-
- return dat_event_num;
-}
-
-/*
- * dapls_ib_get_dat_event
- *
- * Return a DAT connection event given a provider CM event.
- *
- * Input:
- * ib_cm_event event provided to the dapl callback routine
- * active switch indicating active or passive connection
- *
- * Output:
- * none
- *
- * Returns:
- * DAT_EVENT_NUMBER of translated provider value
- */
-ib_cm_events_t dapls_ib_get_cm_event(IN DAT_EVENT_NUMBER dat_event_num)
-{
- ib_cm_events_t ib_cm_event;
- int i;
-
- ib_cm_event = 0;
- for (i = 0; i < DAPL_IB_EVENT_CNT; i++) {
- if (dat_event_num == ib_cm_event_map[i].dat_event_num) {
- ib_cm_event = ib_cm_event_map[i].ib_cm_event;
- break;
- }
- }
- return ib_cm_event;
-}
-
#if defined(_WIN32) || defined(_WIN64)
void cm_thread(void *arg)
--
1.5.2.5
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH 02/12] dapl-2.0: common: missed linking changes from atomic to acquire/release
From: Davis, Arlin R @ 2010-05-19 18:11 UTC (permalink / raw)
To: linux-rdma@vger.kernel.org, ofw_list
Signed-off-by: Arlin Davis <arlin.r.davis@intel.com>
---
dapl/common/dapl_ep_util.c | 7 +++----
1 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/dapl/common/dapl_ep_util.c b/dapl/common/dapl_ep_util.c
index bd7cdd9..daad78d 100644
--- a/dapl/common/dapl_ep_util.c
+++ b/dapl/common/dapl_ep_util.c
@@ -417,7 +417,7 @@ void dapls_ep_timeout(uintptr_t arg)
* The disconnect_clean interface requires the provided dependent
*cm event number.
*/
- ib_cm_event = dapls_ib_get_cm_event(DAT_CONNECTION_EVENT_DISCONNECTED);
+ ib_cm_event = dapls_ib_get_cm_event(DAT_CONNECTION_EVENT_TIMED_OUT);
dapls_ib_disconnect_clean(ep_ptr, DAT_TRUE, ib_cm_event);
(void)dapls_evd_post_connection_event((DAPL_EVD *) ep_ptr->param.
@@ -590,7 +590,6 @@ dapl_ep_legacy_post_disconnect(DAPL_EP * ep_ptr,
* dp_ib_cm_handle_t *cm_ptr defined in provider's dapl_util.h
*
* CM objects linked with EP using ->list_entry
- * CM objects sync'ed with EP using ->ref_count
* Output:
* none
*
@@ -601,7 +600,7 @@ dapl_ep_legacy_post_disconnect(DAPL_EP * ep_ptr,
void dapl_ep_link_cm(IN DAPL_EP *ep_ptr, IN dp_ib_cm_handle_t cm_ptr)
{
dapl_os_lock(&ep_ptr->header.lock);
- dapl_os_atomic_inc(&cm_ptr->ref_count);
+ dapls_cm_acquire(cm_ptr);
dapl_llist_add_tail(&ep_ptr->cm_list_head, &cm_ptr->list_entry, cm_ptr);
dapl_os_unlock(&ep_ptr->header.lock);
}
@@ -610,7 +609,7 @@ void dapl_ep_unlink_cm(IN DAPL_EP *ep_ptr, IN dp_ib_cm_handle_t cm_ptr)
{
dapl_os_lock(&ep_ptr->header.lock);
dapl_llist_remove_entry(&ep_ptr->cm_list_head, &cm_ptr->list_entry);
- dapl_os_atomic_dec(&cm_ptr->ref_count);
+ dapls_cm_release(cm_ptr);
dapl_os_unlock(&ep_ptr->header.lock);
}
--
1.5.2.5
^ permalink raw reply related
* [PATCH 01/12] dapl-2.0: common: add CM-EP linking to support multiple CM's and better protection during destruction
From: Davis, Arlin R @ 2010-05-19 18:00 UTC (permalink / raw)
To: linux-rdma@vger.kernel.org, ofw_list
Patch set to add CM-to-EP linking for better serialization; includes bug fixes found during testing.
1/12 common: add CM-EP linking to support multiple CM's and better protection during destruction
2/12 common: missed linking changes from atomic to acquire/release
3/12 scm, cma, ucm: consolidate dat event/provider event translation
4/12 scm: add support for canceling conn request that times out.
5/12 ibal: changes for EP to CM linking and synchronization.
6/12 ucm: fix error path during accept_usr reply failure
7/12 common: dat_ep_connect should not set timer UD endpoints
8/12 scm: new cm_ep linking broke UD mode over socket cm
9/12 scm: add EP locking and cm checking to socket cm disconnect
10/12 ucm: fix issues with new EP to CM linking changes
11/12 ucm: set timer during RTU_PENDING state change.
12/12 common: EP links to EVD, PZ incorrectly released before provider CM objects freed.
Tested on OFED Linux and Windows.
Add linking for CM to EP, including reference counting, to ensure synchronization
during creation and destruction. A cm_list_head has been added to the EP object to
support multiple CM objects (UD) per EP. A CM object cannot be destroyed while it is
linked to an EP.
Signed-off-by: Arlin Davis <arlin.r.davis@intel.com>
---
dapl/common/dapl_cr_callback.c | 4 +-
dapl/common/dapl_ep_free.c | 15 +
dapl/common/dapl_ep_util.c | 42 +++-
dapl/common/dapl_ep_util.h | 18 ++
dapl/common/dapl_evd_connection_callb.c | 1 -
dapl/common/dapl_evd_util.c | 2 +-
dapl/include/dapl.h | 5 +-
dapl/openib_cma/cm.c | 193 +++++++------
dapl/openib_cma/dapl_ib_util.h | 11 +-
dapl/openib_common/dapl_ib_common.h | 12 +-
dapl/openib_common/qp.c | 59 +++--
dapl/openib_scm/cm.c | 461 ++++++++++++++-----------------
dapl/openib_scm/dapl_ib_util.h | 14 +-
dapl/openib_ucm/cm.c | 381 ++++++++++++--------------
dapl/openib_ucm/dapl_ib_util.h | 11 +-
15 files changed, 641 insertions(+), 588 deletions(-)
diff --git a/dapl/common/dapl_cr_callback.c b/dapl/common/dapl_cr_callback.c
index 55b5841..3997b38 100644
--- a/dapl/common/dapl_cr_callback.c
+++ b/dapl/common/dapl_cr_callback.c
@@ -206,7 +206,6 @@ void dapls_cr_callback(IN dp_ib_cm_handle_t ib_cm_handle, IN const ib_cm_events_
}
ep_ptr->param.ep_state = DAT_EP_STATE_CONNECTED;
- ep_ptr->cm_handle = ib_cm_handle;
dapl_os_unlock(&ep_ptr->header.lock);
break;
@@ -243,7 +242,6 @@ void dapls_cr_callback(IN dp_ib_cm_handle_t ib_cm_handle, IN const ib_cm_events_
*/
dapl_os_lock(&ep_ptr->header.lock);
ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECTED;
- ep_ptr->cm_handle = IB_INVALID_HANDLE;
dapls_ib_disconnect_clean(ep_ptr, DAT_FALSE,
ib_cm_event);
dapl_os_unlock(&ep_ptr->header.lock);
@@ -396,7 +394,7 @@ dapli_connection_request(IN dp_ib_cm_handle_t ib_cm_handle,
ep_ptr->param.ep_state =
DAT_EP_STATE_PASSIVE_CONNECTION_PENDING;
}
- ep_ptr->cm_handle = ib_cm_handle;
+ dapl_ep_link_cm(ep_ptr, ib_cm_handle);
}
/* link the CR onto the SP so we can pick it up later */
diff --git a/dapl/common/dapl_ep_free.c b/dapl/common/dapl_ep_free.c
index fd9fcc7..8708e6f 100644
--- a/dapl/common/dapl_ep_free.c
+++ b/dapl/common/dapl_ep_free.c
@@ -66,6 +66,7 @@ DAT_RETURN DAT_API dapl_ep_free(IN DAT_EP_HANDLE ep_handle)
DAPL_EP *ep_ptr;
DAPL_IA *ia_ptr;
DAT_EP_PARAM *param;
+ dp_ib_cm_handle_t cm_ptr, next_cm_ptr;
ib_qp_state_t save_qp_state;
DAT_RETURN dat_status = DAT_SUCCESS;
@@ -187,6 +188,20 @@ DAT_RETURN DAT_API dapl_ep_free(IN DAT_EP_HANDLE ep_handle)
}
}
+ /* Free all CM objects */
+ cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head)
+ ? NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head));
+ while (cm_ptr != NULL) {
+ dapl_log(DAPL_DBG_TYPE_EP,
+ "dapl_ep_free: Free CM: EP=%p CM=%p\n",
+ ep_ptr, cm_ptr);
+
+ next_cm_ptr = dapl_llist_next_entry(&ep_ptr->cm_list_head,
+ &cm_ptr->list_entry);
+ dapls_cm_free(cm_ptr); /* blocking call */
+ cm_ptr = next_cm_ptr;
+ }
+
/* Free the resource */
dapl_ep_dealloc(ep_ptr);
diff --git a/dapl/common/dapl_ep_util.c b/dapl/common/dapl_ep_util.c
index 505a9f2..bd7cdd9 100644
--- a/dapl/common/dapl_ep_util.c
+++ b/dapl/common/dapl_ep_util.c
@@ -141,6 +141,7 @@ DAPL_EP *dapl_ep_alloc(IN DAPL_IA * ia_ptr, IN const DAT_EP_ATTR * ep_attr)
ep_ptr->header.user_context.as_ptr = NULL;
dapl_llist_init_entry(&ep_ptr->header.ia_list_entry);
+ dapl_llist_init_head(&ep_ptr->cm_list_head);
dapl_os_lock_init(&ep_ptr->header.lock);
/*
@@ -161,7 +162,6 @@ DAPL_EP *dapl_ep_alloc(IN DAPL_IA * ia_ptr, IN const DAT_EP_ATTR * ep_attr)
ep_ptr->qp_handle = IB_INVALID_HANDLE;
ep_ptr->qpn = 0;
ep_ptr->qp_state = DAPL_QP_STATE_UNATTACHED;
- ep_ptr->cm_handle = IB_INVALID_HANDLE;
if (DAT_SUCCESS != dapls_cb_create(&ep_ptr->req_buffer,
ep_ptr,
@@ -537,6 +537,7 @@ dapl_ep_legacy_post_disconnect(DAPL_EP * ep_ptr,
{
ib_cm_events_t ib_cm_event;
DAPL_CR *cr_ptr;
+ dp_ib_cm_handle_t cm_ptr;
/*
* Acquire the lock and make sure we didn't get a callback
@@ -557,6 +558,8 @@ dapl_ep_legacy_post_disconnect(DAPL_EP * ep_ptr,
dapls_ib_get_cm_event(DAT_CONNECTION_EVENT_DISCONNECTED);
cr_ptr = ep_ptr->cr_ptr;
+ cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head)
+ ? NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head));
dapl_os_unlock(&ep_ptr->header.lock);
if (cr_ptr != NULL) {
@@ -567,7 +570,7 @@ dapl_ep_legacy_post_disconnect(DAPL_EP * ep_ptr,
dapls_cr_callback(cr_ptr->ib_cm_handle,
ib_cm_event, NULL, 0, cr_ptr->sp_ptr);
} else {
- dapl_evd_connection_callback(ep_ptr->cm_handle,
+ dapl_evd_connection_callback(cm_ptr,
ib_cm_event,
NULL, 0, (void *)ep_ptr);
}
@@ -577,6 +580,41 @@ dapl_ep_legacy_post_disconnect(DAPL_EP * ep_ptr,
}
/*
+ * dapl_ep_link_cm
+ *
+ * Add linking of provider's CM object to a EP structure
+ * This enables multiple CM's per EP, and syncronization
+ *
+ * Input:
+ * DAPL_EP *ep_ptr
+ * dp_ib_cm_handle_t *cm_ptr defined in provider's dapl_util.h
+ *
+ * CM objects linked with EP using ->list_entry
+ * CM objects sync'ed with EP using ->ref_count
+ * Output:
+ * none
+ *
+ * Returns:
+ * none
+ *
+ */
+void dapl_ep_link_cm(IN DAPL_EP *ep_ptr, IN dp_ib_cm_handle_t cm_ptr)
+{
+ dapl_os_lock(&ep_ptr->header.lock);
+ dapl_os_atomic_inc(&cm_ptr->ref_count);
+ dapl_llist_add_tail(&ep_ptr->cm_list_head, &cm_ptr->list_entry, cm_ptr);
+ dapl_os_unlock(&ep_ptr->header.lock);
+}
+
+void dapl_ep_unlink_cm(IN DAPL_EP *ep_ptr, IN dp_ib_cm_handle_t cm_ptr)
+{
+ dapl_os_lock(&ep_ptr->header.lock);
+ dapl_llist_remove_entry(&ep_ptr->cm_list_head, &cm_ptr->list_entry);
+ dapl_os_atomic_dec(&cm_ptr->ref_count);
+ dapl_os_unlock(&ep_ptr->header.lock);
+}
+
+/*
* Local variables:
* c-indent-level: 4
* c-basic-offset: 4
diff --git a/dapl/common/dapl_ep_util.h b/dapl/common/dapl_ep_util.h
index 7ac4061..31d0e23 100644
--- a/dapl/common/dapl_ep_util.h
+++ b/dapl/common/dapl_ep_util.h
@@ -83,5 +83,23 @@ dapl_ep_legacy_post_disconnect(
DAT_CLOSE_FLAGS disconnect_flags);
extern char *dapl_get_ep_state_str(DAT_EP_STATE state);
+
+extern void dapl_ep_link_cm(IN DAPL_EP *ep_ptr,
+ IN dp_ib_cm_handle_t cm_ptr);
+
+extern void dapl_ep_unlink_cm(IN DAPL_EP *ep_ptr,
+ IN dp_ib_cm_handle_t cm_ptr);
+
+STATIC _INLINE_ dp_ib_cm_handle_t dapl_get_cm_from_ep(IN DAPL_EP *ep_ptr)
+{
+ dp_ib_cm_handle_t cm_ptr;
+
+ dapl_os_lock(&ep_ptr->header.lock);
+ cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head)
+ ? NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head));
+ dapl_os_unlock(&ep_ptr->header.lock);
+
+ return cm_ptr;
+}
#endif /* _DAPL_EP_UTIL_H_ */
diff --git a/dapl/common/dapl_evd_connection_callb.c b/dapl/common/dapl_evd_connection_callb.c
index 8881362..3166702 100644
--- a/dapl/common/dapl_evd_connection_callb.c
+++ b/dapl/common/dapl_evd_connection_callb.c
@@ -130,7 +130,6 @@ dapl_evd_connection_callback(IN dp_ib_cm_handle_t ib_cm_handle,
break;
}
ep_ptr->param.ep_state = DAT_EP_STATE_CONNECTED;
- ep_ptr->cm_handle = ib_cm_handle;
if (private_data_size > 0) {
/* copy in the private data */
diff --git a/dapl/common/dapl_evd_util.c b/dapl/common/dapl_evd_util.c
index cc0aa17..14a10c7 100644
--- a/dapl/common/dapl_evd_util.c
+++ b/dapl/common/dapl_evd_util.c
@@ -1077,7 +1077,7 @@ dapls_evd_post_cr_event_ext(IN DAPL_SP * sp_ptr,
ep_ptr->param.ep_state =
DAT_EP_STATE_PASSIVE_CONNECTION_PENDING;
}
- ep_ptr->cm_handle = ib_cm_handle;
+ dapl_ep_link_cm(ep_ptr, ib_cm_handle);
}
/* link the CR onto the SP so we can pick it up later */
diff --git a/dapl/include/dapl.h b/dapl/include/dapl.h
index a36b110..208113b 100755
--- a/dapl/include/dapl.h
+++ b/dapl/include/dapl.h
@@ -438,7 +438,10 @@ struct dapl_ep
ib_qp_state_t qp_state;
/* communications manager handle (IBM OS API) */
- dp_ib_cm_handle_t cm_handle;
+ // dp_ib_cm_handle_t cm_handle;
+
+ /* Add support for multiple CM object ownership */
+ DAPL_LLIST_HEAD cm_list_head;
/* store the remote IA address here, reference from the param
* struct which only has a pointer, no storage
diff --git a/dapl/openib_cma/cm.c b/dapl/openib_cma/cm.c
index cfa6ede..a85e6ae 100644
--- a/dapl/openib_cma/cm.c
+++ b/dapl/openib_cma/cm.c
@@ -45,6 +45,7 @@
#include "dapl_cr_util.h"
#include "dapl_name_service.h"
#include "dapl_ib_util.h"
+#include "dapl_ep_util.h"
#include "dapl_vendor.h"
#include "dapl_osd.h"
@@ -163,14 +164,14 @@ dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
dapl_os_memzero(conn, sizeof(*conn));
dapl_os_lock_init(&conn->lock);
- conn->refs++;
+ dapls_cm_acquire(conn);
/* create CM_ID, bind to local device, create QP */
if (rdma_create_id(g_cm_events, &cm_id, (void *)conn, RDMA_PS_TCP)) {
- dapl_os_lock_destroy(&conn->lock);
- dapl_os_free(conn, sizeof(*conn));
+ dapls_cm_release(conn);
return NULL;
}
+
conn->cm_id = cm_id;
/* setup timers for address and route resolution */
@@ -183,6 +184,7 @@ dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
conn->route_retries = dapl_os_get_env_val("DAPL_CM_ROUTE_RETRY_COUNT",
IB_ROUTE_RETRY_COUNT);
if (ep != NULL) {
+ dapl_ep_link_cm(ep, conn);
conn->ep = ep;
conn->hca = ((DAPL_IA *)ep->param.ia_handle)->hca_ptr;
}
@@ -190,40 +192,65 @@ dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
return conn;
}
-/*
- * Only called from consumer thread via dat_ep_free()
- * accept, reject, or connect.
- * Cannot be called from callback thread.
- * rdma_destroy_id will block until rdma_get_cm_event is acked.
- */
-void dapls_ib_cm_free(dp_ib_cm_handle_t conn, DAPL_EP *ep)
-{
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " destroy_conn: conn %p id %d\n",
- conn, conn->cm_id);
+static void dapli_cm_dealloc(dp_ib_cm_handle_t conn) {
+
+ dapl_os_assert(!conn->ref_count);
+ dapl_os_lock_destroy(&conn->lock);
+ dapl_os_free(conn, sizeof(*conn));
+}
+void dapls_cm_acquire(dp_ib_cm_handle_t conn)
+{
dapl_os_lock(&conn->lock);
- conn->refs--;
+ conn->ref_count++;
dapl_os_unlock(&conn->lock);
+}
- /* block until event thread complete */
- while (conn->refs)
- dapl_os_sleep_usec(10000);
+void dapls_cm_release(dp_ib_cm_handle_t conn)
+{
+ dapl_os_lock(&conn->lock);
+ conn->ref_count--;
+ if (conn->ref_count) {
+ dapl_os_unlock(&conn->lock);
+ return;
+ }
+ if (conn->cm_id) {
+ if (conn->cm_id->qp)
+ rdma_destroy_qp(conn->cm_id);
+ rdma_destroy_id(conn->cm_id);
+ }
+ dapl_os_unlock(&conn->lock);
+ dapli_cm_dealloc(conn);
+}
+
+/* BLOCKING: called from dapl_ep_free, EP link will be last ref */
+void dapls_cm_free(dp_ib_cm_handle_t conn)
+{
+ dapl_log(DAPL_DBG_TYPE_CM,
+ " cm_free: cm %p ep %p refs=%d\n",
+ conn, conn->ep, conn->ref_count);
- if (ep) {
- ep->cm_handle = NULL;
- ep->qp_handle = NULL;
- ep->qp_state = IB_QP_STATE_ERROR;
- }
+ dapls_cm_release(conn); /* release alloc ref */
+ /* Destroy cm_id, wait until EP is last ref */
+ dapl_os_lock(&conn->lock);
if (conn->cm_id) {
if (conn->cm_id->qp)
rdma_destroy_qp(conn->cm_id);
rdma_destroy_id(conn->cm_id);
+ conn->cm_id = NULL;
}
- dapl_os_lock_destroy(&conn->lock);
- dapl_os_free(conn, sizeof(*conn));
+ /* EP linking is last reference */
+ while (conn->ref_count != 1) {
+ dapl_os_unlock(&conn->lock);
+ dapl_os_sleep_usec(10000);
+ dapl_os_lock(&conn->lock);
+ }
+ dapl_os_unlock(&conn->lock);
+
+ /* unlink, dequeue from EP. Final ref so release will destroy */
+ dapl_ep_unlink_cm(conn->ep, conn);
}
static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,
@@ -245,11 +272,11 @@ static struct dapl_cm_id *dapli_req_recv(struct dapl_cm_id *conn,
if (new_conn) {
(void)dapl_os_memzero(new_conn, sizeof(*new_conn));
dapl_os_lock_init(&new_conn->lock);
+ dapls_cm_acquire(new_conn);
new_conn->cm_id = event->id; /* provided by uCMA */
event->id->context = new_conn; /* update CM_ID context */
new_conn->sp = conn->sp;
new_conn->hca = conn->hca;
- new_conn->refs++;
/* Get requesters connect data, setup for accept */
new_conn->params.responder_resources =
@@ -532,13 +559,11 @@ DAT_RETURN dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
IN DAT_COUNT p_size, IN void *p_data)
{
struct dapl_ep *ep_ptr = ep_handle;
- struct dapl_cm_id *conn = ep_ptr->cm_handle;
+ struct dapl_cm_id *conn = dapl_get_cm_from_ep(ep_ptr);
int ret;
-
- /* Sanity check */
- if (NULL == ep_ptr)
- return DAT_SUCCESS;
-
+
+ dapl_os_assert(conn != NULL);
+
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" connect: rSID 0x%llx rPort %d, pdata %p, ln %d\n",
r_qual, ntohs(SID_TO_PORT(r_qual)), p_data, p_size);
@@ -547,8 +572,8 @@ DAT_RETURN dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
/* Setup QP/CM parameters and private data in cm_id */
(void)dapl_os_memzero(&conn->params, sizeof(conn->params));
- conn->params.responder_resources =
- ep_ptr->param.ep_attr.max_rdma_read_in;
+ conn->params.responder_resources =
+ ep_ptr->param.ep_attr.max_rdma_read_in;
conn->params.initiator_depth = ep_ptr->param.ep_attr.max_rdma_read_out;
conn->params.flow_control = 1;
conn->params.rnr_retry_count = IB_RNR_RETRY_COUNT;
@@ -573,7 +598,7 @@ DAT_RETURN dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
dapl_log(DAPL_DBG_TYPE_ERR,
" dapl_cma_connect: rdma_resolve_addr ERR 0x%x %s\n",
ret, strerror(errno));
- return dapl_convert_errno(errno, "ib_connect");
+ return dapl_convert_errno(errno, "rdma_resolve_addr");
}
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" connect: resolve_addr: cm_id %p -> %s port %d\n",
@@ -603,13 +628,13 @@ DAT_RETURN dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
DAT_RETURN
dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
{
- dp_ib_cm_handle_t conn = ep_ptr->cm_handle;
+ struct dapl_cm_id *conn = dapl_get_cm_from_ep(ep_ptr);
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" disconnect(ep %p, conn %p, id %d flags %x)\n",
ep_ptr, conn, (conn ? conn->cm_id : 0), close_flags);
- if ((conn == IB_INVALID_HANDLE) || (conn->cm_id == NULL))
+ if ((conn == NULL) || (conn->cm_id == NULL))
return DAT_SUCCESS;
/* no graceful half-pipe disconnect option */
@@ -646,7 +671,6 @@ dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,
IN const ib_cm_events_t ib_cm_event)
{
/* nothing to do */
- return;
}
/*
@@ -683,14 +707,13 @@ dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr,
dapl_os_memzero(conn, sizeof(*conn));
dapl_os_lock_init(&conn->lock);
- conn->refs++;
+ dapls_cm_acquire(conn);
/* create CM_ID, bind to local device, create QP */
if (rdma_create_id
(g_cm_events, &conn->cm_id, (void *)conn, RDMA_PS_TCP)) {
- dapl_os_lock_destroy(&conn->lock);
- dapl_os_free(conn, sizeof(*conn));
- return (dapl_convert_errno(errno, "setup_listener"));
+ dapls_cm_release(conn);
+ return (dapl_convert_errno(errno, "rdma_create_id"));
}
/* open identifies the local device; per DAT specification */
@@ -704,7 +727,7 @@ dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr,
dat_status = DAT_CONN_QUAL_IN_USE;
else
dat_status =
- dapl_convert_errno(errno, "setup_listener");
+ dapl_convert_errno(errno, "rdma_bind_addr");
goto bail;
}
@@ -728,17 +751,15 @@ dapls_ib_setup_conn_listener(IN DAPL_IA * ia_ptr,
dat_status = DAT_CONN_QUAL_IN_USE;
else
dat_status =
- dapl_convert_errno(errno, "setup_listener");
+ dapl_convert_errno(errno, "rdma_listen");
goto bail;
}
/* success */
return DAT_SUCCESS;
- bail:
- rdma_destroy_id(conn->cm_id);
- dapl_os_lock_destroy(&conn->lock);
- dapl_os_free(conn, sizeof(*conn));
+bail:
+ dapls_cm_release(conn);
return dat_status;
}
@@ -765,12 +786,12 @@ dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr)
ib_cm_srvc_handle_t conn = sp_ptr->cm_srvc_handle;
dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " remove_listen(ia_ptr %p sp_ptr %p cm_ptr %p)\n",
+ " remove_listen(ia_ptr %p sp_ptr %p conn %p)\n",
ia_ptr, sp_ptr, conn);
if (conn != IB_INVALID_HANDLE) {
sp_ptr->cm_srvc_handle = NULL;
- dapls_ib_cm_free(conn, NULL);
+ dapls_cm_release(conn);
}
return DAT_SUCCESS;
}
@@ -804,6 +825,7 @@ dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,
DAPL_EP *ep_ptr = (DAPL_EP *) ep_handle;
DAPL_IA *ia_ptr = ep_ptr->header.owner_ia;
struct dapl_cm_id *cr_conn = cr_ptr->ib_cm_handle;
+ struct dapl_cm_id *ep_conn = dapl_get_cm_from_ep(ep_ptr);
int ret;
DAT_RETURN dat_status;
@@ -838,18 +860,26 @@ dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,
* a local device (cm_id and QP) when created. Move the QP
* to the new cm_id only if device and port numbers match.
*/
- if (ep_ptr->cm_handle->cm_id->verbs == cr_conn->cm_id->verbs &&
- ep_ptr->cm_handle->cm_id->port_num == cr_conn->cm_id->port_num) {
+ if (ep_conn->cm_id->verbs == cr_conn->cm_id->verbs &&
+ ep_conn->cm_id->port_num == cr_conn->cm_id->port_num) {
/* move QP to new cr_conn, remove QP ref in EP cm_id */
- cr_conn->cm_id->qp = ep_ptr->cm_handle->cm_id->qp;
- ep_ptr->cm_handle->cm_id->qp = NULL;
- dapls_ib_cm_free(ep_ptr->cm_handle, NULL);
+ cr_conn->cm_id->qp = ep_conn->cm_id->qp;
+
+ /* remove old CM to EP linking, destroy CM object */
+ dapl_ep_unlink_cm(ep_ptr, ep_conn);
+ ep_conn->cm_id->qp = NULL;
+ ep_conn->ep = NULL;
+ dapls_cm_release(ep_conn);
+
+ /* add new CM to EP linking, qp_handle unchanged */
+ dapl_ep_link_cm(ep_ptr, cr_conn);
+ cr_conn->ep = ep_ptr;
} else {
dapl_log(DAPL_DBG_TYPE_ERR,
" dapl_cma_accept: ERR dev(%p!=%p) or"
" port mismatch(%d!=%d)\n",
- ep_ptr->cm_handle->cm_id->verbs, cr_conn->cm_id->verbs,
- ntohs(ep_ptr->cm_handle->cm_id->port_num),
+ ep_conn->cm_id->verbs, cr_conn->cm_id->verbs,
+ ntohs(ep_conn->cm_id->port_num),
ntohs(cr_conn->cm_id->port_num));
dat_status = DAT_INTERNAL_ERROR;
goto bail;
@@ -861,16 +891,15 @@ dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,
ret = rdma_accept(cr_conn->cm_id, &cr_conn->params);
if (ret) {
- dapl_log(DAPL_DBG_TYPE_ERR, " dapl_cma_accept: ERR %d %s\n",
+ dapl_log(DAPL_DBG_TYPE_ERR, " dapl_rdma_accept: ERR %d %s\n",
ret, strerror(errno));
- dat_status = dapl_convert_errno(ret, "accept");
+ dat_status = dapl_convert_errno(errno, "accept");
+
+ /* remove new cr_conn EP to CM linking */
+ dapl_ep_unlink_cm(ep_ptr, cr_conn);
goto bail;
}
- /* save accepted conn and EP reference, qp_handle unchanged */
- ep_ptr->cm_handle = cr_conn;
- cr_conn->ep = ep_ptr;
-
/* setup local and remote ports for ep query */
/* Note: port qual in network order */
ep_ptr->param.remote_port_qual =
@@ -879,9 +908,11 @@ dapls_ib_accept_connection(IN DAT_CR_HANDLE cr_handle,
PORT_TO_SID(rdma_get_src_port(cr_conn->cm_id));
return DAT_SUCCESS;
- bail:
+bail:
rdma_reject(cr_conn->cm_id, NULL, 0);
- dapls_ib_cm_free(cr_conn, NULL);
+
+ /* no EP linking, ok to destroy */
+ dapls_cm_release(cr_conn);
return dat_status;
}
@@ -942,7 +973,8 @@ dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_handle,
ret = rdma_reject(cm_handle->cm_id,
cm_handle->p_data, offset + private_data_size);
- dapls_ib_cm_free(cm_handle, NULL);
+ /* no EP linking, ok to destroy */
+ dapls_cm_release(cm_handle);
return dapl_convert_errno(ret, "reject");
}
@@ -966,7 +998,7 @@ DAT_RETURN
dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle, OUT DAT_SOCK_ADDR6 * raddr)
{
DAPL_HEADER *header;
- dp_ib_cm_handle_t ib_cm_handle;
+ dp_ib_cm_handle_t conn;
struct rdma_addr *ipaddr;
dapl_dbg_log(DAPL_DBG_TYPE_EP,
@@ -975,19 +1007,19 @@ dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle, OUT DAT_SOCK_ADDR6 * raddr)
header = (DAPL_HEADER *) dat_handle;
- if (header->magic == DAPL_MAGIC_EP)
- ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;
+ if (header->magic == DAPL_MAGIC_EP)
+ conn = dapl_get_cm_from_ep((DAPL_EP *) dat_handle);
else if (header->magic == DAPL_MAGIC_CR)
- ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;
+ conn = ((DAPL_CR *) dat_handle)->ib_cm_handle;
else
return DAT_INVALID_HANDLE;
/* get remote IP address from cm_id route */
- ipaddr = &ib_cm_handle->cm_id->route.addr;
+ ipaddr = &conn->cm_id->route.addr;
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" remote_addr: conn %p id %p SRC %x DST %x PORT %d\n",
- ib_cm_handle, ib_cm_handle->cm_id,
+ conn, conn->cm_id,
ntohl(((struct sockaddr_in *)
&ipaddr->src_addr)->sin_addr.s_addr),
ntohl(((struct sockaddr_in *)
@@ -1141,20 +1173,12 @@ void dapli_cma_event_cb(void)
else
conn = (struct dapl_cm_id *)event->id->context;
+ dapls_cm_acquire(conn);
+
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" cm_event: EVENT=%d ID=%p LID=%p CTX=%p\n",
event->event, event->id, event->listen_id, conn);
- /* cm_free is blocked waiting for ack */
- dapl_os_lock(&conn->lock);
- if (!conn->refs) {
- dapl_os_unlock(&conn->lock);
- rdma_ack_cm_event(event);
- return;
- }
- conn->refs++;
- dapl_os_unlock(&conn->lock);
-
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
dapli_addr_resolve(conn);
@@ -1268,10 +1292,7 @@ void dapli_cma_event_cb(void)
/* ack event, unblocks destroy_cm_id in consumer threads */
rdma_ack_cm_event(event);
-
- dapl_os_lock(&conn->lock);
- conn->refs--;
- dapl_os_unlock(&conn->lock);
+ dapls_cm_release(conn);
}
}
diff --git a/dapl/openib_cma/dapl_ib_util.h b/dapl/openib_cma/dapl_ib_util.h
index fd4e582..96061b3 100755
--- a/dapl/openib_cma/dapl_ib_util.h
+++ b/dapl/openib_cma/dapl_ib_util.h
@@ -55,14 +55,17 @@
#define IB_MAX_DREP_PDATA_SIZE DAPL_MIN((224-CMA_PDATA_HDR),RDMA_MAX_PRIVATE_DATA)
#define IWARP_MAX_PDATA_SIZE DAPL_MIN((512-CMA_PDATA_HDR),RDMA_MAX_PRIVATE_DATA)
+/* DAPL CM objects MUST include list_entry, ref_count, event for EP linking */
struct dapl_cm_id {
+ struct dapl_llist_entry list_entry;
+ struct dapl_llist_entry local_entry;
+ DAPL_OS_WAIT_OBJECT event;
DAPL_OS_LOCK lock;
- int refs;
+ int ref_count;
int arp_retries;
int arp_timeout;
int route_retries;
int route_timeout;
- int in_callback;
struct rdma_cm_id *cm_id;
struct dapl_hca *hca;
struct dapl_sp *sp;
@@ -121,7 +124,9 @@ void dapli_cma_event_cb(void);
void dapli_async_event_cb(struct _ib_hca_transport *tp);
void dapli_cq_event_cb(struct _ib_hca_transport *tp);
dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep);
-void dapls_ib_cm_free(dp_ib_cm_handle_t cm, DAPL_EP *ep);
+void dapls_cm_acquire(dp_ib_cm_handle_t cm);
+void dapls_cm_release(dp_ib_cm_handle_t cm);
+void dapls_cm_free(dp_ib_cm_handle_t cm_ptr);
#ifdef DAPL_COUNTERS
STATIC _INLINE_ void dapls_print_cm_list(IN DAPL_IA * ia_ptr)
diff --git a/dapl/openib_common/dapl_ib_common.h b/dapl/openib_common/dapl_ib_common.h
index 6c66b25..3e32fab 100644
--- a/dapl/openib_common/dapl_ib_common.h
+++ b/dapl/openib_common/dapl_ib_common.h
@@ -279,12 +279,13 @@ typedef enum dapl_cm_state
DCM_REJECTING,
DCM_REJECTED,
DCM_CONNECTED,
- DCM_RELEASED,
+ DCM_RELEASE,
DCM_DISC_PENDING,
DCM_DISCONNECTED,
DCM_DESTROY,
DCM_RTU_PENDING,
- DCM_DISC_RECV
+ DCM_DISC_RECV,
+ DCM_FREE,
} DAPL_CM_STATE;
@@ -372,14 +373,15 @@ STATIC _INLINE_ char * dapl_cm_state_str(IN int st)
"CM_REJECTING",
"CM_REJECTED",
"CM_CONNECTED",
- "CM_RELEASED",
+ "CM_RELEASE",
"CM_DISC_PENDING",
"CM_DISCONNECTED",
"CM_DESTROY",
"CM_RTU_PENDING",
- "CM_DISC_RECV"
+ "CM_DISC_RECV",
+ "CM_FREE"
};
- return ((st < 0 || st > 15) ? "Invalid CM state?" : state[st]);
+ return ((st < 0 || st > 16) ? "Invalid CM state?" : state[st]);
}
STATIC _INLINE_ char * dapl_cm_op_str(IN int op)
diff --git a/dapl/openib_common/qp.c b/dapl/openib_common/qp.c
index 17eae36..15c1dae 100644
--- a/dapl/openib_common/qp.c
+++ b/dapl/openib_common/qp.c
@@ -24,6 +24,7 @@
*/
#include "dapl.h"
#include "dapl_adapter_util.h"
+#include "dapl_ep_util.h"
/*
* dapl_ib_qp_alloc
@@ -115,12 +116,14 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
#ifdef _OPENIB_CMA_
/* Allocate CM and initialize lock */
if ((conn = dapls_ib_cm_create(ep_ptr)) == NULL)
- return (dapl_convert_errno(ENOMEM, "create_cq"));
+ return (dapl_convert_errno(ENOMEM, "cm_create"));
/* open identifies the local device; per DAT specification */
if (rdma_bind_addr(conn->cm_id,
- (struct sockaddr *)&ia_ptr->hca_ptr->hca_address))
- return (dapl_convert_errno(EAFNOSUPPORT, "create_cq"));
+ (struct sockaddr *)&ia_ptr->hca_ptr->hca_address)) {
+ dapls_cm_free(conn);
+ return (dapl_convert_errno(EAFNOSUPPORT, "rdma_bind_addr"));
+ }
#endif
/* Setup attributes and create qp */
dapl_os_memzero((void *)&qp_create, sizeof(qp_create));
@@ -158,11 +161,10 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
#ifdef _OPENIB_CMA_
if (rdma_create_qp(conn->cm_id, ib_pd_handle, &qp_create)) {
- dapls_ib_cm_free(conn, ep_ptr);
- return (dapl_convert_errno(errno, "create_qp"));
+ dapls_cm_free(conn);
+ return (dapl_convert_errno(errno, "rdma_create_qp"));
}
ep_ptr->qp_handle = conn->cm_id->qp;
- ep_ptr->cm_handle = conn;
ep_ptr->qp_state = IBV_QPS_INIT;
ep_ptr->param.local_port_qual = rdma_get_src_port(conn->cm_id);
@@ -207,33 +209,30 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
*/
DAT_RETURN dapls_ib_qp_free(IN DAPL_IA * ia_ptr, IN DAPL_EP * ep_ptr)
{
- dapl_dbg_log(DAPL_DBG_TYPE_EP, " qp_free: ep_ptr %p qp %p\n",
- ep_ptr, ep_ptr->qp_handle);
+#ifdef _OPENIB_CMA_
+ dp_ib_cm_handle_t cm_ptr = dapl_get_cm_from_ep(ep_ptr);
- if (ep_ptr->cm_handle != NULL) {
- dapls_ib_cm_free(ep_ptr->cm_handle, ep_ptr);
+ dapl_os_lock(&ep_ptr->header.lock);
+ if (cm_ptr && cm_ptr->cm_id->qp) {
+ rdma_destroy_qp(cm_ptr->cm_id);
+ cm_ptr->cm_id->qp = NULL;
+ ep_ptr->qp_handle = NULL;
}
-
+#else
+ dapl_os_lock(&ep_ptr->header.lock);
if (ep_ptr->qp_handle != NULL) {
/* force error state to flush queue, then destroy */
dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0,0,0);
- if (ibv_destroy_qp(ep_ptr->qp_handle))
- return (dapl_convert_errno(errno, "destroy_qp"));
-
+ if (ibv_destroy_qp(ep_ptr->qp_handle)) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " qp_free: ibv_destroy_qp error - %s\n",
+ strerror(errno));
+ }
ep_ptr->qp_handle = NULL;
}
-
-#ifdef DAT_EXTENSIONS
- /* UD endpoints can have many CR associations and will not
- * set ep->cm_handle. Call provider with cm_ptr null to incidate
- * UD type multi CR's for this EP. It will parse internal list
- * and cleanup all associations.
- */
- if (ep_ptr->param.ep_attr.service_type == DAT_IB_SERVICE_TYPE_UD)
- dapls_ib_cm_free(NULL, ep_ptr);
#endif
-
+ dapl_os_unlock(&ep_ptr->header.lock);
return DAT_SUCCESS;
}
@@ -329,10 +328,22 @@ dapls_ib_qp_modify(IN DAPL_IA * ia_ptr,
#if defined(_WIN32) || defined(_WIN64) || defined(_OPENIB_CMA_)
void dapls_ib_reinit_ep(IN DAPL_EP * ep_ptr)
{
+ dp_ib_cm_handle_t cm_ptr, next_cm_ptr;
+
/* work around bug in low level driver - 3/24/09 */
/* RTS -> RESET -> INIT -> ERROR QP transition crashes system */
if (ep_ptr->qp_handle != IB_INVALID_HANDLE) {
dapls_ib_qp_free(ep_ptr->header.owner_ia, ep_ptr);
+
+ /* free any CM objects created */
+ cm_ptr = (dapl_llist_is_empty(&ep_ptr->cm_list_head)
+ ? NULL : dapl_llist_peek_head(&ep_ptr->cm_list_head));
+ while (cm_ptr != NULL) {
+ next_cm_ptr = dapl_llist_next_entry(&ep_ptr->cm_list_head,
+ &cm_ptr->list_entry);
+ dapls_cm_free(cm_ptr);
+ cm_ptr = next_cm_ptr;
+ }
dapls_ib_qp_alloc(ep_ptr->header.owner_ia, ep_ptr, ep_ptr);
}
}
diff --git a/dapl/openib_scm/cm.c b/dapl/openib_scm/cm.c
index e374fb4..39f8417 100644
--- a/dapl/openib_scm/cm.c
+++ b/dapl/openib_scm/cm.c
@@ -57,6 +57,7 @@
#include "dapl_cr_util.h"
#include "dapl_name_service.h"
#include "dapl_ib_util.h"
+#include "dapl_ep_util.h"
#include "dapl_osd.h"
#if defined(_WIN32) || defined(_WIN64)
@@ -154,7 +155,7 @@ static int dapl_select(struct dapl_fd_set *set)
dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup\n");
if (ret == SOCKET_ERROR)
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
" dapl_select: error 0x%x\n", WSAGetLastError());
return ret;
@@ -248,7 +249,7 @@ static enum DAPL_FD_EVENTS dapl_poll(DAPL_SOCKET s, enum DAPL_FD_EVENTS event)
fds.events = event;
fds.revents = 0;
ret = poll(&fds, 1, 0);
- dapl_log(DAPL_DBG_TYPE_CM, " dapl_poll: fd=%d ret=%d, evnts=0x%x\n",
+ dapl_log(DAPL_DBG_TYPE_THREAD, " dapl_poll: fd=%d ret=%d, evnts=0x%x\n",
s, ret, fds.revents);
if (ret == 0)
return 0;
@@ -262,16 +263,64 @@ static int dapl_select(struct dapl_fd_set *set)
{
int ret;
- dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: sleep, fds=%d\n", set->index);
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " dapl_select: sleep, fds=%d\n", set->index);
ret = poll(set->set, set->index, -1);
- dapl_dbg_log(DAPL_DBG_TYPE_CM, " dapl_select: wakeup, ret=0x%x\n", ret);
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " dapl_select: wakeup, ret=0x%x\n", ret);
return ret;
}
#define dapl_socket_errno() errno
#endif
-dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
+static void dapli_cm_thread_signal(dp_ib_cm_handle_t cm_ptr)
+{
+ send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0);
+}
+
+static void dapli_cm_free(dp_ib_cm_handle_t cm_ptr)
+{
+ dapl_os_lock(&cm_ptr->lock);
+ cm_ptr->state = DCM_FREE;
+ dapl_os_unlock(&cm_ptr->lock);
+ dapli_cm_thread_signal(cm_ptr);
+}
+
+static void dapli_cm_dealloc(dp_ib_cm_handle_t cm_ptr)
+{
+ dapl_os_assert(!cm_ptr->ref_count);
+
+ if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
+ shutdown(cm_ptr->socket, SHUT_RDWR);
+ closesocket(cm_ptr->socket);
+ }
+ if (cm_ptr->ah)
+ ibv_destroy_ah(cm_ptr->ah);
+
+ dapl_os_lock_destroy(&cm_ptr->lock);
+ dapl_os_wait_object_destroy(&cm_ptr->event);
+ dapl_os_free(cm_ptr, sizeof(*cm_ptr));
+}
+
+void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr)
+{
+ dapl_os_lock(&cm_ptr->lock);
+ cm_ptr->ref_count++;
+ dapl_os_unlock(&cm_ptr->lock);
+}
+
+void dapls_cm_release(dp_ib_cm_handle_t cm_ptr)
+{
+ dapl_os_lock(&cm_ptr->lock);
+ cm_ptr->ref_count--;
+ if (cm_ptr->ref_count) {
+ dapl_os_unlock(&cm_ptr->lock);
+ return;
+ }
+ dapl_os_unlock(&cm_ptr->lock);
+ dapli_cm_dealloc(cm_ptr);
+}
+
+static dp_ib_cm_handle_t dapli_cm_alloc(DAPL_EP *ep_ptr)
{
dp_ib_cm_handle_t cm_ptr;
@@ -283,162 +332,108 @@ dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
if (dapl_os_lock_init(&cm_ptr->lock))
goto bail;
+ if (dapl_os_wait_object_init(&cm_ptr->event)) {
+ dapl_os_lock_destroy(&cm_ptr->lock);
+ goto bail;
+ }
+ dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&cm_ptr->list_entry);
+ dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&cm_ptr->local_entry);
+
cm_ptr->msg.ver = htons(DCM_VER);
cm_ptr->socket = DAPL_INVALID_SOCKET;
- cm_ptr->ep = ep;
+ dapls_cm_acquire(cm_ptr);
+
+ /* Link EP and CM */
+ if (ep_ptr != NULL) {
+ dapl_ep_link_cm(ep_ptr, cm_ptr); /* ref++ */
+ cm_ptr->ep = ep_ptr;
+ cm_ptr->hca = ((DAPL_IA *)ep_ptr->param.ia_handle)->hca_ptr;
+ }
return cm_ptr;
bail:
dapl_os_free(cm_ptr, sizeof(*cm_ptr));
return NULL;
}
-/* mark for destroy, remove all references, schedule cleanup */
-/* cm_ptr == NULL (UD), then multi CR's, kill all associated with EP */
-void dapls_ib_cm_free(dp_ib_cm_handle_t cm_ptr, DAPL_EP *ep)
+/* queue socket for processing CM work */
+static void dapli_cm_queue(dp_ib_cm_handle_t cm_ptr)
{
- DAPL_IA *ia_ptr;
- DAPL_HCA *hca_ptr = NULL;
- dp_ib_cm_handle_t cr, next_cr;
-
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " cm_destroy: cm %p ep %p\n", cm_ptr, ep);
-
- if (cm_ptr == NULL)
- goto multi_cleanup;
+ /* add to work queue for cr thread processing */
+ dapl_os_lock(&cm_ptr->hca->ib_trans.lock);
+ dapls_cm_acquire(cm_ptr);
+ dapl_llist_add_tail(&cm_ptr->hca->ib_trans.list,
+ (DAPL_LLIST_ENTRY *)&cm_ptr->local_entry, cm_ptr);
+ dapl_os_unlock(&cm_ptr->hca->ib_trans.lock);
+ dapli_cm_thread_signal(cm_ptr);
+}
- /* to notify cleanup thread */
- hca_ptr = cm_ptr->hca;
+/* called with local LIST lock */
+static void dapli_cm_dequeue(dp_ib_cm_handle_t cm_ptr)
+{
+ /* Remove from work queue, cr thread processing */
+ dapl_llist_remove_entry(&cm_ptr->hca->ib_trans.list,
+ (DAPL_LLIST_ENTRY *)&cm_ptr->local_entry);
+ dapls_cm_release(cm_ptr);
+}
- /* cleanup, never made it to work queue */
+/* BLOCKING: called from dapl_ep_free, EP link will be last ref */
+void dapls_cm_free(dp_ib_cm_handle_t cm_ptr)
+{
+ dapl_log(DAPL_DBG_TYPE_CM,
+ " cm_free: cm %p %s ep %p refs=%d\n",
+ cm_ptr, dapl_cm_state_str(cm_ptr->state),
+ cm_ptr->ep, cm_ptr->ref_count);
+
+ /* free from internal workq, wait until EP is last ref */
dapl_os_lock(&cm_ptr->lock);
- if (cm_ptr->state == DCM_INIT) {
- if (cm_ptr->socket != DAPL_INVALID_SOCKET) {
- shutdown(cm_ptr->socket, SHUT_RDWR);
- closesocket(cm_ptr->socket);
- }
+ cm_ptr->state = DCM_FREE;
+ while (cm_ptr->ref_count != 1) {
dapl_os_unlock(&cm_ptr->lock);
- dapl_os_lock_destroy(&cm_ptr->lock);
- dapl_os_free(cm_ptr, sizeof(*cm_ptr));
- return;
- }
-
- /* free could be called before disconnect, disc_clean will destroy */
- if (cm_ptr->state == DCM_CONNECTED) {
- dapl_os_unlock(&cm_ptr->lock);
- dapli_socket_disconnect(cm_ptr);
- return;
- }
-
- cm_ptr->state = DCM_DESTROY;
- if ((cm_ptr->ep) && (cm_ptr->ep->cm_handle == cm_ptr)) {
- cm_ptr->ep->cm_handle = IB_INVALID_HANDLE;
- cm_ptr->ep = NULL;
+ dapl_os_sleep_usec(10000);
+ dapl_os_lock(&cm_ptr->lock);
}
-
dapl_os_unlock(&cm_ptr->lock);
- goto notify_thread;
-
-multi_cleanup:
-
- /*
- * UD CR objects are kept active because of direct private data references
- * from CONN events. The cr->socket is closed and marked inactive but the
- * object remains allocated and queued on the CR resource list. There can
- * be multiple CR's associated with a given EP. There is no way to determine
- * when consumer is finished with event until the dat_ep_free.
- *
- * Schedule destruction for all CR's associated with this EP, cr_thread will
- * complete the cleanup with state == DCM_DESTROY.
- */
- ia_ptr = ep->header.owner_ia;
- dapl_os_lock(&ia_ptr->hca_ptr->ib_trans.lock);
- if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)
- &ia_ptr->hca_ptr->ib_trans.list))
- next_cr = dapl_llist_peek_head((DAPL_LLIST_HEAD*)
- &ia_ptr->hca_ptr->ib_trans.list);
- else
- next_cr = NULL;
-
- while (next_cr) {
- cr = next_cr;
- next_cr = dapl_llist_next_entry((DAPL_LLIST_HEAD*)
- &ia_ptr->hca_ptr->ib_trans.list,
- (DAPL_LLIST_ENTRY*)&cr->entry);
- if (cr->ep == ep) {
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " qp_free CR: ep %p cr %p\n", ep, cr);
- dapli_socket_disconnect(cr);
- dapl_os_lock(&cr->lock);
- hca_ptr = cr->hca;
- cr->ep = NULL;
- if (cr->ah) {
- ibv_destroy_ah(cr->ah);
- cr->ah = NULL;
- }
- cr->state = DCM_DESTROY;
- dapl_os_unlock(&cr->lock);
- }
- }
- dapl_os_unlock(&ia_ptr->hca_ptr->ib_trans.lock);
-notify_thread:
-
- /* wakeup work thread, if something destroyed */
- if (hca_ptr != NULL)
- send(hca_ptr->ib_trans.scm[1], "w", sizeof "w", 0);
-}
-
-/* queue socket for processing CM work */
-static void dapli_cm_queue(struct ib_cm_handle *cm_ptr)
-{
- /* add to work queue for cr thread processing */
- dapl_llist_init_entry((DAPL_LLIST_ENTRY *) & cm_ptr->entry);
- dapl_os_lock(&cm_ptr->hca->ib_trans.lock);
- dapl_llist_add_tail(&cm_ptr->hca->ib_trans.list,
- (DAPL_LLIST_ENTRY *) & cm_ptr->entry, cm_ptr);
- dapl_os_unlock(&cm_ptr->hca->ib_trans.lock);
-
- /* wakeup CM work thread */
- send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0);
+ /* unlink, dequeue from EP. Final ref so release will destroy */
+ dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr);
}
/*
* ACTIVE/PASSIVE: called from CR thread or consumer via ep_disconnect
- * or from ep_free
+ * or from ep_free.
*/
DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr)
{
- DAPL_EP *ep_ptr = cm_ptr->ep;
DAT_UINT32 disc_data = htonl(0xdead);
- if (ep_ptr == NULL)
- return DAT_SUCCESS;
-
dapl_os_lock(&cm_ptr->lock);
- if (cm_ptr->state != DCM_CONNECTED) {
+ if (cm_ptr->state != DCM_CONNECTED ||
+ cm_ptr->state == DCM_DISCONNECTED) {
dapl_os_unlock(&cm_ptr->lock);
return DAT_SUCCESS;
}
-
- /* send disc date, close socket, schedule destroy */
- dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0,0,0);
cm_ptr->state = DCM_DISCONNECTED;
- send(cm_ptr->socket, (char *)&disc_data, sizeof(disc_data), 0);
dapl_os_unlock(&cm_ptr->lock);
+
+ /* send disc data, close socket, schedule destroy */
+ dapls_modify_qp_state(cm_ptr->ep->qp_handle, IBV_QPS_ERR, 0,0,0);
+ send(cm_ptr->socket, (char *)&disc_data, sizeof(disc_data), 0);
/* disconnect events for RC's only */
- if (ep_ptr->param.ep_attr.service_type == DAT_SERVICE_TYPE_RC) {
- if (ep_ptr->cr_ptr) {
+ if (cm_ptr->ep->param.ep_attr.service_type == DAT_SERVICE_TYPE_RC) {
+ if (cm_ptr->ep->cr_ptr) {
dapls_cr_callback(cm_ptr,
IB_CME_DISCONNECTED,
- NULL, 0,
- ((DAPL_CR *) ep_ptr->cr_ptr)->sp_ptr);
+ NULL, 0, cm_ptr->sp);
} else {
- dapl_evd_connection_callback(ep_ptr->cm_handle,
+ dapl_evd_connection_callback(cm_ptr,
IB_CME_DISCONNECTED,
- NULL, 0, ep_ptr);
+ NULL, 0, cm_ptr->ep);
}
}
+
+ /* release from workq */
+ dapli_cm_free(cm_ptr);
/* scheduled destroy via disconnect clean in callback */
return DAT_SUCCESS;
@@ -505,8 +500,8 @@ static void dapli_socket_connected(dp_ib_cm_handle_t cm_ptr, int err)
return;
bail:
- /* close socket, free cm structure and post error event */
- dapls_ib_cm_free(cm_ptr, cm_ptr->ep);
+ /* mark CM object for cleanup */
+ cm_ptr->state = DCM_FREE;
dapl_evd_connection_callback(NULL, IB_CME_LOCAL_FAILURE, NULL, 0, ep_ptr);
}
@@ -514,7 +509,7 @@ bail:
* ACTIVE: Create socket, connect, defer exchange QP information to CR thread
* to avoid blocking.
*/
-DAT_RETURN
+static DAT_RETURN
dapli_socket_connect(DAPL_EP * ep_ptr,
DAT_IA_ADDRESS_PTR r_addr,
DAT_CONN_QUAL r_qual, DAT_COUNT p_size, DAT_PVOID p_data)
@@ -528,7 +523,7 @@ dapli_socket_connect(DAPL_EP * ep_ptr,
dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect: r_qual %d p_size=%d\n",
r_qual, p_size);
- cm_ptr = dapls_ib_cm_create(ep_ptr);
+ cm_ptr = dapli_cm_alloc(ep_ptr);
if (cm_ptr == NULL)
return dat_ret;
@@ -566,6 +561,7 @@ dapli_socket_connect(DAPL_EP * ep_ptr,
}
/* REQ: QP info in msg.saddr, IA address in msg.daddr, and pdata */
+ cm_ptr->hca = ia_ptr->hca_ptr;
cm_ptr->msg.op = ntohs(DCM_REQ);
cm_ptr->msg.saddr.ib.qpn = htonl(ep_ptr->qp_handle->qp_num);
cm_ptr->msg.saddr.ib.qp_type = ep_ptr->qp_handle->qp_type;
@@ -573,10 +569,6 @@ dapli_socket_connect(DAPL_EP * ep_ptr,
dapl_os_memcpy(&cm_ptr->msg.saddr.ib.gid[0],
&ia_ptr->hca_ptr->ib_trans.gid, 16);
- /* save references */
- cm_ptr->hca = ia_ptr->hca_ptr;
- cm_ptr->ep = ep_ptr;
-
/* get local address information from socket */
sl = sizeof(cm_ptr->msg.daddr.so);
if (getsockname(cm_ptr->socket, (struct sockaddr *)&cm_ptr->msg.daddr.so, &sl)) {
@@ -607,9 +599,9 @@ dapli_socket_connect(DAPL_EP * ep_ptr,
(unsigned int)r_qual, ntohs(cm_ptr->msg.p_size),
cm_ptr->msg.p_data[0], cm_ptr->msg.p_data[1]);
+ /* queue up on work thread */
dapli_cm_queue(cm_ptr);
return DAT_SUCCESS;
-
bail:
dapl_log(DAPL_DBG_TYPE_ERR,
" connect ERROR: %s -> %s r_qual %d\n",
@@ -617,8 +609,8 @@ bail:
inet_ntoa(((struct sockaddr_in *)r_addr)->sin_addr),
(unsigned int)r_qual);
- /* close socket, free cm structure */
- dapls_ib_cm_free(cm_ptr, NULL);
+ /* Never queued, destroy */
+ dapls_cm_release(cm_ptr);
return dat_ret;
}
@@ -653,7 +645,8 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
dapli_socket_connect(cm_ptr->ep, (DAT_IA_ADDRESS_PTR)&cm_ptr->addr,
ntohs(((struct sockaddr_in *)&cm_ptr->addr)->sin_port) - 1000,
ntohs(cm_ptr->msg.p_size), &cm_ptr->msg.p_data);
- dapls_ib_cm_free(cm_ptr, NULL);
+ dapl_ep_unlink_cm(cm_ptr->ep, cm_ptr);
+ dapli_cm_free(cm_ptr);
return;
}
goto bail;
@@ -728,7 +721,6 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
}
/* modify QP to RTR and then to RTS with remote info */
- dapl_os_lock(&ep_ptr->header.lock);
if (dapls_modify_qp_state(ep_ptr->qp_handle,
IBV_QPS_RTR,
cm_ptr->msg.saddr.ib.qpn,
@@ -744,7 +736,6 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
&cm_ptr->msg.daddr.so)->sin_addr),
ntohs(((struct sockaddr_in *)
&cm_ptr->msg.daddr.so)->sin_port));
- dapl_os_unlock(&ep_ptr->header.lock);
goto bail;
}
if (dapls_modify_qp_state(ep_ptr->qp_handle,
@@ -762,10 +753,8 @@ static void dapli_socket_connect_rtu(dp_ib_cm_handle_t cm_ptr)
&cm_ptr->msg.daddr.so)->sin_addr),
ntohs(((struct sockaddr_in *)
&cm_ptr->msg.daddr.so)->sin_port));
- dapl_os_unlock(&ep_ptr->header.lock);
goto bail;
}
- dapl_os_unlock(&ep_ptr->header.lock);
dapl_dbg_log(DAPL_DBG_TYPE_EP, " connect_rtu: send RTU\n");
/* complete handshake after final QP state change, Just ver+op */
@@ -817,7 +806,7 @@ ud_bail:
event = DAT_IB_UD_CONNECTION_ERROR_EVENT;
} else
event = DAT_IB_UD_CONNECTION_REJECT_EVENT;
-
+
dapls_evd_post_connection_event_ext(
(DAPL_EVD *) ep_ptr->param.connect_evd_handle,
event,
@@ -826,14 +815,14 @@ ud_bail:
(DAT_PVOID *) cm_ptr->msg.p_data,
(DAT_PVOID *) &xevent);
- /* done with socket, don't destroy cm_ptr, need pdata */
- closesocket(cm_ptr->socket);
- cm_ptr->socket = DAPL_INVALID_SOCKET;
- cm_ptr->state = DCM_RELEASED;
+ /* cleanup and release from local list */
+ dapl_os_lock(&cm_ptr->lock);
+ cm_ptr->state = DCM_FREE;
+ dapl_os_unlock(&cm_ptr->lock);
+
} else
#endif
{
- ep_ptr->cm_handle = cm_ptr; /* only RC, multi CR's on UD */
dapl_evd_connection_callback(cm_ptr, event, cm_ptr->msg.p_data,
DCM_MAX_PDATA_SIZE, ep_ptr);
}
@@ -846,9 +835,9 @@ bail:
goto ud_bail;
#endif
/* close socket, and post error event */
- dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0, 0, 0);
+ cm_ptr->state = DCM_REJECTED;
closesocket(cm_ptr->socket);
- cm_ptr->socket = DAPL_INVALID_SOCKET;
+ cm_ptr->socket = DAPL_INVALID_SOCKET;
dapl_evd_connection_callback(NULL, event, cm_ptr->msg.p_data,
DCM_MAX_PDATA_SIZE, ep_ptr);
}
@@ -862,12 +851,13 @@ dapli_socket_listen(DAPL_IA * ia_ptr, DAT_CONN_QUAL serviceID, DAPL_SP * sp_ptr)
struct sockaddr_in addr;
ib_cm_srvc_handle_t cm_ptr = NULL;
DAT_RETURN dat_status = DAT_SUCCESS;
+ int opt = 1;
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " listen(ia_ptr %p ServiceID %d sp_ptr %p)\n",
+ dapl_dbg_log(DAPL_DBG_TYPE_CM,
+ " setup listen(ia_ptr %p ServiceID %d sp_ptr %p)\n",
ia_ptr, serviceID, sp_ptr);
- cm_ptr = dapls_ib_cm_create(NULL);
+ cm_ptr = dapli_cm_alloc(NULL);
if (cm_ptr == NULL)
return DAT_INSUFFICIENT_RESOURCES;
@@ -883,15 +873,16 @@ dapli_socket_listen(DAPL_IA * ia_ptr, DAT_CONN_QUAL serviceID, DAPL_SP * sp_ptr)
goto bail;
}
+ setsockopt(cm_ptr->socket, SOL_SOCKET, SO_REUSEADDR, (char*)&opt, sizeof(opt));
addr.sin_port = htons(serviceID + 1000);
addr.sin_family = AF_INET;
addr.sin_addr = ((struct sockaddr_in *) &ia_ptr->hca_ptr->hca_address)->sin_addr;
if ((bind(cm_ptr->socket, (struct sockaddr *)&addr, sizeof(addr)) < 0)
|| (listen(cm_ptr->socket, 128) < 0)) {
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " listen: ERROR %s on conn_qual 0x%x\n",
- strerror(errno), serviceID + 1000);
+ dapl_log(DAPL_DBG_TYPE_CM,
+ " listen: ERROR %s on port %d\n",
+ strerror(errno), serviceID + 1000);
if (dapl_socket_errno() == EADDRINUSE)
dat_status = DAT_CONN_QUAL_IN_USE;
else
@@ -908,14 +899,13 @@ dapli_socket_listen(DAPL_IA * ia_ptr, DAT_CONN_QUAL serviceID, DAPL_SP * sp_ptr)
dapli_cm_queue(cm_ptr);
dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " listen: qual 0x%x cr %p s_fd %d\n",
- ntohs(serviceID + 1000), cm_ptr, cm_ptr->socket);
+ " setup listen: port %d cr %p s_fd %d\n",
+ serviceID + 1000, cm_ptr, cm_ptr->socket);
return dat_status;
bail:
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " listen: ERROR on conn_qual 0x%x\n", serviceID + 1000);
- dapls_ib_cm_free(cm_ptr, cm_ptr->ep);
+ /* Never queued, destroy here */
+ dapls_cm_release(cm_ptr);
return dat_status;
}
@@ -934,7 +924,7 @@ static void dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
*/
do {
/* Allocate accept CM and initialize */
- if ((acm_ptr = dapls_ib_cm_create(NULL)) == NULL)
+ if ((acm_ptr = dapli_cm_alloc(NULL)) == NULL)
return;
acm_ptr->sp = cm_ptr->sp;
@@ -949,7 +939,7 @@ static void dapli_socket_accept(ib_cm_srvc_handle_t cm_ptr)
dapl_log(DAPL_DBG_TYPE_ERR,
" ACCEPT: ERR %s on FD %d l_cr %p\n",
strerror(errno), cm_ptr->socket, cm_ptr);
- dapls_ib_cm_free(acm_ptr, acm_ptr->ep);
+ dapls_cm_release(acm_ptr);
return;
}
dapl_dbg_log(DAPL_DBG_TYPE_CM, " accepting from %s %x\n",
@@ -1050,8 +1040,8 @@ static void dapli_socket_accept_data(ib_cm_srvc_handle_t acm_ptr)
p_data, exp, acm_ptr->sp);
return;
bail:
- /* close socket, free cm structure, active will see close as rej */
- dapls_ib_cm_free(acm_ptr, acm_ptr->ep);
+ /* mark for destroy, active will see socket close as rej */
+ acm_ptr->state = DCM_FREE;
return;
}
@@ -1105,7 +1095,6 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
#endif
/* modify QP to RTR and then to RTS with remote info already read */
- dapl_os_lock(&ep_ptr->header.lock);
if (dapls_modify_qp_state(ep_ptr->qp_handle,
IBV_QPS_RTR,
cm_ptr->msg.saddr.ib.qpn,
@@ -1116,7 +1105,6 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
strerror(errno),
inet_ntoa(((struct sockaddr_in *)
&cm_ptr->msg.daddr.so)->sin_addr));
- dapl_os_unlock(&ep_ptr->header.lock);
goto bail;
}
if (dapls_modify_qp_state(ep_ptr->qp_handle,
@@ -1129,10 +1117,8 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
strerror(errno),
inet_ntoa(((struct sockaddr_in *)
&cm_ptr->msg.daddr.so)->sin_addr));
- dapl_os_unlock(&ep_ptr->header.lock);
goto bail;
}
- dapl_os_unlock(&ep_ptr->header.lock);
/* save remote address information */
dapl_os_memcpy(&ep_ptr->remote_ia_address,
@@ -1157,8 +1143,6 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
*(uint16_t*)&cm_ptr->msg.resv[2] = htons((uint16_t)dapl_os_getpid());
dapl_os_memcpy(local.resv, cm_ptr->msg.resv, 4);
#endif
-
- cm_ptr->ep = ep_ptr;
cm_ptr->hca = ia_ptr->hca_ptr;
cm_ptr->state = DCM_ACCEPTED;
@@ -1179,7 +1163,6 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
strerror(errno), len,
inet_ntoa(((struct sockaddr_in *)
&cm_ptr->msg.daddr.so)->sin_addr));
- cm_ptr->ep = NULL;
goto bail;
}
@@ -1195,9 +1178,14 @@ dapli_socket_accept_usr(DAPL_EP * ep_ptr,
htonll(*(uint64_t*)&local.saddr.ib.gid[8]));
dapl_dbg_log(DAPL_DBG_TYPE_EP, " PASSIVE: accepted!\n");
+
+ /* Link CM to EP, already queued on work thread */
+ dapl_ep_link_cm(ep_ptr, cm_ptr);
+ cm_ptr->ep = ep_ptr;
return DAT_SUCCESS;
bail:
- dapls_ib_cm_free(cm_ptr, NULL);
+ /* schedule cleanup from workq */
+ dapli_cm_free(cm_ptr);
return ret;
}
@@ -1269,14 +1257,12 @@ ud_bail:
(DAT_PVOID *) cm_ptr->msg.p_data,
(DAT_PVOID *) &xevent);
- /* done with socket, don't destroy cm_ptr, need pdata */
- closesocket(cm_ptr->socket);
- cm_ptr->socket = DAPL_INVALID_SOCKET;
- cm_ptr->state = DCM_RELEASED;
+ /* cleanup and release from local list, still on EP list */
+ dapli_cm_free(cm_ptr);
+
} else
#endif
{
- cm_ptr->ep->cm_handle = cm_ptr; /* only RC, multi CR's on UD */
dapls_cr_callback(cm_ptr, event, NULL, 0, cm_ptr->sp);
}
return;
@@ -1286,9 +1272,9 @@ bail:
if (cm_ptr->msg.saddr.ib.qp_type == IBV_QPT_UD)
goto ud_bail;
#endif
- dapls_modify_qp_state(cm_ptr->ep->qp_handle, IBV_QPS_ERR, 0, 0, 0);
- dapls_ib_cm_free(cm_ptr, cm_ptr->ep);
+ cm_ptr->state = DCM_REJECTED;
dapls_cr_callback(cm_ptr, event, NULL, 0, cm_ptr->sp);
+ dapli_cm_free(cm_ptr);
}
/*
@@ -1318,15 +1304,11 @@ dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
IN DAT_CONN_QUAL remote_conn_qual,
IN DAT_COUNT private_data_size, IN void *private_data)
{
- DAPL_EP *ep_ptr;
- ib_qp_handle_t qp_ptr;
-
+ DAPL_EP *ep_ptr = (DAPL_EP *) ep_handle;
+
dapl_dbg_log(DAPL_DBG_TYPE_EP,
" connect(ep_handle %p ....)\n", ep_handle);
- ep_ptr = (DAPL_EP *) ep_handle;
- qp_ptr = ep_ptr->qp_handle;
-
return (dapli_socket_connect(ep_ptr, remote_ia_address,
remote_conn_qual,
private_data_size, private_data));
@@ -1350,17 +1332,17 @@ dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
DAT_RETURN
dapls_ib_disconnect(IN DAPL_EP * ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
{
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- "dapls_ib_disconnect(ep_handle %p ....)\n", ep_ptr);
+ dp_ib_cm_handle_t cm_ptr = dapl_get_cm_from_ep(ep_ptr);
- /* Transition to error state to flush queue */
- dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0, 0, 0);
-
- if (ep_ptr->cm_handle == NULL ||
- ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED)
+ if (cm_ptr == NULL || /* no CM linked to EP: nothing to disconnect */
+ ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED || ep_ptr->param.ep_attr.service_type != DAT_SERVICE_TYPE_RC) {
return DAT_SUCCESS;
- else
- return (dapli_socket_disconnect(ep_ptr->cm_handle));
+ }
+
+ /* RC. Transition to error state to flush queue */
+ dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0, 0, 0);
+
+ return (dapli_socket_disconnect(cm_ptr));
}
/*
@@ -1387,18 +1369,7 @@ dapls_ib_disconnect_clean(IN DAPL_EP * ep_ptr,
IN DAT_BOOLEAN active,
IN const ib_cm_events_t ib_cm_event)
{
- /* NOTE: SCM will only initialize cm_handle with RC type
- *
- * For UD there can many in-flight CR's so you
- * cannot cleanup timed out CR's with EP reference
- * alone since they share the same EP. The common
- * code that handles connection timeout logic needs
- * updated for UD support.
- */
- if (ep_ptr->cm_handle)
- dapls_ib_cm_free(ep_ptr->cm_handle, ep_ptr);
-
- return;
+ /* nothing to cleanup */
}
/*
@@ -1450,18 +1421,11 @@ dapls_ib_remove_conn_listener(IN DAPL_IA * ia_ptr, IN DAPL_SP * sp_ptr)
{
ib_cm_srvc_handle_t cm_ptr = sp_ptr->cm_srvc_handle;
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- "dapls_ib_remove_conn_listener(ia_ptr %p sp_ptr %p cm_ptr %p)\n",
- ia_ptr, sp_ptr, cm_ptr);
-
- /* close accepted socket, free cm_srvc_handle and return */
+ /* free cm_srvc_handle, release will cleanup */
if (cm_ptr != NULL) {
/* cr_thread will free */
- dapl_os_lock(&cm_ptr->lock);
- cm_ptr->state = DCM_DESTROY;
sp_ptr->cm_srvc_handle = NULL;
- send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0);
- dapl_os_unlock(&cm_ptr->lock);
+ dapli_cm_free(cm_ptr);
}
return DAT_SUCCESS;
}
@@ -1542,8 +1506,6 @@ dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_ptr,
if (psize > DCM_MAX_PDATA_SIZE)
return DAT_LENGTH_ERROR;
- dapl_os_lock(&cm_ptr->lock);
-
/* write reject data to indicate reject */
cm_ptr->msg.op = htons(DCM_REJ_USER);
cm_ptr->msg.p_size = htons(psize);
@@ -1558,10 +1520,8 @@ dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm_ptr,
writev(cm_ptr->socket, iov, 1);
}
- /* cr_thread will destroy CR */
- cm_ptr->state = DCM_DESTROY;
- send(cm_ptr->hca->ib_trans.scm[1], "w", sizeof "w", 0);
- dapl_os_unlock(&cm_ptr->lock);
+ /* release and cleanup CM object */
+ dapli_cm_free(cm_ptr);
return DAT_SUCCESS;
}
@@ -1586,7 +1546,7 @@ dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle,
OUT DAT_SOCK_ADDR6 * remote_ia_address)
{
DAPL_HEADER *header;
- dp_ib_cm_handle_t ib_cm_handle;
+ dp_ib_cm_handle_t conn;
dapl_dbg_log(DAPL_DBG_TYPE_EP,
"dapls_ib_cm_remote_addr(dat_handle %p, ....)\n",
@@ -1595,14 +1555,14 @@ dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle,
header = (DAPL_HEADER *) dat_handle;
if (header->magic == DAPL_MAGIC_EP)
- ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;
+ conn = dapl_get_cm_from_ep((DAPL_EP *) dat_handle);
else if (header->magic == DAPL_MAGIC_CR)
- ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;
+ conn = ((DAPL_CR *) dat_handle)->ib_cm_handle;
else
return DAT_INVALID_HANDLE;
dapl_os_memcpy(remote_ia_address,
- &ib_cm_handle->msg.daddr.so, sizeof(DAT_SOCK_ADDR6));
+ &conn->msg.daddr.so, sizeof(DAT_SOCK_ADDR6));
return DAT_SUCCESS;
}
@@ -1745,60 +1705,55 @@ void cr_thread(void *arg)
while (next_cr) {
cr = next_cr;
next_cr = dapl_llist_next_entry(&hca_ptr->ib_trans.list,
- (DAPL_LLIST_ENTRY *) &
- cr->entry);
+ (DAPL_LLIST_ENTRY *)
+ &cr->local_entry);
+ dapls_cm_acquire(cr); /* hold thread ref */
dapl_os_lock(&cr->lock);
- if (cr->state == DCM_DESTROY
- || hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
- dapl_os_unlock(&cr->lock);
- dapl_llist_remove_entry(&hca_ptr->ib_trans.list,
- (DAPL_LLIST_ENTRY *) &
- cr->entry);
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " CR FREE: %p ep=%p st=%d sock=%d\n",
- cr, cr->ep, cr->state, cr->socket);
+ if (cr->state == DCM_FREE ||
+ hca_ptr->ib_trans.cr_state != IB_THREAD_RUN) {
+ dapl_log(DAPL_DBG_TYPE_CM,
+ " CM FREE: %p ep=%p st=%s sck=%d refs=%d\n",
+ cr, cr->ep, dapl_cm_state_str(cr->state),
+ cr->socket, cr->ref_count);
if (cr->socket != DAPL_INVALID_SOCKET) {
shutdown(cr->socket, SHUT_RDWR);
closesocket(cr->socket);
+ cr->socket = DAPL_INVALID_SOCKET;
}
- dapl_os_lock_destroy(&cr->lock);
- dapl_os_free(cr, sizeof(*cr));
- continue;
- }
- if (cr->socket == DAPL_INVALID_SOCKET) {
dapl_os_unlock(&cr->lock);
+ dapls_cm_release(cr); /* release alloc ref */
+ dapli_cm_dequeue(cr); /* release workq ref */
+ dapls_cm_release(cr); /* release thread ref */
continue;
}
event = (cr->state == DCM_CONN_PENDING) ?
- DAPL_FD_WRITE : DAPL_FD_READ;
+ DAPL_FD_WRITE : DAPL_FD_READ;
if (dapl_fd_set(cr->socket, set, event)) {
dapl_log(DAPL_DBG_TYPE_ERR,
- " cr_thread: DESTROY CR st=%d fd %d"
+ " cr_thread: fd_set ERR st=%d fd %d"
" -> %s\n", cr->state, cr->socket,
inet_ntoa(((struct sockaddr_in *)
&cr->msg.daddr.so)->sin_addr));
dapl_os_unlock(&cr->lock);
- dapls_ib_cm_free(cr, cr->ep);
+ dapls_cm_release(cr); /* release ref */
continue;
}
dapl_os_unlock(&cr->lock);
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " poll cr=%p, sck=%d\n", cr, cr->socket);
dapl_os_unlock(&hca_ptr->ib_trans.lock);
-
+
ret = dapl_poll(cr->socket, event);
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " poll ret=0x%x cr->state=%d sck=%d\n",
- ret, cr->state, cr->socket);
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD,
+ " poll ret=0x%x %s sck=%d\n",
+ ret, dapl_cm_state_str(cr->state),
+ cr->socket);
/* data on listen, qp exchange, and on disc req */
if ((ret == DAPL_FD_READ) ||
- (cr->state != DCM_CONN_PENDING &&
- ret == DAPL_FD_ERROR)) {
+ (cr->state != DCM_CONN_PENDING && ret == DAPL_FD_ERROR)) {
if (cr->socket != DAPL_INVALID_SOCKET) {
switch (cr->state) {
case DCM_LISTEN:
@@ -1838,6 +1793,8 @@ void cr_thread(void *arg)
else
dapli_socket_connected(cr, opt ? opt : dapl_socket_errno());
}
+
+ dapls_cm_release(cr); /* release ref */
dapl_os_lock(&hca_ptr->ib_trans.lock);
}
@@ -1853,7 +1810,7 @@ void cr_thread(void *arg)
while (dapl_poll(hca_ptr->ib_trans.scm[0],
DAPL_FD_READ) == DAPL_FD_READ) {
if (recv(hca_ptr->ib_trans.scm[0], rbuf, 2, 0) == -1)
- dapl_log(DAPL_DBG_TYPE_CM,
+ dapl_log(DAPL_DBG_TYPE_THREAD,
" cr_thread: read pipe error = %s\n",
strerror(errno));
}
@@ -1869,7 +1826,7 @@ void cr_thread(void *arg)
dapl_os_free(set, sizeof(struct dapl_fd_set));
out:
hca_ptr->ib_trans.cr_state = IB_THREAD_EXIT;
- dapl_dbg_log(DAPL_DBG_TYPE_UTIL, " cr_thread(hca %p) exit\n", hca_ptr);
+ dapl_dbg_log(DAPL_DBG_TYPE_THREAD, " cr_thread(hca %p) exit\n", hca_ptr);
}
@@ -1894,7 +1851,7 @@ void dapls_print_cm_list(IN DAPL_IA *ia_ptr)
cr = next_cr;
next_cr = dapl_llist_next_entry((DAPL_LLIST_HEAD*)
&ia_ptr->hca_ptr->ib_trans.list,
- (DAPL_LLIST_ENTRY*)&cr->entry);
+ (DAPL_LLIST_ENTRY*)&cr->local_entry);
printf( " CONN[%d]: sp %p ep %p sock %d %s %s %s %s %s %s PORT L-%x R-%x PID L-%x R-%x\n",
i, cr->sp, cr->ep, cr->socket,
diff --git a/dapl/openib_scm/dapl_ib_util.h b/dapl/openib_scm/dapl_ib_util.h
index 831084f..497bc64 100644
--- a/dapl/openib_scm/dapl_ib_util.h
+++ b/dapl/openib_scm/dapl_ib_util.h
@@ -31,10 +31,14 @@
#include "openib_osd.h"
#include "dapl_ib_common.h"
+/* DAPL CM objects MUST include list_entry, ref_count, event for EP linking */
struct ib_cm_handle
{
- struct dapl_llist_entry entry;
+ struct dapl_llist_entry list_entry;
+ struct dapl_llist_entry local_entry;
+ DAPL_OS_WAIT_OBJECT event;
DAPL_OS_LOCK lock;
+ int ref_count;
int state;
DAPL_SOCKET socket;
struct dapl_hca *hca;
@@ -45,7 +49,7 @@ struct ib_cm_handle
DAT_SOCK_ADDR6 addr;
};
-typedef struct ib_cm_handle *dp_ib_cm_handle_t;
+typedef struct ib_cm_handle *dp_ib_cm_handle_t;
typedef dp_ib_cm_handle_t ib_cm_srvc_handle_t;
/* Definitions */
@@ -110,9 +114,9 @@ int dapli_cq_thread_init(struct dapl_hca *hca_ptr);
void dapli_cq_thread_destroy(struct dapl_hca *hca_ptr);
void dapli_async_event_cb(struct _ib_hca_transport *tp);
void dapli_cq_event_cb(struct _ib_hca_transport *tp);
-DAT_RETURN dapli_socket_disconnect(dp_ib_cm_handle_t cm_ptr);
-dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep);
-void dapls_ib_cm_free(dp_ib_cm_handle_t cm, DAPL_EP *ep);
+void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr);
+void dapls_cm_release(dp_ib_cm_handle_t cm_ptr);
+void dapls_cm_free(dp_ib_cm_handle_t cm_ptr);
#ifdef DAPL_COUNTERS
void dapls_print_cm_list(IN DAPL_IA *ia_ptr);
diff --git a/dapl/openib_ucm/cm.c b/dapl/openib_ucm/cm.c
index c0da589..6efa2f1 100644
--- a/dapl/openib_ucm/cm.c
+++ b/dapl/openib_ucm/cm.c
@@ -31,6 +31,7 @@
#include "dapl_cr_util.h"
#include "dapl_name_service.h"
#include "dapl_ib_util.h"
+#include "dapl_ep_util.h"
#include "dapl_osd.h"
@@ -374,7 +375,7 @@ static void ucm_process_recv(ib_hca_transport_t *tp,
ucm_disconnect_final(cm);
break;
case DCM_DISCONNECTED:
- case DCM_DESTROY:
+ case DCM_FREE:
/* DREQ dropped, resend */
if (ntohs(msg->op) == DCM_DREQ) {
dapl_log(DAPL_DBG_TYPE_WARN,
@@ -392,10 +393,7 @@ static void ucm_process_recv(ib_hca_transport_t *tp,
ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0);
}
- dapl_os_unlock(&cm->lock);
- break;
- case DCM_RELEASED:
- /* UD reply retried, ignore */
+ /* UD reply retried ok to ignore, any other print warning */
if (ntohs(msg->op) != DCM_REP) {
dapl_log(DAPL_DBG_TYPE_WARN,
" ucm_recv: UNKNOWN operation"
@@ -438,8 +436,8 @@ retry_listenq:
while (next) {
cm = next;
next = dapl_llist_next_entry(list,
- (DAPL_LLIST_ENTRY *)&cm->entry);
- if (cm->state == DCM_DESTROY)
+ (DAPL_LLIST_ENTRY *)&cm->local_entry);
+ if (cm->state == DCM_DESTROY || cm->state == DCM_FREE)
continue;
/* CM sPORT + QPN, match is good enough for listenq */
@@ -619,6 +617,46 @@ bail:
}
/* ACTIVE/PASSIVE: CM objects */
+static void dapli_cm_dealloc(dp_ib_cm_handle_t cm) {
+
+ dapl_os_assert(!cm->ref_count);
+ dapl_os_lock_destroy(&cm->lock);
+ dapl_os_wait_object_destroy(&cm->event);
+ dapl_os_free(cm, sizeof(*cm));
+}
+
+void dapls_cm_acquire(dp_ib_cm_handle_t cm)
+{
+ dapl_os_lock(&cm->lock);
+ cm->ref_count++;
+ dapl_os_unlock(&cm->lock);
+}
+
+void dapls_cm_release(dp_ib_cm_handle_t cm)
+{
+ dapl_os_lock(&cm->lock);
+ cm->ref_count--;
+ if (cm->ref_count) {
+ dapl_os_unlock(&cm->lock);
+ return;
+ }
+ /* client, release local conn id port */
+ if (!cm->sp && cm->msg.sport)
+ ucm_free_port(&cm->hca->ib_trans, ntohs(cm->msg.sport));
+
+ /* server, release local conn id port */
+ if (cm->sp && cm->msg.dport)
+ ucm_free_port(&cm->hca->ib_trans, ntohs(cm->msg.dport));
+
+ /* clean up any UD address handles */
+ if (cm->ah) {
+ ibv_destroy_ah(cm->ah);
+ cm->ah = NULL;
+ }
+ dapl_os_unlock(&cm->lock);
+ dapli_cm_dealloc(cm);
+}
+
dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
{
dp_ib_cm_handle_t cm;
@@ -630,6 +668,12 @@ dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
(void)dapl_os_memzero(cm, sizeof(*cm));
if (dapl_os_lock_init(&cm->lock))
goto bail;
+
+ if (dapl_os_wait_object_init(&cm->event)) {
+ dapl_os_lock_destroy(&cm->lock);
+ goto bail;
+ }
+ dapls_cm_acquire(cm);
cm->msg.ver = htons(DCM_VER);
*(DAT_UINT32*)cm->msg.resv = htonl(dapl_os_getpid()); /* exchange PID for debugging */
@@ -639,12 +683,17 @@ dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep)
DAPL_HCA *hca = ep->header.owner_ia->hca_ptr;
cm->msg.sport = htons(ucm_get_port(&hca->ib_trans, 0));
- if (!cm->msg.sport)
+ if (!cm->msg.sport) {
+ dapl_os_wait_object_destroy(&cm->event);
+ dapl_os_lock_destroy(&cm->lock);
goto bail;
+ }
+ /* link CM object to EP */
+ dapl_ep_link_cm(ep, cm);
+ cm->hca = hca;
+ cm->ep = ep;
/* IB info in network order */
- cm->ep = ep;
- cm->hca = hca;
cm->msg.sqpn = htonl(hca->ib_trans.qp->qp_num); /* ucm */
cm->msg.saddr.ib.qpn = htonl(ep->qp_handle->qp_num); /* ep */
cm->msg.saddr.ib.qp_type = ep->qp_handle->qp_type;
@@ -658,128 +707,80 @@ bail:
return NULL;
}
-/*
- * UD CR objects are kept active because of direct private data references
- * from CONN events. The cr->socket is closed and marked inactive but the
- * object remains allocated and queued on the CR resource list. There can
- * be multiple CR's associated with a given EP. There is no way to determine
- * when consumer is finished with event until the dat_ep_free.
- *
- * Schedule destruction for all CR's associated with this EP, cr_thread will
- * complete the cleanup with state == DCM_DESTROY.
- */
-static void ucm_ud_free(DAPL_EP *ep)
+/* schedule destruction of CM object */
+void dapli_cm_free(dp_ib_cm_handle_t cm)
{
- DAPL_IA *ia = ep->header.owner_ia;
- DAPL_HCA *hca = NULL;
- ib_hca_transport_t *tp = &ia->hca_ptr->ib_trans;
- dp_ib_cm_handle_t cm, next;
-
- dapl_os_lock(&tp->lock);
- if (!dapl_llist_is_empty((DAPL_LLIST_HEAD*)&tp->list))
- next = dapl_llist_peek_head((DAPL_LLIST_HEAD*)&tp->list);
- else
- next = NULL;
-
- while (next) {
- cm = next;
- next = dapl_llist_next_entry((DAPL_LLIST_HEAD*)&tp->list,
- (DAPL_LLIST_ENTRY*)&cm->entry);
- if (cm->ep == ep) {
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " qp_free CM: ep %p cm %p\n", ep, cm);
- dapl_os_lock(&cm->lock);
- hca = cm->hca;
- cm->ep = NULL;
- if (cm->ah) {
- ibv_destroy_ah(cm->ah);
- cm->ah = NULL;
- }
- cm->state = DCM_DESTROY;
- dapl_os_unlock(&cm->lock);
- }
- }
- dapl_os_unlock(&tp->lock);
-
- /* wakeup work thread if necessary */
- if (hca)
- dapls_thread_signal(&tp->signal);
+ dapl_os_lock(&cm->lock);
+ cm->state = DCM_FREE;
+ dapls_thread_signal(&cm->hca->ib_trans.signal);
+ dapl_os_unlock(&cm->lock);
}
-/* mark for destroy, remove all references, schedule cleanup */
-/* cm_ptr == NULL (UD), then multi CR's, kill all associated with EP */
-void dapls_ib_cm_free(dp_ib_cm_handle_t cm, DAPL_EP *ep)
+/* Blocking, ONLY called from dat_ep_free */
+void dapls_cm_free(dp_ib_cm_handle_t cm)
{
- dapl_dbg_log(DAPL_DBG_TYPE_CM,
- " cm_destroy: %s cm %p ep %p\n",
- cm ? dapl_cm_state_str(cm->state) : "", cm, ep);
-
- if (!cm && ep) {
- ucm_ud_free(ep);
- return;
- }
-
+ dapl_log(DAPL_DBG_TYPE_CM,
+ " cm_free: cm %p %s ep %p refs=%d\n",
+ cm, dapl_cm_state_str(cm->state),
+ cm->ep, cm->ref_count);
+
+ /* free from internal workq, wait until EP is last ref */
dapl_os_lock(&cm->lock);
-
- /* client, release local conn id port */
- if (!cm->sp && cm->msg.sport)
- ucm_free_port(&cm->hca->ib_trans, cm->msg.sport);
-
- /* cleanup, never made it to work queue */
- if (cm->state == DCM_INIT) {
+ cm->state = DCM_FREE;
+ while (cm->ref_count != 1) {
dapl_os_unlock(&cm->lock);
- dapl_os_lock_destroy(&cm->lock);
- dapl_os_free(cm, sizeof(*cm));
- return;
- }
-
- /* free could be called before disconnect, disc_clean will destroy */
- if (cm->state == DCM_CONNECTED) {
- dapl_os_unlock(&cm->lock);
- dapli_cm_disconnect(cm);
- return;
- }
-
- cm->state = DCM_DESTROY;
- if ((cm->ep) && (cm->ep->cm_handle == cm)) {
- cm->ep->cm_handle = IB_INVALID_HANDLE;
- cm->ep = NULL;
+ dapl_os_sleep_usec(10000);
+ dapl_os_lock(&cm->lock);
}
-
dapl_os_unlock(&cm->lock);
- /* wakeup work thread */
- dapls_thread_signal(&cm->hca->ib_trans.signal);
+ /* unlink, dequeue from EP. Final ref so release will destroy */
+ dapl_ep_unlink_cm(cm->ep, cm);
}
/* ACTIVE/PASSIVE: queue up connection object on CM list */
-static void ucm_queue_conn(dp_ib_cm_handle_t cm)
+static void dapli_queue_conn(dp_ib_cm_handle_t cm)
{
/* add to work queue, list, for cm thread processing */
- dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&cm->entry);
+ dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&cm->local_entry);
dapl_os_lock(&cm->hca->ib_trans.lock);
+ dapls_cm_acquire(cm);
dapl_llist_add_tail(&cm->hca->ib_trans.list,
- (DAPL_LLIST_ENTRY *)&cm->entry, cm);
+ (DAPL_LLIST_ENTRY *)&cm->local_entry, cm);
dapl_os_unlock(&cm->hca->ib_trans.lock);
dapls_thread_signal(&cm->hca->ib_trans.signal);
}
/* PASSIVE: queue up listen object on listen list */
-static void ucm_queue_listen(dp_ib_cm_handle_t cm)
+static void dapli_queue_listen(dp_ib_cm_handle_t cm)
{
/* add to work queue, llist, for cm thread processing */
- dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&cm->entry);
+ dapl_llist_init_entry((DAPL_LLIST_ENTRY *)&cm->local_entry);
dapl_os_lock(&cm->hca->ib_trans.llock);
+ dapls_cm_acquire(cm);
dapl_llist_add_tail(&cm->hca->ib_trans.llist,
- (DAPL_LLIST_ENTRY *)&cm->entry, cm);
+ (DAPL_LLIST_ENTRY *)&cm->local_entry, cm);
dapl_os_unlock(&cm->hca->ib_trans.llock);
}
-static void ucm_dequeue_listen(dp_ib_cm_handle_t cm) {
- dapl_os_lock(&cm->hca->ib_trans.llock);
- dapl_llist_remove_entry(&cm->hca->ib_trans.llist,
- (DAPL_LLIST_ENTRY *)&cm->entry);
- dapl_os_unlock(&cm->hca->ib_trans.llock);
+static void dapli_dequeue_listen(dp_ib_cm_handle_t cm)
+{
+ DAPL_HCA *hca = cm->hca;
+
+ dapl_os_lock(&hca->ib_trans.llock);
+ dapl_llist_remove_entry(&hca->ib_trans.llist,
+ (DAPL_LLIST_ENTRY *)&cm->local_entry);
+ dapls_cm_release(cm);
+ dapl_os_unlock(&hca->ib_trans.llock);
+}
+
+/* called with local LIST and CM object lock */
+static void dapli_cm_dequeue(dp_ib_cm_handle_t cm)
+{
+ /* Remove from work queue, cr thread processing */
+ dapl_llist_remove_entry(&cm->hca->ib_trans.list,
+ (DAPL_LLIST_ENTRY *)&cm->local_entry);
+ dapls_cm_release(cm);
}
static void ucm_disconnect_final(dp_ib_cm_handle_t cm)
@@ -802,6 +803,9 @@ static void ucm_disconnect_final(dp_ib_cm_handle_t cm)
dapls_cr_callback(cm, IB_CME_DISCONNECTED, NULL, 0, cm->sp);
else
dapl_evd_connection_callback(cm, IB_CME_DISCONNECTED, NULL, 0, cm->ep);
+
+ /* free local resources, EP ref will prevent destroy until dat_ep_free */
+ dapls_cm_release(cm);
}
/*
@@ -858,7 +862,7 @@ DAT_RETURN dapli_cm_disconnect(dp_ib_cm_handle_t cm)
ucm_send(&cm->hca->ib_trans, &cm->msg, NULL, 0);
dapl_os_unlock(&cm->lock);
- if (finalize)
+ if (finalize)
ucm_disconnect_final(cm);
return DAT_SUCCESS;
@@ -896,10 +900,6 @@ dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm)
htonl(cm->msg.dqpn),
htons(cm->msg.dport));
- /* update ep->cm reference so we get cleaned up on callback */
- if (cm->msg.saddr.ib.qp_type == IBV_QPT_RC);
- ep->cm_handle = cm;
-
dapl_os_unlock(&cm->lock);
#ifdef DAPL_COUNTERS
@@ -925,10 +925,10 @@ dapli_cm_connect(DAPL_EP *ep, dp_ib_cm_handle_t cm)
&cm->msg.p_data, ntohs(cm->msg.p_size)))
goto bail;
- /* first time through, put on work queue */
- if (!cm->retries)
- ucm_queue_conn(cm);
-
+ /* first time through, put on work queue */
+ if (!cm->retries) {
+ dapli_queue_conn(cm);
+ }
return DAT_SUCCESS;
bail:
@@ -938,8 +938,7 @@ bail:
htonl(cm->msg.dqpn), htons(cm->msg.dport),
htonl(cm->msg.p_size));
- /* close socket, free cm structure */
- dapls_ib_cm_free(cm, cm->ep);
+ dapli_cm_free(cm);
return DAT_INSUFFICIENT_RESOURCES;
}
@@ -1116,26 +1115,20 @@ ud_bail:
(DAT_PVOID *)cm->msg.p_data,
(DAT_PVOID *)&xevent);
- /* we are done, don't destroy cm_ptr, need pdata */
- dapl_os_lock(&cm->lock);
- cm->state = DCM_RELEASED;
- dapl_os_unlock(&cm->lock);
-
+ /* release cm_ptr, EP refs will prevent destroy */
+ dapli_cm_free(cm);
+
} else
#endif
{
- cm->ep->cm_handle = cm; /* only RC, multi CR's on UD */
dapl_evd_connection_callback(cm,
IB_CME_CONNECTED,
cm->msg.p_data, ntohs(cm->msg.p_size), cm->ep);
}
return;
bail:
- if (cm->msg.saddr.ib.qp_type != IBV_QPT_UD)
- dapls_ib_reinit_ep(cm->ep); /* reset QP state */
-
dapl_evd_connection_callback(NULL, event, cm->msg.p_data, ntohs(cm->msg.p_size), cm->ep);
- dapls_ib_cm_free(cm, NULL);
+ dapli_cm_free(cm);
}
/*
@@ -1184,7 +1177,7 @@ static void ucm_accept(ib_cm_srvc_handle_t cm, ib_cm_msg_t *msg)
msg->p_data, ntohs(msg->p_size));
acm->state = DCM_ACCEPTING;
- ucm_queue_conn(acm);
+ dapli_queue_conn(acm);
#ifdef DAT_EXTENSIONS
if (acm->msg.daddr.ib.qp_type == IBV_QPT_UD) {
@@ -1209,8 +1202,8 @@ static void ucm_accept(ib_cm_srvc_handle_t cm, ib_cm_msg_t *msg)
return;
bail:
- /* free cm object */
- dapls_ib_cm_free(acm, NULL);
+ /* schedule work thread cleanup */
+ dapli_cm_free(acm);
return;
}
@@ -1289,21 +1282,17 @@ static void ucm_accept_rtu(dp_ib_cm_handle_t cm, ib_cm_msg_t *msg)
(DAT_PVOID *)cm->msg.p_data,
(DAT_PVOID *)&xevent);
- /* done with CM object, don't destroy cm, need pdata */
- dapl_os_lock(&cm->lock);
- cm->state = DCM_RELEASED;
- dapl_os_unlock(&cm->lock);
+ /* done with CM object, EP ref will hold object for pdata */
+ dapli_cm_free(cm);
+
} else {
#endif
- cm->ep->cm_handle = cm; /* only RC, multi CR's on UD */
dapls_cr_callback(cm, IB_CME_CONNECTED, NULL, 0, cm->sp);
}
return;
bail:
- if (cm->msg.saddr.ib.qp_type != IBV_QPT_UD)
- dapls_ib_reinit_ep(cm->ep); /* reset QP state */
- dapls_ib_cm_free(cm, cm->ep);
dapls_cr_callback(cm, IB_CME_LOCAL_FAILURE, NULL, 0, cm->sp);
+ dapli_cm_free(cm);
}
/*
@@ -1386,7 +1375,6 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
dapl_os_unlock(&cm->lock);
return DAT_INVALID_STATE;
}
- dapl_os_unlock(&cm->lock);
dapl_dbg_log(DAPL_DBG_TYPE_CM,
" ACCEPT_USR: remote lid=%x"
@@ -1413,7 +1401,6 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
#endif
/* modify QP to RTR and then to RTS with remote info already read */
- dapl_os_lock(&ep->header.lock);
if (dapls_modify_qp_state(ep->qp_handle,
IBV_QPS_RTR,
cm->msg.daddr.ib.qpn,
@@ -1423,7 +1410,7 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
" ACCEPT_USR: QPS_RTR ERR %s -> lid %x qpn %x\n",
strerror(errno), ntohs(cm->msg.daddr.ib.lid),
ntohl(cm->msg.daddr.ib.qpn));
- dapl_os_unlock(&ep->header.lock);
+ dapl_os_unlock(&cm->lock);
goto bail;
}
if (dapls_modify_qp_state(ep->qp_handle,
@@ -1435,10 +1422,9 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
" ACCEPT_USR: QPS_RTS ERR %s -> lid %x qpn %x\n",
strerror(errno), ntohs(cm->msg.daddr.ib.lid),
ntohl(cm->msg.daddr.ib.qpn));
- dapl_os_unlock(&ep->header.lock);
+ dapl_os_unlock(&cm->lock);
goto bail;
}
- dapl_os_unlock(&ep->header.lock);
/* save remote address information */
dapl_os_memcpy(&ep->remote_ia_address,
@@ -1460,24 +1446,22 @@ dapli_accept_usr(DAPL_EP *ep, DAPL_CR *cr, DAT_COUNT p_size, DAT_PVOID p_data)
dapl_os_memcpy(&cm->p_data, p_data, p_size);
/* save state and setup valid reference to EP, HCA */
- dapl_os_lock(&cm->lock);
+ dapl_ep_link_cm(ep, cm);
cm->ep = ep;
cm->hca = ia->hca_ptr;
cm->state = DCM_RTU_PENDING;
dapl_os_get_time(&cm->timer); /* RTU expected */
dapl_os_unlock(&cm->lock);
- if (ucm_reply(cm))
+ if (ucm_reply(cm)) {
+ dapl_ep_unlink_cm(ep, cm); /* reply failed: undo the EP link made above */
goto bail;
-
+ }
dapl_dbg_log(DAPL_DBG_TYPE_CM, " PASSIVE: accepted!\n");
dapls_thread_signal(&cm->hca->ib_trans.signal);
return DAT_SUCCESS;
bail:
- if (cm->msg.saddr.ib.qp_type != IBV_QPT_UD)
- dapls_ib_reinit_ep(ep);
-
- dapls_ib_cm_free(cm, ep);
+ dapli_cm_free(cm);
return DAT_INTERNAL_ERROR;
}
@@ -1533,7 +1517,7 @@ dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
cm->state = DCM_REP_PENDING;
/* build connect request, send to remote CM based on r_addr info */
- return(dapli_cm_connect(ep, cm));
+ return (dapli_cm_connect(ep, cm));
}
/*
@@ -1552,16 +1536,19 @@ dapls_ib_connect(IN DAT_EP_HANDLE ep_handle,
* DAT_SUCCESS
*/
DAT_RETURN
-dapls_ib_disconnect(IN DAPL_EP *ep, IN DAT_CLOSE_FLAGS close_flags)
+dapls_ib_disconnect(IN DAPL_EP *ep_ptr, IN DAT_CLOSE_FLAGS close_flags)
{
- dapl_dbg_log(DAPL_DBG_TYPE_EP,
- "dapls_ib_disconnect(ep_handle %p ....)\n", ep);
+ dp_ib_cm_handle_t cm_ptr = dapl_get_cm_from_ep(ep_ptr);
- if (ep->cm_handle == NULL ||
- ep->param.ep_state == DAT_EP_STATE_DISCONNECTED)
+ if (ep_ptr->param.ep_state == DAT_EP_STATE_DISCONNECTED ||
+ ep_ptr->param.ep_attr.service_type != DAT_SERVICE_TYPE_RC) {
return DAT_SUCCESS;
- else
- return (dapli_cm_disconnect(ep->cm_handle));
+ }
+
+ /* RC. Transition to error state to flush queue */
+ dapls_modify_qp_state(ep_ptr->qp_handle, IBV_QPS_ERR, 0, 0, 0);
+
+ return (dapli_cm_disconnect(cm_ptr));
}
/*
@@ -1588,18 +1575,7 @@ dapls_ib_disconnect_clean(IN DAPL_EP *ep,
IN DAT_BOOLEAN active,
IN const ib_cm_events_t ib_cm_event)
{
- /* NOTE: SCM will only initialize cm_handle with RC type
- *
- * For UD there can many in-flight CR's so you
- * cannot cleanup timed out CR's with EP reference
- * alone since they share the same EP. The common
- * code that handles connection timeout logic needs
- * updated for UD support.
- */
- if (ep->cm_handle)
- dapls_ib_cm_free(ep->cm_handle, ep);
-
- return;
+ /* nothing to cleanup */
}
/*
@@ -1660,7 +1636,7 @@ dapls_ib_setup_conn_listener(IN DAPL_IA *ia,
/* queue up listen socket to process inbound CR's */
cm->state = DCM_LISTEN;
- ucm_queue_listen(cm);
+ dapli_queue_listen(cm);
return DAT_SUCCESS;
}
@@ -1687,7 +1663,6 @@ DAT_RETURN
dapls_ib_remove_conn_listener(IN DAPL_IA *ia, IN DAPL_SP *sp)
{
ib_cm_srvc_handle_t cm = sp->cm_srvc_handle;
- ib_hca_transport_t *tp = &ia->hca_ptr->ib_trans;
/* free cm_srvc_handle and port, and mark CM for cleanup */
if (cm) {
@@ -1696,14 +1671,8 @@ dapls_ib_remove_conn_listener(IN DAPL_IA *ia, IN DAPL_SP *sp)
ia, sp, cm, ntohs(cm->msg.dport));
sp->cm_srvc_handle = NULL;
- dapl_os_lock(&cm->lock);
- ucm_free_port(tp, ntohs(cm->msg.dport));
- cm->msg.dport = 0;
- cm->state = DCM_DESTROY;
- dapl_os_unlock(&cm->lock);
- ucm_dequeue_listen(cm);
- dapl_os_lock_destroy(&cm->lock);
- dapl_os_free(cm, sizeof(*cm));
+ dapli_dequeue_listen(cm);
+ dapls_cm_release(cm); /* last ref, dealloc */
}
return DAT_SUCCESS;
}
@@ -1792,13 +1761,11 @@ dapls_ib_reject_connection(IN dp_ib_cm_handle_t cm,
if (ucm_send(&cm->hca->ib_trans, &cm->msg, pdata, psize)) {
dapl_log(DAPL_DBG_TYPE_WARN,
" cm_reject: ERR: %s\n", strerror(errno));
+ dapl_os_unlock(&cm->lock);
return DAT_INTERNAL_ERROR;
}
dapl_os_unlock(&cm->lock);
-
- /* cleanup and destroy CM resources */
- dapls_ib_cm_free(cm, NULL);
-
+ dapli_cm_free(cm);
return DAT_SUCCESS;
}
@@ -1823,7 +1790,7 @@ dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle,
OUT DAT_SOCK_ADDR6 * remote_ia_address)
{
DAPL_HEADER *header;
- dp_ib_cm_handle_t ib_cm_handle;
+ dp_ib_cm_handle_t cm;
dapl_dbg_log(DAPL_DBG_TYPE_EP,
"dapls_ib_cm_remote_addr(dat_handle %p, ....)\n",
@@ -1832,14 +1799,14 @@ dapls_ib_cm_remote_addr(IN DAT_HANDLE dat_handle,
header = (DAPL_HEADER *) dat_handle;
if (header->magic == DAPL_MAGIC_EP)
- ib_cm_handle = ((DAPL_EP *) dat_handle)->cm_handle;
+ cm = dapl_get_cm_from_ep((DAPL_EP *) dat_handle);
else if (header->magic == DAPL_MAGIC_CR)
- ib_cm_handle = ((DAPL_CR *) dat_handle)->ib_cm_handle;
+ cm = ((DAPL_CR *) dat_handle)->ib_cm_handle;
else
return DAT_INVALID_HANDLE;
dapl_os_memcpy(remote_ia_address,
- &ib_cm_handle->msg.daddr, sizeof(DAT_SOCK_ADDR6));
+ &cm->msg.daddr, sizeof(DAT_SOCK_ADDR6));
return DAT_SUCCESS;
}
@@ -1976,19 +1943,25 @@ void cm_thread(void *arg)
while (next) {
cm = next;
next = dapl_llist_next_entry(&hca->ib_trans.list,
- (DAPL_LLIST_ENTRY *)&cm->entry);
+ (DAPL_LLIST_ENTRY *)&cm->local_entry);
+ dapls_cm_acquire(cm); /* hold thread ref */
dapl_os_lock(&cm->lock);
- if (cm->state == DCM_DESTROY ||
+ if (cm->state == DCM_FREE ||
hca->ib_trans.cm_state != IB_THREAD_RUN) {
- dapl_llist_remove_entry(&hca->ib_trans.list,
- (DAPL_LLIST_ENTRY *)&cm->entry);
dapl_os_unlock(&cm->lock);
- dapl_os_lock_destroy(&cm->lock);
- dapl_os_free(cm, sizeof(*cm));
+ dapl_log(DAPL_DBG_TYPE_CM,
+ " CM FREE: %p ep=%p st=%s refs=%d\n",
+ cm, cm->ep, dapl_cm_state_str(cm->state),
+ cm->ref_count);
+
+ dapls_cm_release(cm); /* release alloc ref */
+ dapli_cm_dequeue(cm); /* release workq ref */
+ dapls_cm_release(cm); /* release thread ref */
continue;
}
dapl_os_unlock(&cm->lock);
ucm_check_timers(cm, &time_ms);
+ dapls_cm_release(cm); /* release thread ref */
}
dapl_os_unlock(&hca->ib_trans.lock);
@@ -2047,20 +2020,25 @@ void cm_thread(void *arg)
cm = next;
next = dapl_llist_next_entry(
&hca->ib_trans.list,
- (DAPL_LLIST_ENTRY *)&cm->entry);
+ (DAPL_LLIST_ENTRY *)&cm->local_entry);
+ dapls_cm_acquire(cm); /* hold thread ref */
dapl_os_lock(&cm->lock);
- if (cm->state == DCM_DESTROY ||
+ if (cm->state == DCM_FREE ||
hca->ib_trans.cm_state != IB_THREAD_RUN) {
- dapl_llist_remove_entry(
- &hca->ib_trans.list,
- (DAPL_LLIST_ENTRY *)&cm->entry);
dapl_os_unlock(&cm->lock);
- dapl_os_lock_destroy(&cm->lock);
- dapl_os_free(cm, sizeof(*cm));
+ dapl_log(DAPL_DBG_TYPE_CM,
+ " CM FREE: %p ep=%p st=%s refs=%d\n",
+ cm, cm->ep, dapl_cm_state_str(cm->state),
+ cm->ref_count);
+
+ dapls_cm_release(cm); /* release alloc ref */
+ dapli_cm_dequeue(cm); /* release workq ref */
+ dapls_cm_release(cm); /* release thread ref */
continue;
}
dapl_os_unlock(&cm->lock);
ucm_check_timers(cm, &time_ms);
+ dapls_cm_release(cm); /* release thread ref */
}
/* set to exit and all resources destroyed */
@@ -2088,7 +2066,6 @@ void cm_thread(void *arg)
DAPL_FD_READ) == DAPL_FD_READ) {
recv(hca->ib_trans.signal.scm[0], rbuf, 2, 0);
}
-
dapl_os_lock(&hca->ib_trans.lock);
/* set to exit and all resources destroyed */
@@ -2129,7 +2106,7 @@ void dapls_print_cm_list(IN DAPL_IA *ia_ptr)
while (next_cm) {
cm = next_cm;
next_cm = dapl_llist_next_entry((DAPL_LLIST_HEAD*)list,
- (DAPL_LLIST_ENTRY*)&cm->entry);
+ (DAPL_LLIST_ENTRY*)&cm->local_entry);
printf( " LISTEN[%d]: sp %p %s uCM_QP: 0x%x %d 0x%x l_pid %x,%d\n",
i, cm->sp, dapl_cm_state_str(cm->state),
@@ -2153,7 +2130,7 @@ void dapls_print_cm_list(IN DAPL_IA *ia_ptr)
while (next_cm) {
cm = next_cm;
next_cm = dapl_llist_next_entry((DAPL_LLIST_HEAD*)list,
- (DAPL_LLIST_ENTRY*)&cm->entry);
+ (DAPL_LLIST_ENTRY*)&cm->local_entry);
printf( " CONN[%d]: ep %p cm %p %s %s"
" %x %x %x %s %x %x %x r_pid %x,%d\n",
diff --git a/dapl/openib_ucm/dapl_ib_util.h b/dapl/openib_ucm/dapl_ib_util.h
index d7844c6..de17f04 100644
--- a/dapl/openib_ucm/dapl_ib_util.h
+++ b/dapl/openib_ucm/dapl_ib_util.h
@@ -33,11 +33,15 @@
#include "openib_osd.h"
#include "dapl_ib_common.h"
+/* DAPL CM objects MUST include list_entry, ref_count, event for EP linking */
struct ib_cm_handle
{
- struct dapl_llist_entry entry;
+ struct dapl_llist_entry list_entry;
+ struct dapl_llist_entry local_entry;
+ DAPL_OS_WAIT_OBJECT event;
DAPL_OS_LOCK lock;
DAPL_OS_TIMEVAL timer;
+ int ref_count;
int state;
int retries;
struct dapl_hca *hca;
@@ -117,8 +121,9 @@ typedef struct _ib_hca_transport
void cm_thread(void *arg);
void ucm_async_event(struct dapl_hca *hca);
void dapli_cq_event_cb(struct _ib_hca_transport *tp);
-dp_ib_cm_handle_t dapls_ib_cm_create(DAPL_EP *ep);
-void dapls_ib_cm_free(dp_ib_cm_handle_t cm, DAPL_EP *ep);
+void dapls_cm_acquire(dp_ib_cm_handle_t cm_ptr);
+void dapls_cm_release(dp_ib_cm_handle_t cm_ptr);
+void dapls_cm_free(dp_ib_cm_handle_t cm_ptr);
#ifdef DAPL_COUNTERS
void dapls_print_cm_list(IN DAPL_IA *ia_ptr);
--
1.5.2.5
^ permalink raw reply related
* Re: InfiniBand/RDMA merge plans for 2.6.35
From: Jason Gunthorpe @ 2010-05-19 17:28 UTC (permalink / raw)
To: Sean Hefty; +Cc: 'Roland Dreier', linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <7943A3B0D2C7416186D6A61FE071D640-Zpru7NauK7drdx17CPfAsdBPR1lH4CV8@public.gmane.org>
On Mon, May 17, 2010 at 12:58:59PM -0700, Sean Hefty wrote:
> which has been updated to 2.6.34-rc7. I tried to keep the patches small, to
> make review a little easier.
>
> Jason, you've been most involved in reviewing the patches so far.
> Any chance I can ask you to assist Roland with a more formal review?
Sure, as time permits.. :)
Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* [PATCH] dapl-2.0: common: remove unnecessary lmr lkey hashing and duplicate lkey checking
From: Davis, Arlin R @ 2010-05-19 17:06 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, ofw_list
lmr lkey hashing is too restrictive given the returned lkey could be
the same value for different regions on some rdma devices. Actually,
this checking is really unnecessary and requires considerable overhead
for hashing so just remove hashing of lmr lkey's. Let verbs device
level do the checking and validation.
Signed-off-by: Arlin Davis <arlin.r.davis-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
dapl/common/dapl_hca_util.c | 16 +-------
dapl/common/dapl_lmr_free.c | 19 +---------
dapl/common/dapl_rmr_bind.c | 15 ++-----
dapl/udapl/dapl_lmr_create.c | 87 ------------------------------------------
4 files changed, 7 insertions(+), 130 deletions(-)
diff --git a/dapl/common/dapl_hca_util.c b/dapl/common/dapl_hca_util.c
index b8f068c..4a8a400 100644
--- a/dapl/common/dapl_hca_util.c
+++ b/dapl/common/dapl_hca_util.c
@@ -38,7 +38,6 @@
#include "dapl_adapter_util.h"
#include "dapl_provider.h"
#include "dapl_hca_util.h"
-#include "dapl_hash.h"
/*
* dapl_hca_alloc
@@ -66,13 +65,6 @@ DAPL_HCA *dapl_hca_alloc(char *name, char *port)
}
dapl_os_memzero(hca_ptr, sizeof(DAPL_HCA));
-
- if (DAT_SUCCESS !=
- dapls_hash_create(DAPL_HASH_TABLE_DEFAULT_CAPACITY,
- &hca_ptr->lmr_hash_table)) {
- goto bail;
- }
-
dapl_os_lock_init(&hca_ptr->lock);
dapl_llist_init_head(&hca_ptr->ia_list_head);
@@ -87,13 +79,8 @@ DAPL_HCA *dapl_hca_alloc(char *name, char *port)
return (hca_ptr);
bail:
- if (NULL != hca_ptr) {
- if (NULL != hca_ptr->lmr_hash_table) {
- dapls_hash_free(hca_ptr->lmr_hash_table);
- }
-
+ if (NULL != hca_ptr)
dapl_os_free(hca_ptr, sizeof(DAPL_HCA));
- }
return NULL;
}
@@ -115,7 +102,6 @@ DAPL_HCA *dapl_hca_alloc(char *name, char *port)
*/
void dapl_hca_free(DAPL_HCA * hca_ptr)
{
- (void)dapls_hash_free(hca_ptr->lmr_hash_table);
dapl_os_free(hca_ptr->name, dapl_os_strlen(hca_ptr->name) + 1);
dapl_os_free(hca_ptr, sizeof(DAPL_HCA));
}
diff --git a/dapl/common/dapl_lmr_free.c b/dapl/common/dapl_lmr_free.c
index e72824a..5f9336f 100644
--- a/dapl/common/dapl_lmr_free.c
+++ b/dapl/common/dapl_lmr_free.c
@@ -90,29 +90,12 @@ DAT_RETURN DAT_API dapl_lmr_free(IN DAT_LMR_HANDLE lmr_handle)
return DAT_INVALID_STATE;
}
- dat_status =
- dapls_hash_remove(lmr->header.owner_ia->hca_ptr->
- lmr_hash_table,
- lmr->param.lmr_context, NULL);
- if (dat_status != DAT_SUCCESS) {
- goto bail;
- }
-
dat_status = dapls_ib_mr_deregister(lmr);
if (dat_status == DAT_SUCCESS) {
dapl_os_atomic_dec(&pz->pz_ref_count);
dapl_lmr_dealloc(lmr);
- } else {
- /*
- * Deregister failed; put it back in the
- * hash table.
- */
- dapls_hash_insert(lmr->header.owner_ia->
- hca_ptr->lmr_hash_table,
- lmr->param.lmr_context, lmr);
- }
-
+ }
break;
}
#if defined(__KDAPL__)
diff --git a/dapl/common/dapl_rmr_bind.c b/dapl/common/dapl_rmr_bind.c
index 9793f38..ecb190b 100755
--- a/dapl/common/dapl_rmr_bind.c
+++ b/dapl/common/dapl_rmr_bind.c
@@ -48,7 +48,8 @@
STATIC _INLINE_ DAT_RETURN
dapli_rmr_bind_fuse(IN DAPL_RMR * rmr,
- IN const DAT_LMR_TRIPLET * lmr_triplet,
+ IN DAT_LMR_HANDLE lmr_handle,
+ IN const DAT_LMR_TRIPLET * lmr_triplet,
IN DAT_MEM_PRIV_FLAGS mem_priv,
IN DAPL_EP * ep_ptr,
IN DAT_RMR_COOKIE user_cookie,
@@ -69,6 +70,7 @@ dapli_rmr_bind_unfuse(IN DAPL_RMR * rmr,
DAT_RETURN
dapli_rmr_bind_fuse(IN DAPL_RMR * rmr,
+ IN DAT_LMR_HANDLE lmr_handle,
IN const DAT_LMR_TRIPLET * lmr_triplet,
IN DAT_MEM_PRIV_FLAGS mem_priv,
IN DAPL_EP * ep_ptr,
@@ -80,16 +82,8 @@ dapli_rmr_bind_fuse(IN DAPL_RMR * rmr,
DAPL_COOKIE *cookie;
DAT_RETURN dat_status;
DAT_BOOLEAN is_signaled;
- DAPL_HASH_DATA hash_lmr;
- dat_status =
- dapls_hash_search(rmr->header.owner_ia->hca_ptr->lmr_hash_table,
- lmr_triplet->lmr_context, &hash_lmr);
- if (DAT_SUCCESS != dat_status) {
- dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG2);
- goto bail;
- }
- lmr = (DAPL_LMR *) hash_lmr;
+ lmr = (DAPL_LMR *) lmr_handle;
/* if the ep in unconnected return an error. IB requires that the */
/* QP be connected to change a memory window binding since: */
@@ -305,6 +299,7 @@ dapl_rmr_bind(IN DAT_RMR_HANDLE rmr_handle,
/* if the rmr should be bound */
if (0 != lmr_triplet->segment_length) {
return dapli_rmr_bind_fuse(rmr,
+ lmr_handle,
lmr_triplet,
mem_priv,
ep_ptr,
diff --git a/dapl/udapl/dapl_lmr_create.c b/dapl/udapl/dapl_lmr_create.c
index 849f4fe..f1d5016 100644
--- a/dapl/udapl/dapl_lmr_create.c
+++ b/dapl/udapl/dapl_lmr_create.c
@@ -128,32 +128,6 @@ dapli_lmr_create_virtual(IN DAPL_IA * ia,
goto bail;
}
- /* if the LMR context is already in the hash table */
- dat_status = dapls_hash_search(ia->hca_ptr->lmr_hash_table,
- lmr->param.lmr_context, NULL);
- if (dat_status == DAT_SUCCESS) {
- (void)dapls_ib_mr_deregister(lmr);
- dapl_lmr_dealloc(lmr);
-
- dat_status =
- DAT_ERROR(DAT_INVALID_STATE, DAT_INVALID_STATE_LMR_IN_USE);
- goto bail;
- }
-
- dat_status = dapls_hash_insert(ia->hca_ptr->lmr_hash_table,
- lmr->param.lmr_context, lmr);
- if (dat_status != DAT_SUCCESS) {
- (void)dapls_ib_mr_deregister(lmr);
- dapl_lmr_dealloc(lmr);
-
- /* The value returned by dapls_hash_insert(.) is not */
- /* returned to the consumer because the spec. requires */
- /* that dat_lmr_create(.) return only certain values. */
- dat_status =
- DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, DAT_RESOURCE_MEMORY);
- goto bail;
- }
-
dapl_os_atomic_inc(&pz->pz_ref_count);
*lmr_handle = (DAT_LMR_HANDLE) lmr;
@@ -189,7 +163,6 @@ dapli_lmr_create_lmr(IN DAPL_IA * ia,
DAPL_LMR *lmr;
DAT_REGION_DESCRIPTION reg_desc;
DAT_RETURN dat_status;
- DAPL_HASH_DATA hash_lmr;
dapl_dbg_log(DAPL_DBG_TYPE_API,
"dapl_lmr_create_lmr (%p, %p, %p, %x, %x, %p, %p, %p, %p)\n",
@@ -199,14 +172,6 @@ dapli_lmr_create_lmr(IN DAPL_IA * ia,
lmr_handle,
lmr_context, registered_length, registered_address);
- dat_status = dapls_hash_search(ia->hca_ptr->lmr_hash_table,
- original_lmr->param.lmr_context,
- &hash_lmr);
- if (dat_status != DAT_SUCCESS) {
- dat_status = DAT_ERROR(DAT_INVALID_PARAMETER, DAT_INVALID_ARG2);
- goto bail;
- }
- lmr = (DAPL_LMR *) hash_lmr;
reg_desc.for_lmr_handle = (DAT_LMR_HANDLE) original_lmr;
lmr = dapl_lmr_alloc(ia,
@@ -228,32 +193,6 @@ dapli_lmr_create_lmr(IN DAPL_IA * ia,
goto bail;
}
- /* if the LMR context is already in the hash table */
- dat_status = dapls_hash_search(ia->hca_ptr->lmr_hash_table,
- lmr->param.lmr_context, NULL);
- if (dat_status == DAT_SUCCESS) {
- dapls_ib_mr_deregister(lmr);
- dapl_lmr_dealloc(lmr);
-
- dat_status =
- DAT_ERROR(DAT_INVALID_STATE, DAT_INVALID_STATE_LMR_IN_USE);
- goto bail;
- }
-
- dat_status = dapls_hash_insert(ia->hca_ptr->lmr_hash_table,
- lmr->param.lmr_context, lmr);
- if (dat_status != DAT_SUCCESS) {
- dapls_ib_mr_deregister(lmr);
- dapl_lmr_dealloc(lmr);
-
- /* The value returned by dapls_hash_insert(.) is not */
- /* returned to the consumer because the spec. requires */
- /* that dat_lmr_create(.) return only certain values. */
- dat_status =
- DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, DAT_RESOURCE_MEMORY);
- goto bail;
- }
-
dapl_os_atomic_inc(&pz->pz_ref_count);
*lmr_handle = (DAT_LMR_HANDLE) lmr;
@@ -328,32 +267,6 @@ dapli_lmr_create_shared(IN DAPL_IA * ia,
goto bail;
}
- /* if the LMR context is already in the hash table */
- dat_status = dapls_hash_search(ia->hca_ptr->lmr_hash_table,
- lmr->param.lmr_context, NULL);
- if (DAT_SUCCESS == dat_status) {
- (void)dapls_ib_mr_deregister(lmr);
- dapl_lmr_dealloc(lmr);
-
- dat_status =
- DAT_ERROR(DAT_INVALID_STATE, DAT_INVALID_STATE_LMR_IN_USE);
- goto bail;
- }
-
- dat_status = dapls_hash_insert(ia->hca_ptr->lmr_hash_table,
- lmr->param.lmr_context, lmr);
- if (dat_status != DAT_SUCCESS) {
- (void)dapls_ib_mr_deregister(lmr);
- dapl_lmr_dealloc(lmr);
-
- /* The value returned by dapls_hash_insert(.) is not */
- /* returned to the consumer because the spec. requires */
- /* that dat_lmr_create(.) return only certain values. */
- dat_status =
- DAT_ERROR(DAT_INSUFFICIENT_RESOURCES, DAT_RESOURCE_MEMORY);
- goto bail;
- }
-
dapl_os_atomic_inc(&pz->pz_ref_count);
*lmr_handle = (DAT_LMR_HANDLE) lmr;
--
1.5.2.5
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH] dapl-2.0: common: add some debug prints to help isolate QP type issues
From: Davis, Arlin R @ 2010-05-19 17:05 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, ofw_list
Signed-off-by: Arlin Davis <arlin.r.davis-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
dapl/openib_common/qp.c | 9 ++++++---
1 files changed, 6 insertions(+), 3 deletions(-)
diff --git a/dapl/openib_common/qp.c b/dapl/openib_common/qp.c
index 15c1dae..473604b 100644
--- a/dapl/openib_common/qp.c
+++ b/dapl/openib_common/qp.c
@@ -182,8 +182,8 @@ dapls_ib_qp_alloc(IN DAPL_IA * ia_ptr,
}
#endif
dapl_dbg_log(DAPL_DBG_TYPE_EP,
- " qp_alloc: qpn %p sq %d,%d rq %d,%d\n",
- ep_ptr->qp_handle->qp_num,
+ " qp_alloc: qpn %p type %d sq %d,%d rq %d,%d\n",
+ ep_ptr->qp_handle->qp_num, ep_ptr->qp_handle->qp_type,
qp_create.cap.max_send_wr, qp_create.cap.max_send_sge,
qp_create.cap.max_recv_wr, qp_create.cap.max_recv_sge);
@@ -561,8 +561,11 @@ dapls_create_ah(IN DAPL_HCA *hca,
struct ibv_qp_attr qp_attr;
ib_ah_handle_t ah;
- if (qp->qp_type != IBV_QPT_UD)
+ if (qp->qp_type != IBV_QPT_UD) {
+ dapl_log(DAPL_DBG_TYPE_ERR,
+ " create_ah ERR: QP_type != UD\n");
return NULL;
+ }
dapl_os_memzero((void *)&qp_attr, sizeof(qp_attr));
qp_attr.qp_state = IBV_QP_STATE;
--
1.5.2.5
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH] dapl-2.0: common: dapl_event_str function missing 2 IB extended events
From: Davis, Arlin R @ 2010-05-19 17:02 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, ofw_list
Add all IB extended events in event string print function
Signed-off-by: Arlin Davis <arlin.r.davis-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
dapl/common/dapl_evd_util.c | 4 ++++
1 files changed, 4 insertions(+), 0 deletions(-)
diff --git a/dapl/common/dapl_evd_util.c b/dapl/common/dapl_evd_util.c
index 14a10c7..cb3eb1b 100644
--- a/dapl/common/dapl_evd_util.c
+++ b/dapl/common/dapl_evd_util.c
@@ -96,6 +96,10 @@ char *dapl_event_str(IN DAT_EVENT_NUMBER event_num)
DAT_IB_EXTENSION_RANGE_BASE + 1},
{"DAT_IB_UD_CONNECTION_EVENT_ESTABLISHED",
DAT_IB_EXTENSION_RANGE_BASE + 2},
+ {"DAT_IB_UD_CONNECTION_REJECT_EVENT",
+ DAT_IB_EXTENSION_RANGE_BASE + 3},
+ {"DAT_IB_UD_CONNECTION_ERROR_EVENT",
+ DAT_IB_EXTENSION_RANGE_BASE + 4},
{"DAT_IW_EXTENSION_RANGE_BASE", DAT_IW_EXTENSION_RANGE_BASE},
#endif /* DAT_EXTENSIONS */
{NULL, 0},
--
1.5.2.5
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH] dapl-2.0: windows: remove static paths from dapltest scripts
From: Davis, Arlin R @ 2010-05-19 17:02 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, ofw_list
signed-off-by: stan smith <stan.smith-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
test/dapltest/scripts/dt-cli.bat | 11 +++++------
test/dapltest/scripts/dt-svr.bat | 12 +++++-------
2 files changed, 10 insertions(+), 13 deletions(-)
diff --git a/test/dapltest/scripts/dt-cli.bat b/test/dapltest/scripts/dt-cli.bat
index f1eb23b..441b975 100644
--- a/test/dapltest/scripts/dt-cli.bat
+++ b/test/dapltest/scripts/dt-cli.bat
@@ -17,13 +17,12 @@ if not "!F!" == "off" (
rem set DAT_OVERRIDE=D:\dapl2\dat.conf
rem favor DAT 2.0 (dapl2test.exe) over DAT 1.1 (dapltest.exe)
-set PF="%ProgramFiles%\WinOF"
-
-if NOT EXIST %PF%\dapl2test.exe (
- echo Missing file %PF%\dapl2test.exe ?
- exit /B 1
-)
set DT=dapl2test.exe
+%DT% -h > Nul 2>&1
+if not "%ERRORLEVEL%" == "1" (
+ echo %0: ERR - %DT% not in exec path?
+ exit /B %ERRORLEVEL%
+)
rem To debug dapl2test - use dapl2testd.exe with ibnic0v2d
rem setup DAPL provider name: translate shorthand name or use name from dat.conf.
diff --git a/test/dapltest/scripts/dt-svr.bat b/test/dapltest/scripts/dt-svr.bat
index abd17fb..9974d23 100644
--- a/test/dapltest/scripts/dt-svr.bat
+++ b/test/dapltest/scripts/dt-svr.bat
@@ -5,15 +5,13 @@ rem
SETLOCAL
rem set DAT_OVERRIDE=C:\DAT\dat.conf
+set DT=dapl2test.exe
-set PF="%ProgramFiles%\WinOF"
-
-if NOT EXIST %PF%\dapl2test.exe (
- echo Missing file %PF%\dapl2test.exe ?
- exit /B 1
+%DT% -h > Nul 2>&1
+if not "%ERRORLEVEL%" == "1" (
+ echo %0: ERR - %DT% not in exec path?
+ exit /B %ERRORLEVEL%
)
-
-set DT=dapl2test.exe
rem To debug dapl2test - use dapl2testd.exe with ibnic0v2d
rem which Dapl provider?
--
1.5.2.5
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH] dapl-2.0: ibal: output completion code in decimal & hex as intended
From: Davis, Arlin R @ 2010-05-19 17:02 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, ofw_list
Signed-off-by: stan smith <stan.smith-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
dapl/ibal/dapl_ibal_util.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
diff --git a/dapl/ibal/dapl_ibal_util.c b/dapl/ibal/dapl_ibal_util.c
index 513d7c9..7f9b819 100644
--- a/dapl/ibal/dapl_ibal_util.c
+++ b/dapl/ibal/dapl_ibal_util.c
@@ -2011,7 +2011,7 @@ dapls_ib_get_dto_status(
default:
#ifdef DAPL_DBG
- dapl_dbg_log (DAPL_DBG_TYPE_ERR,"%s() unknown IB_COMP_ST %x(0x%x)\n",
+ dapl_dbg_log (DAPL_DBG_TYPE_ERR,"%s() unknown IB_COMP_ST %d(0x%x)\n",
__FUNCTION__,ib_status,ib_status);
#endif
return DAT_DTO_FAILURE;
--
1.5.2.5
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH] dapl-2.0: ibal: add missing windows makefile in git tree
From: Davis, Arlin R @ 2010-05-19 17:02 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, ofw_list
Signed-off-by: Arlin Davis <arlin.r.davis-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
dapl/ibal/makefile | 7 +++++++
1 files changed, 7 insertions(+), 0 deletions(-)
create mode 100644 dapl/ibal/makefile
diff --git a/dapl/ibal/makefile b/dapl/ibal/makefile
new file mode 100644
index 0000000..a0c0627
--- /dev/null
+++ b/dapl/ibal/makefile
@@ -0,0 +1,7 @@
+#
+# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source
+# file to this component. This file merely indirects to the real make file
+# that is shared by all the driver components of the OpenIB Windows project.
+#
+
+!INCLUDE ..\..\..\..\inc\openib.def
--
1.5.2.5
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH 2/2] compat-dapl-1.2: cma: memory leak of verbs CQ and completion channels created during dat_ia_open
From: Davis, Arlin R @ 2010-05-19 16:43 UTC (permalink / raw)
To: linux-rdma@vger.kernel.org, ofw_list
check/cleanup CQ and completion channels during dat_ia_close
Signed-off-by: Arlin Davis <arlin.r.davis@intel.com>
---
dapl/openib_cma/dapl_ib_util.c | 22 ++++++++++++++++------
1 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/dapl/openib_cma/dapl_ib_util.c b/dapl/openib_cma/dapl_ib_util.c
index 9d97ae1..00aa5fb 100755
--- a/dapl/openib_cma/dapl_ib_util.c
+++ b/dapl/openib_cma/dapl_ib_util.c
@@ -373,12 +373,6 @@ DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA *hca_ptr)
dapl_dbg_log(DAPL_DBG_TYPE_UTIL," close_hca: %p->%p\n",
hca_ptr,hca_ptr->ib_hca_handle);
- if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
- if (rdma_destroy_id(hca_ptr->ib_trans.cm_id))
- return(dapl_convert_errno(errno,"ib_close_device"));
- hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
- }
-
dapl_os_lock(&g_hca_lock);
if (g_ib_thread_state != IB_THREAD_RUN) {
dapl_os_unlock(&g_hca_lock);
@@ -410,6 +404,22 @@ DAT_RETURN dapls_ib_close_hca(IN DAPL_HCA *hca_ptr)
nanosleep (&sleep, &remain);
}
bail:
+ if (hca_ptr->ib_trans.ib_cq)
+ ibv_destroy_comp_channel(hca_ptr->ib_trans.ib_cq);
+
+ if (hca_ptr->ib_trans.ib_cq_empty) {
+ struct ibv_comp_channel *channel;
+ channel = hca_ptr->ib_trans.ib_cq_empty->channel;
+ ibv_destroy_cq(hca_ptr->ib_trans.ib_cq_empty);
+ ibv_destroy_comp_channel(channel);
+ }
+
+ if (hca_ptr->ib_hca_handle != IB_INVALID_HANDLE) {
+ if (rdma_destroy_id(hca_ptr->ib_trans.cm_id))
+ return (dapl_convert_errno(errno, "ib_close_device"));
+ hca_ptr->ib_hca_handle = IB_INVALID_HANDLE;
+ }
+
return (DAT_SUCCESS);
}
--
1.5.2.5
^ permalink raw reply related
* [PATCH 1/2] compat-dapl-1.2: cma: memory leak of FD's (pipe) created during dat_evd_create
From: Davis, Arlin R @ 2010-05-19 16:42 UTC (permalink / raw)
To: linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org, ofw_list
Add checking for pipe FD's during destroy and clean them up with close.
Signed-off-by: Arlin Davis <arlin.r.davis-ral2JQCrhuEAvxtiuMwx3w@public.gmane.org>
---
dapl/openib_cma/dapl_ib_cq.c | 8 +++++++-
1 files changed, 7 insertions(+), 1 deletions(-)
diff --git a/dapl/openib_cma/dapl_ib_cq.c b/dapl/openib_cma/dapl_ib_cq.c
index cf19f38..c54bbaf 100644
--- a/dapl/openib_cma/dapl_ib_cq.c
+++ b/dapl/openib_cma/dapl_ib_cq.c
@@ -462,8 +462,11 @@ dapls_ib_wait_object_create(IN DAPL_EVD *evd_ptr,
ibv_create_comp_channel(
evd_ptr->header.owner_ia->hca_ptr->ib_hca_handle);
- if ((*p_cq_wait_obj_handle)->events == NULL)
+ if ((*p_cq_wait_obj_handle)->events == NULL) {
+ close((*p_cq_wait_obj_handle)->pipe[0]);
+ close((*p_cq_wait_obj_handle)->pipe[1]);
goto bail;
+ }
return DAT_SUCCESS;
bail:
@@ -483,6 +486,9 @@ dapls_ib_wait_object_destroy(IN ib_wait_obj_handle_t p_cq_wait_obj_handle)
ibv_destroy_comp_channel(p_cq_wait_obj_handle->events);
+ close(p_cq_wait_obj_handle->pipe[0]);
+ close(p_cq_wait_obj_handle->pipe[1]);
+
dapl_os_free(p_cq_wait_obj_handle,
sizeof(struct _ib_wait_obj_handle));
--
1.5.2.5
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* Re: [PATCH v2] libibverbs: add path record definitions to sa.h
From: Steve Wise @ 2010-05-19 16:00 UTC (permalink / raw)
To: Walukiewicz, Miroslaw; +Cc: Roland Dreier, Hefty, Sean, linux-rdma
In-Reply-To: <BE2BFE91933D1B4089447C64486040801D4D8232-IGOiFh9zz4wLt2AQoY/u9bfspsVTdybXVpNB7YpNyf8@public.gmane.org>
Walukiewicz, Miroslaw wrote:
> Hello Steve,
>
> Do you plan some changes in the core code related to RAW_QPT?
>
>
The only changes I see needed to the kernel core is the mcast change you
already proposed to allow mcast attach/detach for RAW_ETY qps...
> Could you explain me better what means "priviledged interface" for you?
>
>
I just mean that allocating these raw qps should only be allowed by
effective UID 0. This is analogous to PF_PACKET sockets which are
privileged as well.
Steve.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* RE: [PATCH v2] libibverbs: add path record definitions to sa.h
From: Walukiewicz, Miroslaw @ 2010-05-19 15:37 UTC (permalink / raw)
To: Steve Wise, Roland Dreier; +Cc: Hefty, Sean, linux-rdma
In-Reply-To: <4BF29E3D.40704-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>
Hello Steve,
Do you plan some changes in the core code related to RAW_QPT?
Could you explain me better what means "priviledged interface" for you?
Regards,
Mirek
-----Original Message-----
From: linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org [mailto:linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org] On Behalf Of Steve Wise
Sent: Tuesday, May 18, 2010 4:04 PM
To: Roland Dreier
Cc: Hefty, Sean; linux-rdma
Subject: Re: [PATCH v2] libibverbs: add path record definitions to sa.h
Roland Dreier wrote:
> > Can you add the RAW_ETY qp type in this release as well?
>
> To be honest I haven't looked at the iWARP datagram stuff at all. I'm
> not sure overloading the RAW_ETY QP type is necessarily the right thing
> to do -- it has quite different (never implemented) semantics in the IB
> case. Is there any overview of what you guys are planning as far as
> how work requests are created for such QPs?
>
The RAW_ETY qp would be just that: A kernel-bypass/user mode qp that
allows sending/receiving ethernet packets. It would also provide a way
for user applications to join/leave ethernet mcast groups (which
requires an rdma core kernel change that Intel posted too). What the
iWARP vendors are doing on top of that is implementing some form of UDP
in user mode. The main goal here is to provide an ultra low latency UDP
multicast and unicast channel for important market segments that desire
this paradigm. Also, due to the nature of this (send/recv raw eth
frames), the interface would be privileged.
If you want to wait, then later I'll post patches on how this is being
done for cxgb4. But I thought adding the RAW_ETY was definitely a
common requirement for Intel and Chelsio.
Steve.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* [PATCH V2 4/4] mlx4_ib: XRC RCV qp implementation.
From: Jack Morgenstein @ 2010-05-19 7:44 UTC (permalink / raw)
To: rolandd-FYB4Gu1CFyUAvxtiuMwx3w
Cc: Sean Hefty, linux-rdma-u79uwXL29TY76Z2rM5mHXA
RE-sending with correct subject line (should be V2).
Support for XRC RCV-only QP (requested by userspace,
but resides in kernel space).
Implements create_xrc_rcv_qp, modify_xrc_rcv_qp, query_xrc_rcv_qp,
and destroy_xrc_rcv_qp.
Since query_xrc_rcv_qp and query_qp are very similar, the common portions
have been placed in a helper function which is invoked by both query methods.
Signed-off-by: Jack Morgenstein <jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
---
drivers/infiniband/hw/mlx4/cq.c | 4 +-
drivers/infiniband/hw/mlx4/main.c | 60 ++++++++--
drivers/infiniband/hw/mlx4/mlx4_ib.h | 13 ++
drivers/infiniband/hw/mlx4/qp.c | 208 +++++++++++++++++++++++++++++++++-
4 files changed, 272 insertions(+), 13 deletions(-)
Index: infiniband/drivers/infiniband/hw/mlx4/cq.c
===================================================================
--- infiniband.orig/drivers/infiniband/hw/mlx4/cq.c
+++ infiniband/drivers/infiniband/hw/mlx4/cq.c
@@ -176,7 +176,7 @@ struct ib_cq *mlx4_ib_create_cq(struct i
if (entries < 1 || entries > dev->dev->caps.max_cqes)
return ERR_PTR(-EINVAL);
- cq = kmalloc(sizeof *cq, GFP_KERNEL);
+ cq = kzalloc(sizeof *cq, GFP_KERNEL);
if (!cq)
return ERR_PTR(-ENOMEM);
@@ -545,7 +545,7 @@ static int mlx4_ib_poll_one(struct mlx4_
struct mlx4_cqe *cqe;
struct mlx4_qp *mqp;
struct mlx4_ib_wq *wq;
- struct mlx4_ib_srq *srq;
+ struct mlx4_ib_srq *uninitialized_var(srq);
struct mlx4_srq *msrq;
int is_send;
int is_error;
Index: infiniband/drivers/infiniband/hw/mlx4/main.c
===================================================================
--- infiniband.orig/drivers/infiniband/hw/mlx4/main.c
+++ infiniband/drivers/infiniband/hw/mlx4/main.c
@@ -420,7 +420,7 @@ static struct ib_pd *mlx4_ib_alloc_pd(st
struct mlx4_ib_pd *pd;
int err;
- pd = kmalloc(sizeof *pd, GFP_KERNEL);
+ pd = kzalloc(sizeof *pd, GFP_KERNEL);
if (!pd)
return ERR_PTR(-ENOMEM);
@@ -462,12 +462,18 @@ static int mlx4_ib_mcg_detach(struct ib_
&to_mqp(ibqp)->mqp, gid->raw);
}
+static void mlx4_dummy_comp_handler(struct ib_cq *cq, void *cq_context)
+{
+}
+
static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev,
struct ib_ucontext *context,
struct ib_udata *udata)
{
struct mlx4_ib_xrcd *xrcd;
struct mlx4_ib_dev *mdev = to_mdev(ibdev);
+ struct ib_pd *pd;
+ struct ib_cq *cq;
int err;
if (!(mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
@@ -478,23 +484,51 @@ static struct ib_xrcd *mlx4_ib_alloc_xrc
return ERR_PTR(-ENOMEM);
err = mlx4_xrcd_alloc(mdev->dev, &xrcd->xrcdn);
- if (err) {
- kfree(xrcd);
- return ERR_PTR(err);
+ if (err)
+ goto err_xrcd;
+
+ pd = mlx4_ib_alloc_pd(ibdev, NULL, NULL);
+ if (IS_ERR(pd)) {
+ err = PTR_ERR(pd);
+ goto err_pd;
}
+ pd->device = ibdev;
+
+ cq = mlx4_ib_create_cq(ibdev, 1, 0, NULL, NULL);
+ if (IS_ERR(cq)) {
+ err = PTR_ERR(cq);
+ goto err_cq;
+ }
+ cq->device = ibdev;
+ cq->comp_handler = mlx4_dummy_comp_handler;
if (context)
if (ib_copy_to_udata(udata, &xrcd->xrcdn, sizeof(__u32))) {
- mlx4_xrcd_free(mdev->dev, xrcd->xrcdn);
- kfree(xrcd);
- return ERR_PTR(-EFAULT);
+ err = -EFAULT;
+ goto err_copy;
}
+ xrcd->cq = cq;
+ xrcd->pd = pd;
return &xrcd->ibxrcd;
+
+err_copy:
+ mlx4_ib_destroy_cq(cq);
+err_cq:
+ mlx4_ib_dealloc_pd(pd);
+err_pd:
+ mlx4_xrcd_free(mdev->dev, xrcd->xrcdn);
+err_xrcd:
+ kfree(xrcd);
+ return ERR_PTR(err);
}
static int mlx4_ib_dealloc_xrcd(struct ib_xrcd *xrcd)
{
+ struct mlx4_ib_xrcd *mxrcd = to_mxrcd(xrcd);
+
+ mlx4_ib_destroy_cq(mxrcd->cq);
+ mlx4_ib_dealloc_pd(mxrcd->pd);
mlx4_xrcd_free(to_mdev(xrcd->device)->dev, to_mxrcd(xrcd)->xrcdn);
kfree(xrcd);
@@ -700,18 +734,28 @@ static void *mlx4_ib_add(struct mlx4_dev
ibdev->ib_dev.create_xrc_srq = mlx4_ib_create_xrc_srq;
ibdev->ib_dev.alloc_xrcd = mlx4_ib_alloc_xrcd;
ibdev->ib_dev.dealloc_xrcd = mlx4_ib_dealloc_xrcd;
+ ibdev->ib_dev.create_xrc_rcv_qp = mlx4_ib_create_xrc_rcv_qp;
+ ibdev->ib_dev.modify_xrc_rcv_qp = mlx4_ib_modify_xrc_rcv_qp;
+ ibdev->ib_dev.query_xrc_rcv_qp = mlx4_ib_query_xrc_rcv_qp;
+ ibdev->ib_dev.destroy_xrc_rcv_qp = mlx4_ib_destroy_xrc_rcv_qp;
ibdev->ib_dev.uverbs_cmd_mask |=
(1ull << IB_USER_VERBS_CMD_CREATE_XRC_SRQ) |
(1ull << IB_USER_VERBS_CMD_OPEN_XRCD) |
- (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD);
+ (1ull << IB_USER_VERBS_CMD_CLOSE_XRCD) |
+ (1ull << IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP) |
+ (1ull << IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP) |
+ (1ull << IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP) |
+ (1ull << IB_USER_VERBS_CMD_DESTROY_XRC_RCV_QP) |
+ (1ull << IB_USER_VERBS_CMD_REG_XRC_RCV_QP) |
+ (1ull << IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP);
}
-
if (init_node_data(ibdev))
goto err_map;
spin_lock_init(&ibdev->sm_lock);
mutex_init(&ibdev->cap_mask_mutex);
+ mutex_init(&ibdev->xrc_rcv_mutex);
if (ib_register_device(&ibdev->ib_dev))
goto err_map;
Index: infiniband/drivers/infiniband/hw/mlx4/mlx4_ib.h
===================================================================
--- infiniband.orig/drivers/infiniband/hw/mlx4/mlx4_ib.h
+++ infiniband/drivers/infiniband/hw/mlx4/mlx4_ib.h
@@ -59,6 +59,8 @@ struct mlx4_ib_pd {
struct mlx4_ib_xrcd {
struct ib_xrcd ibxrcd;
u32 xrcdn;
+ struct ib_pd *pd;
+ struct ib_cq *cq;
};
struct mlx4_ib_cq_buf {
@@ -115,6 +117,7 @@ struct mlx4_ib_wq {
enum mlx4_ib_qp_flags {
MLX4_IB_QP_LSO = 1 << 0,
MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1,
+ MLX4_IB_XRC_RCV = 1 << 2,
};
struct mlx4_ib_qp {
@@ -181,6 +184,7 @@ struct mlx4_ib_dev {
spinlock_t sm_lock;
struct mutex cap_mask_mutex;
+ struct mutex xrc_rcv_mutex;
bool ib_active;
};
@@ -329,6 +333,15 @@ int mlx4_ib_map_phys_fmr(struct ib_fmr *
u64 iova);
int mlx4_ib_unmap_fmr(struct list_head *fmr_list);
int mlx4_ib_fmr_dealloc(struct ib_fmr *fmr);
+int mlx4_ib_create_xrc_rcv_qp(struct ib_qp_init_attr *init_attr,
+ u32 *qp_num);
+int mlx4_ib_modify_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num,
+ struct ib_qp_attr *attr, int attr_mask);
+int mlx4_ib_query_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num,
+ struct ib_qp_attr *attr, int attr_mask,
+ struct ib_qp_init_attr *init_attr);
+int mlx4_ib_destroy_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num);
+
static inline int mlx4_ib_ah_grh_present(struct mlx4_ib_ah *ah)
{
Index: infiniband/drivers/infiniband/hw/mlx4/qp.c
===================================================================
--- infiniband.orig/drivers/infiniband/hw/mlx4/qp.c
+++ infiniband/drivers/infiniband/hw/mlx4/qp.c
@@ -59,6 +59,7 @@ enum {
MLX4_IB_LSO_HEADER_SPARE = 128,
};
+
struct mlx4_ib_sqp {
struct mlx4_ib_qp qp;
int pkey_index;
@@ -209,14 +210,14 @@ static inline unsigned pad_wraparound(st
static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type)
{
struct ib_event event;
- struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
+ struct mlx4_ib_qp *mqp = to_mibqp(qp);
+ struct ib_qp *ibqp = &mqp->ibqp;
if (type == MLX4_EVENT_TYPE_PATH_MIG)
to_mibqp(qp)->port = to_mibqp(qp)->alt_port;
if (ibqp->event_handler) {
event.device = ibqp->device;
- event.element.qp = ibqp;
switch (type) {
case MLX4_EVENT_TYPE_PATH_MIG:
event.event = IB_EVENT_PATH_MIG;
@@ -248,6 +249,12 @@ static void mlx4_ib_qp_event(struct mlx4
return;
}
+ if (unlikely(ibqp->qp_type == IB_QPT_XRC &&
+ mqp->flags & MLX4_IB_XRC_RCV)) {
+ event.event |= IB_XRC_QP_EVENT_FLAG;
+ event.element.xrc_qp_num = ibqp->qp_num;
+ } else
+ event.element.qp = ibqp;
ibqp->event_handler(&event, ibqp->qp_context);
}
}
@@ -1886,27 +1893,15 @@ static void to_ib_ah_attr(struct mlx4_de
}
}
-int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
- struct ib_qp_init_attr *qp_init_attr)
+int mlx4_ib_query_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp,
+ struct ib_qp_attr *qp_attr,
+ struct ib_qp_init_attr *qp_init_attr)
{
- struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
- struct mlx4_ib_qp *qp = to_mqp(ibqp);
struct mlx4_qp_context context;
int mlx4_state;
- int err = 0;
- mutex_lock(&qp->mutex);
-
- if (qp->state == IB_QPS_RESET) {
- qp_attr->qp_state = IB_QPS_RESET;
- goto done;
- }
-
- err = mlx4_qp_query(dev->dev, &qp->mqp, &context);
- if (err) {
- err = -EINVAL;
- goto out;
- }
+ if (mlx4_qp_query(dev->dev, &qp->mqp, &context))
+ return -EINVAL;
mlx4_state = be32_to_cpu(context.flags) >> 28;
@@ -1950,7 +1945,26 @@ int mlx4_ib_query_qp(struct ib_qp *ibqp,
qp_attr->rnr_retry = (be32_to_cpu(context.params1) >> 13) & 0x7;
qp_attr->alt_timeout = context.alt_path.ackto >> 3;
-done:
+ return 0;
+}
+
+int mlx4_ib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibqp->device);
+ struct mlx4_ib_qp *qp = to_mqp(ibqp);
+ int err = 0;
+
+ mutex_lock(&qp->mutex);
+
+ if (qp->state == IB_QPS_RESET)
+ qp_attr->qp_state = IB_QPS_RESET;
+ else {
+ err = mlx4_ib_query_qp_common(dev, qp, qp_attr, qp_init_attr);
+ if (err)
+ goto out;
+ }
+
qp_attr->cur_qp_state = qp_attr->qp_state;
qp_attr->cap.max_recv_wr = qp->rq.wqe_cnt;
qp_attr->cap.max_recv_sge = qp->rq.max_gs;
@@ -1983,3 +1997,164 @@ out:
return err;
}
+int mlx4_ib_create_xrc_rcv_qp(struct ib_qp_init_attr *init_attr,
+ u32 *qp_num)
+{
+ struct mlx4_ib_dev *dev = to_mdev(init_attr->xrcd->device);
+ struct mlx4_ib_xrcd *xrcd = to_mxrcd(init_attr->xrcd);
+ struct ib_qp_init_attr lcl_init;
+ struct mlx4_ib_qp *qp;
+ struct ib_qp *ibqp;
+ int err;
+
+ if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+ return -ENOSYS;
+
+ memcpy(&lcl_init, init_attr, sizeof(struct ib_qp_init_attr));
+ lcl_init.qp_type = IB_QPT_XRC;
+ lcl_init.sq_sig_type = 0;
+ lcl_init.cap.max_inline_data = 0;
+ lcl_init.cap.max_recv_sge = 0;
+ lcl_init.cap.max_recv_wr = 0;
+ lcl_init.cap.max_send_sge = 1;
+ lcl_init.cap.max_send_wr = 1;
+
+ qp = kzalloc(sizeof *qp, GFP_KERNEL);
+ if (!qp)
+ return -ENOMEM;
+
+ qp->flags = MLX4_IB_XRC_RCV;
+ qp->xrcdn = xrcd->xrcdn;
+ err = create_qp_common(dev, xrcd->pd, &lcl_init, NULL, 0, qp);
+ if (err) {
+ kfree(qp);
+ return err;
+ }
+
+ ibqp = &qp->ibqp;
+ /* set the ibqp attributes which will be used by the mlx4 module */
+ ibqp->qp_num = qp->mqp.qpn;
+ ibqp->device = lcl_init.xrcd->device;
+ ibqp->pd = xrcd->pd;
+ ibqp->send_cq = ibqp->recv_cq = xrcd->cq;
+ ibqp->event_handler = lcl_init.event_handler;
+ ibqp->qp_context = lcl_init.qp_context;
+ ibqp->qp_type = lcl_init.qp_type;
+ ibqp->xrcd = lcl_init.xrcd;
+
+ *qp_num = qp->mqp.qpn;
+ return 0;
+}
+
+int mlx4_ib_modify_xrc_rcv_qp(struct ib_xrcd *ibxrcd, u32 qp_num,
+ struct ib_qp_attr *attr, int attr_mask)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
+ struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
+ struct mlx4_ib_qp *qp;
+ struct mlx4_qp *mqp;
+ int err = -EINVAL;
+
+ if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+ return -ENOSYS;
+
+ mutex_lock(&dev->xrc_rcv_mutex);
+ mqp = __mlx4_qp_lookup(dev->dev, qp_num);
+ if (unlikely(!mqp)) {
+ printk(KERN_WARNING "mlx4_ib_modify_xrc_rcv_qp: "
+ "unknown QPN %06x\n", qp_num);
+ goto out;
+ }
+
+ qp = to_mibqp(mqp);
+ if (qp->ibqp.qp_type != IB_QPT_XRC || !(qp->flags & MLX4_IB_XRC_RCV) ||
+ !qp->ibqp.xrcd || xrcd->xrcdn != to_mxrcd(qp->ibqp.xrcd)->xrcdn)
+ goto out;
+
+ err = mlx4_ib_modify_qp(&qp->ibqp, attr, attr_mask, NULL);
+
+out:
+ mutex_unlock(&dev->xrc_rcv_mutex);
+ return err;
+}
+
+int mlx4_ib_query_xrc_rcv_qp(struct ib_xrcd *ibxrcd, u32 qp_num,
+ struct ib_qp_attr *qp_attr, int qp_attr_mask,
+ struct ib_qp_init_attr *qp_init_attr)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
+ struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
+ struct mlx4_ib_qp *qp;
+ struct mlx4_qp *mqp;
+ int err = -EINVAL;
+
+ if (!(dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC))
+ return -ENOSYS;
+
+ mutex_lock(&dev->xrc_rcv_mutex);
+ mqp = __mlx4_qp_lookup(dev->dev, qp_num);
+ if (unlikely(!mqp)) {
+ printk(KERN_WARNING "mlx4_ib_query_xrc_rcv_qp: "
+ "unknown QPN %06x\n", qp_num);
+ goto err_out;
+ }
+
+ qp = to_mibqp(mqp);
+ if (qp->ibqp.qp_type != IB_QPT_XRC || !(qp->flags & MLX4_IB_XRC_RCV) ||
+ !qp->ibqp.xrcd || xrcd->xrcdn != to_mxrcd(qp->ibqp.xrcd)->xrcdn)
+ goto err_out;
+
+ if (qp->state == IB_QPS_RESET)
+ qp_attr->qp_state = IB_QPS_RESET;
+ else {
+ err = mlx4_ib_query_qp_common(dev, qp, qp_attr, qp_init_attr);
+ if (err)
+ goto err_out;
+ }
+
+ qp_attr->cur_qp_state = qp_attr->qp_state;
+ qp_attr->cap.max_recv_wr = 0;
+ qp_attr->cap.max_recv_sge = 0;
+ qp_attr->cap.max_send_wr = 0;
+ qp_attr->cap.max_send_sge = 0;
+ qp_attr->cap.max_inline_data = 0;
+ qp_init_attr->cap = qp_attr->cap;
+
+ err = 0;
+
+err_out:
+ mutex_unlock(&dev->xrc_rcv_mutex);
+ return err;
+}
+
+int mlx4_ib_destroy_xrc_rcv_qp(struct ib_xrcd *ibxrcd, u32 qp_num)
+{
+ struct mlx4_ib_dev *dev = to_mdev(ibxrcd->device);
+ struct mlx4_ib_xrcd *xrcd = to_mxrcd(ibxrcd);
+ struct mlx4_ib_qp *qp;
+ struct mlx4_qp *mqp;
+ int err = -EINVAL;
+
+ mutex_lock(&dev->xrc_rcv_mutex);
+ mqp = __mlx4_qp_lookup(dev->dev, qp_num);
+ if (unlikely(!mqp)) {
+ printk(KERN_WARNING "mlx4_ib_destroy_xrc_rcv_qp: "
+ "unknown QPN %06x\n", qp_num);
+ goto out;
+ }
+
+ qp = to_mibqp(mqp);
+
+ if (qp->ibqp.qp_type != IB_QPT_XRC || !(qp->flags & MLX4_IB_XRC_RCV) ||
+ !qp->ibqp.xrcd || xrcd->xrcdn != to_mxrcd(qp->ibqp.xrcd)->xrcdn)
+ goto out;
+
+ mlx4_ib_destroy_qp(&qp->ibqp);
+
+ err = 0;
+
+out:
+ mutex_unlock(&dev->xrc_rcv_mutex);
+ return err;
+}
+
-------------------------------------------------------
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* [PATCH V3 2/4] ib_uverbs: XRC RCV qp implementation
From: Jack Morgenstein @ 2010-05-19 7:42 UTC (permalink / raw)
To: Sean Hefty
Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w, linux-rdma-u79uwXL29TY76Z2rM5mHXA
Implement XRC target QPs (xrc rcv qps) for userspace.
The basic verbs are: create/modify/query/destroy.
In addition, two more verbs are added -- register and unregister.
The motivation for register/unregister comes from MPI.
MPI requires XRC receive QPs which are not destroyed when the creating
process terminates (but persist so that other processes may still use
them as XRC targets).
Solution: Userspace requests that a QP be created in kernel space.
Each userspace process using that QP (i.e. receiving packets on an XRC SRQ
via the QP) registers with that QP. When the last userspace process unregisters
from the QP, the QP is destroyed. Unregistration is also part of userspace process
cleanup, so there is no leakage.
This patch adds the kernel procedures that implement the following
(new) libibverbs API:
ibv_create_xrc_rcv_qp
ibv_modify_xrc_rcv_qp
ibv_query_xrc_rcv_qp
ibv_destroy_xrc_rcv_qp
ibv_reg_xrc_rcv_qp
ibv_unreg_xrc_rcv_qp
Note that users who wish to make use of the reg/unreg capability should
never call the destroy verb -- the XRC RCV qp is automatically destroyed
when all registered processes have unregistered (or terminated).
In this case, the process which called "create" may also unregister (or do
nothing, and when it terminates, its reference to that QP is removed).
Thus, usage is:
Either: create/modify/query/destroy_xrc_rcv_qp
Or: create/modify/query/reg/unreg_xrc_rcv_qp
V3: renamed ib_xrc_rcv_table_cleanup to ib_xrc_rcv_qp_table_cleanup for consistency.
>From 6571bf63dbbbca0e95faabd81a9e57f908d7df17 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
Date: Mon, 10 May 2010 20:23:24 +0300
Subject: [PATCH 4/4] ib_uverbs: XRC RCV qp implementation.
Signed-off-by: Jack Morgenstein <jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
---
drivers/infiniband/core/uverbs.h | 8 +
drivers/infiniband/core/uverbs_cmd.c | 304 +++++++++++++++++++++++++++++++++
drivers/infiniband/core/uverbs_main.c | 15 ++
drivers/infiniband/hw/mlx4/main.c | 4 +-
include/rdma/ib_user_verbs.h | 87 +++++++++-
5 files changed, 416 insertions(+), 2 deletions(-)
Index: infiniband/drivers/infiniband/core/uverbs.h
===================================================================
--- infiniband.orig/drivers/infiniband/core/uverbs.h
+++ infiniband/drivers/infiniband/core/uverbs.h
@@ -175,6 +175,8 @@ void ib_uverbs_qp_event_handler(struct i
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr);
void ib_uverbs_event_handler(struct ib_event_handler *handler,
struct ib_event *event);
+void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event,
+ void *context_ptr);
void ib_uverbs_dealloc_xrcd(struct ib_uverbs_device *dev,
struct ib_xrcd *xrcd);
@@ -214,5 +216,11 @@ IB_UVERBS_DECLARE_CMD(destroy_srq);
IB_UVERBS_DECLARE_CMD(create_xrc_srq);
IB_UVERBS_DECLARE_CMD(open_xrcd);
IB_UVERBS_DECLARE_CMD(close_xrcd);
+IB_UVERBS_DECLARE_CMD(create_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(modify_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(query_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(destroy_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(reg_xrc_rcv_qp);
+IB_UVERBS_DECLARE_CMD(unreg_xrc_rcv_qp);
#endif /* UVERBS_H */
Index: infiniband/drivers/infiniband/core/uverbs_cmd.c
===================================================================
--- infiniband.orig/drivers/infiniband/core/uverbs_cmd.c
+++ infiniband/drivers/infiniband/core/uverbs_cmd.c
@@ -2661,3 +2661,311 @@ void ib_uverbs_dealloc_xrcd(struct ib_uv
if (inode)
xrcd_table_delete(dev, inode);
}
+
+ssize_t ib_uverbs_create_xrc_rcv_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_create_xrc_rcv_qp cmd;
+ struct ib_uverbs_create_xrc_rcv_qp_resp resp;
+ struct ib_qp_init_attr init_attr = {0};
+ struct ib_xrcd *xrcd;
+ struct ib_uobject *xrcd_uobj;
+ u32 qp_num;
+ int err;
+
+ if (out_len < sizeof resp)
+ return -ENOSPC;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ xrcd = idr_read_xrcd(cmd.xrcd_handle, file->ucontext, &xrcd_uobj);
+ if (!xrcd)
+ return -EINVAL;
+
+ init_attr.event_handler = ib_uverbs_xrc_rcv_qp_event_handler;
+ init_attr.qp_context = file;
+ init_attr.xrcd = xrcd;
+
+ err = ib_create_xrc_rcv_qp(xrcd, &init_attr, &qp_num);
+ if (err)
+ goto err_put;
+
+ memset(&resp, 0, sizeof resp);
+ resp.qpn = qp_num;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp)) {
+ err = -EFAULT;
+ goto err_destroy;
+ }
+
+ put_uobj_read(xrcd_uobj);
+
+ return in_len;
+
+err_destroy:
+ ib_destroy_xrc_rcv_qp(xrcd, qp_num);
+err_put:
+ put_uobj_read(xrcd_uobj);
+ return err;
+}
+
+ssize_t ib_uverbs_modify_xrc_rcv_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_modify_xrc_rcv_qp cmd;
+ struct ib_qp_attr *attr;
+ struct ib_xrcd *xrcd;
+ struct ib_uobject *xrcd_uobj;
+ int err;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ attr = kzalloc(sizeof *attr, GFP_KERNEL);
+ if (!attr)
+ return -ENOMEM;
+
+ xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &xrcd_uobj);
+ if (!xrcd) {
+ kfree(attr);
+ return -EINVAL;
+ }
+
+ attr->qp_state = cmd.qp_state;
+ attr->cur_qp_state = cmd.cur_qp_state;
+ attr->qp_access_flags = cmd.qp_access_flags;
+ attr->pkey_index = cmd.pkey_index;
+ attr->port_num = cmd.port_num;
+ attr->path_mtu = cmd.path_mtu;
+ attr->path_mig_state = cmd.path_mig_state;
+ attr->qkey = cmd.qkey;
+ attr->rq_psn = cmd.rq_psn;
+ attr->sq_psn = cmd.sq_psn;
+ attr->dest_qp_num = cmd.dest_qp_num;
+ attr->alt_pkey_index = cmd.alt_pkey_index;
+ attr->en_sqd_async_notify = cmd.en_sqd_async_notify;
+ attr->max_rd_atomic = cmd.max_rd_atomic;
+ attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic;
+ attr->min_rnr_timer = cmd.min_rnr_timer;
+ attr->port_num = cmd.port_num;
+ attr->timeout = cmd.timeout;
+ attr->retry_cnt = cmd.retry_cnt;
+ attr->rnr_retry = cmd.rnr_retry;
+ attr->alt_port_num = cmd.alt_port_num;
+ attr->alt_timeout = cmd.alt_timeout;
+
+ memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16);
+ attr->ah_attr.grh.flow_label = cmd.dest.flow_label;
+ attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index;
+ attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit;
+ attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class;
+ attr->ah_attr.dlid = cmd.dest.dlid;
+ attr->ah_attr.sl = cmd.dest.sl;
+ attr->ah_attr.src_path_bits = cmd.dest.src_path_bits;
+ attr->ah_attr.static_rate = cmd.dest.static_rate;
+ attr->ah_attr.ah_flags = cmd.dest.is_global ? IB_AH_GRH : 0;
+ attr->ah_attr.port_num = cmd.dest.port_num;
+
+ memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16);
+ attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label;
+ attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index;
+ attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit;
+ attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class;
+ attr->alt_ah_attr.dlid = cmd.alt_dest.dlid;
+ attr->alt_ah_attr.sl = cmd.alt_dest.sl;
+ attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits;
+ attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate;
+ attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0;
+ attr->alt_ah_attr.port_num = cmd.alt_dest.port_num;
+
+ err = xrcd->device->modify_xrc_rcv_qp(xrcd, cmd.qp_num, attr, cmd.attr_mask);
+ put_uobj_read(xrcd_uobj);
+ kfree(attr);
+ return err ? err : in_len;
+}
+
+ssize_t ib_uverbs_query_xrc_rcv_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_query_xrc_rcv_qp cmd;
+ struct ib_uverbs_query_qp_resp resp;
+ struct ib_qp_attr *attr;
+ struct ib_qp_init_attr *init_attr;
+ struct ib_xrcd *xrcd;
+ struct ib_uobject *xrcd_uobj;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ attr = kmalloc(sizeof *attr, GFP_KERNEL);
+ init_attr = kmalloc(sizeof *init_attr, GFP_KERNEL);
+ if (!attr || !init_attr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &xrcd_uobj);
+ if (!xrcd) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = xrcd->device->query_xrc_rcv_qp(xrcd, cmd.qp_num, attr,
+ cmd.attr_mask, init_attr);
+
+ put_uobj_read(xrcd_uobj);
+
+ if (ret)
+ goto out;
+
+ memset(&resp, 0, sizeof resp);
+ resp.qp_state = attr->qp_state;
+ resp.cur_qp_state = attr->cur_qp_state;
+ resp.path_mtu = attr->path_mtu;
+ resp.path_mig_state = attr->path_mig_state;
+ resp.qkey = attr->qkey;
+ resp.rq_psn = attr->rq_psn;
+ resp.sq_psn = attr->sq_psn;
+ resp.dest_qp_num = attr->dest_qp_num;
+ resp.qp_access_flags = attr->qp_access_flags;
+ resp.pkey_index = attr->pkey_index;
+ resp.alt_pkey_index = attr->alt_pkey_index;
+ resp.sq_draining = attr->sq_draining;
+ resp.max_rd_atomic = attr->max_rd_atomic;
+ resp.max_dest_rd_atomic = attr->max_dest_rd_atomic;
+ resp.min_rnr_timer = attr->min_rnr_timer;
+ resp.port_num = attr->port_num;
+ resp.timeout = attr->timeout;
+ resp.retry_cnt = attr->retry_cnt;
+ resp.rnr_retry = attr->rnr_retry;
+ resp.alt_port_num = attr->alt_port_num;
+ resp.alt_timeout = attr->alt_timeout;
+
+ memcpy(resp.dest.dgid, attr->ah_attr.grh.dgid.raw, 16);
+ resp.dest.flow_label = attr->ah_attr.grh.flow_label;
+ resp.dest.sgid_index = attr->ah_attr.grh.sgid_index;
+ resp.dest.hop_limit = attr->ah_attr.grh.hop_limit;
+ resp.dest.traffic_class = attr->ah_attr.grh.traffic_class;
+ resp.dest.dlid = attr->ah_attr.dlid;
+ resp.dest.sl = attr->ah_attr.sl;
+ resp.dest.src_path_bits = attr->ah_attr.src_path_bits;
+ resp.dest.static_rate = attr->ah_attr.static_rate;
+ resp.dest.is_global = !!(attr->ah_attr.ah_flags & IB_AH_GRH);
+ resp.dest.port_num = attr->ah_attr.port_num;
+
+ memcpy(resp.alt_dest.dgid, attr->alt_ah_attr.grh.dgid.raw, 16);
+ resp.alt_dest.flow_label = attr->alt_ah_attr.grh.flow_label;
+ resp.alt_dest.sgid_index = attr->alt_ah_attr.grh.sgid_index;
+ resp.alt_dest.hop_limit = attr->alt_ah_attr.grh.hop_limit;
+ resp.alt_dest.traffic_class = attr->alt_ah_attr.grh.traffic_class;
+ resp.alt_dest.dlid = attr->alt_ah_attr.dlid;
+ resp.alt_dest.sl = attr->alt_ah_attr.sl;
+ resp.alt_dest.src_path_bits = attr->alt_ah_attr.src_path_bits;
+ resp.alt_dest.static_rate = attr->alt_ah_attr.static_rate;
+ resp.alt_dest.is_global = !!(attr->alt_ah_attr.ah_flags & IB_AH_GRH);
+ resp.alt_dest.port_num = attr->alt_ah_attr.port_num;
+
+ resp.max_send_wr = init_attr->cap.max_send_wr;
+ resp.max_recv_wr = init_attr->cap.max_recv_wr;
+ resp.max_send_sge = init_attr->cap.max_send_sge;
+ resp.max_recv_sge = init_attr->cap.max_recv_sge;
+ resp.max_inline_data = init_attr->cap.max_inline_data;
+ resp.sq_sig_all = init_attr->sq_sig_type == IB_SIGNAL_ALL_WR;
+
+ if (copy_to_user((void __user *) (unsigned long) cmd.response,
+ &resp, sizeof resp))
+ ret = -EFAULT;
+
+out:
+ kfree(attr);
+ kfree(init_attr);
+
+ return ret ? ret : in_len;
+}
+
+ssize_t ib_uverbs_reg_xrc_rcv_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_reg_xrc_rcv_qp cmd;
+ struct ib_xrcd *xrcd;
+ struct ib_uobject *xrcd_uobj;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &xrcd_uobj);
+ if (!xrcd)
+ return -EINVAL;
+
+ ret = ib_reg_xrc_rcv_qp(xrcd, file, cmd.qp_num);
+ if (ret)
+ goto err_put;
+
+ put_uobj_read(xrcd_uobj);
+ return in_len;
+
+err_put:
+ put_uobj_read(xrcd_uobj);
+ return ret;
+}
+
+ssize_t ib_uverbs_unreg_xrc_rcv_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_unreg_xrc_rcv_qp cmd;
+ struct ib_xrcd *xrcd;
+ struct ib_uobject *xrcd_uobj;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &xrcd_uobj);
+ if (!xrcd)
+ return -EINVAL;
+
+ ret = ib_unreg_xrc_rcv_qp(xrcd, file, cmd.qp_num);
+ if (ret) {
+ put_uobj_read(xrcd_uobj);
+ return -EINVAL;
+ }
+
+ put_uobj_read(xrcd_uobj);
+ return in_len;
+}
+
+ssize_t ib_uverbs_destroy_xrc_rcv_qp(struct ib_uverbs_file *file,
+ const char __user *buf, int in_len,
+ int out_len)
+{
+ struct ib_uverbs_unreg_xrc_rcv_qp cmd;
+ struct ib_xrcd *xrcd;
+ struct ib_uobject *xrcd_uobj;
+ int ret;
+
+ if (copy_from_user(&cmd, buf, sizeof cmd))
+ return -EFAULT;
+
+ xrcd = idr_read_xrcd(cmd.xrc_domain_handle, file->ucontext, &xrcd_uobj);
+ if (!xrcd)
+ return -EINVAL;
+
+ ret = ib_destroy_xrc_rcv_qp(xrcd, cmd.qp_num);
+ if (ret) {
+ put_uobj_read(xrcd_uobj);
+ return -EINVAL;
+ }
+
+ put_uobj_read(xrcd_uobj);
+ return in_len;
+}
+
Index: infiniband/drivers/infiniband/core/uverbs_main.c
===================================================================
--- infiniband.orig/drivers/infiniband/core/uverbs_main.c
+++ infiniband/drivers/infiniband/core/uverbs_main.c
@@ -111,6 +111,12 @@ static ssize_t (*uverbs_cmd_table[])(str
[IB_USER_VERBS_CMD_CREATE_XRC_SRQ] = ib_uverbs_create_xrc_srq,
[IB_USER_VERBS_CMD_OPEN_XRCD] = ib_uverbs_open_xrcd,
[IB_USER_VERBS_CMD_CLOSE_XRCD] = ib_uverbs_close_xrcd,
+ [IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP] = ib_uverbs_create_xrc_rcv_qp,
+ [IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP] = ib_uverbs_modify_xrc_rcv_qp,
+ [IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP] = ib_uverbs_query_xrc_rcv_qp,
+ [IB_USER_VERBS_CMD_DESTROY_XRC_RCV_QP] = ib_uverbs_destroy_xrc_rcv_qp,
+ [IB_USER_VERBS_CMD_REG_XRC_RCV_QP] = ib_uverbs_reg_xrc_rcv_qp,
+ [IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP] = ib_uverbs_unreg_xrc_rcv_qp,
};
static void ib_uverbs_add_one(struct ib_device *device);
@@ -246,6 +252,8 @@ static int ib_uverbs_cleanup_ucontext(st
kfree(uobj);
}
+ ib_xrc_rcv_qp_table_cleanup(context->device, file);
+
mutex_lock(&file->device->xrcd_tree_mutex);
list_for_each_entry_safe(uobj, tmp, &context->xrcd_list, list) {
struct ib_xrcd *xrcd = uobj->object;
@@ -479,6 +487,13 @@ void ib_uverbs_qp_event_handler(struct i
&uobj->events_reported);
}
+void ib_uverbs_xrc_rcv_qp_event_handler(struct ib_event *event,
+ void *context_ptr)
+{
+ ib_uverbs_async_handler(context_ptr, event->element.xrc_qp_num,
+ event->event, NULL, NULL);
+}
+
void ib_uverbs_srq_event_handler(struct ib_event *event, void *context_ptr)
{
struct ib_uevent_object *uobj;
Index: infiniband/include/rdma/ib_user_verbs.h
===================================================================
--- infiniband.orig/include/rdma/ib_user_verbs.h
+++ infiniband/include/rdma/ib_user_verbs.h
@@ -84,7 +84,13 @@ enum {
IB_USER_VERBS_CMD_POST_SRQ_RECV,
IB_USER_VERBS_CMD_CREATE_XRC_SRQ,
IB_USER_VERBS_CMD_OPEN_XRCD,
- IB_USER_VERBS_CMD_CLOSE_XRCD
+ IB_USER_VERBS_CMD_CLOSE_XRCD,
+ IB_USER_VERBS_CMD_CREATE_XRC_RCV_QP,
+ IB_USER_VERBS_CMD_MODIFY_XRC_RCV_QP,
+ IB_USER_VERBS_CMD_QUERY_XRC_RCV_QP,
+ IB_USER_VERBS_CMD_REG_XRC_RCV_QP,
+ IB_USER_VERBS_CMD_UNREG_XRC_RCV_QP,
+ IB_USER_VERBS_CMD_DESTROY_XRC_RCV_QP,
};
/*
@@ -719,4 +725,76 @@ struct ib_uverbs_close_xrcd {
__u64 driver_data[0];
};
+struct ib_uverbs_create_xrc_rcv_qp {
+ __u64 response;
+ __u64 user_handle;
+ __u32 xrcd_handle;
+ __u8 reserved1[28];
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_create_xrc_rcv_qp_resp {
+ __u32 qpn;
+ __u32 reserved;
+};
+
+struct ib_uverbs_modify_xrc_rcv_qp {
+ __u32 xrc_domain_handle;
+ __u32 qp_num;
+ struct ib_uverbs_qp_dest dest;
+ struct ib_uverbs_qp_dest alt_dest;
+ __u32 attr_mask;
+ __u32 qkey;
+ __u32 rq_psn;
+ __u32 sq_psn;
+ __u32 dest_qp_num;
+ __u32 qp_access_flags;
+ __u16 pkey_index;
+ __u16 alt_pkey_index;
+ __u8 qp_state;
+ __u8 cur_qp_state;
+ __u8 path_mtu;
+ __u8 path_mig_state;
+ __u8 en_sqd_async_notify;
+ __u8 max_rd_atomic;
+ __u8 max_dest_rd_atomic;
+ __u8 min_rnr_timer;
+ __u8 port_num;
+ __u8 timeout;
+ __u8 retry_cnt;
+ __u8 rnr_retry;
+ __u8 alt_port_num;
+ __u8 alt_timeout;
+ __u8 reserved[6];
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_query_xrc_rcv_qp {
+ __u64 response;
+ __u32 xrc_domain_handle;
+ __u32 qp_num;
+ __u32 attr_mask;
+ __u32 reserved;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_destroy_xrc_rcv_qp {
+ __u32 xrc_domain_handle;
+ __u32 qp_num;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_reg_xrc_rcv_qp {
+ __u32 xrc_domain_handle;
+ __u32 qp_num;
+ __u64 driver_data[0];
+};
+
+struct ib_uverbs_unreg_xrc_rcv_qp {
+ __u32 xrc_domain_handle;
+ __u32 qp_num;
+ __u64 driver_data[0];
+};
+
+
#endif /* IB_USER_VERBS_H */
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* [PATCH V3 1/4] ib_core: changes to support XRC RCV qp's
From: Jack Morgenstein @ 2010-05-19 7:42 UTC (permalink / raw)
To: Sean Hefty
Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w, Tziporet Koren,
linux-rdma-u79uwXL29TY76Z2rM5mHXA
The core layer does reference counting on XRC RCV qp's,
and also is responsible for distributing async events generated
for XRC RCV qp's to all registered processes.
Methods: ib_create_xrc_rcv_qp, ib_destroy_xrc_rcv_qp, ib_reg_xrc_rcv_qp
and ib_unreg_xrc_rcv_qp -- and a XRC RCV QP cleanup function called
when a process terminates (this function removes all registrations for
that process, and destroys any XRC RCV QPs which have no processes registered
after the cleanup).
All other functions serve to support keeping track of the XRC RCV qp's
(in a radix tree), and to distribute the async events.
V3: Fixed bug reported by Sean, removed EXPORT_SYMBOL from internal procedures
ib_xrc_rcv_qp_table_xxx, and renamed ib_xrc_rcv_table_cleanup to
ib_xrc_rcv_qp_table_cleanup for consistency.
Signed-off-by: Jack Morgenstein <jackm-LDSdmyG8hGV8YrgS2mwiifqBs+8SCbDb@public.gmane.org>
---
drivers/infiniband/core/device.c | 6 +-
drivers/infiniband/core/verbs.c | 275 ++++++++++++++++++++++++++++++++++++++
include/rdma/ib_verbs.h | 45 ++++++
3 files changed, 325 insertions(+), 1 deletions(-)
Index: infiniband/drivers/infiniband/core/device.c
===================================================================
--- infiniband.orig/drivers/infiniband/core/device.c
+++ infiniband/drivers/infiniband/core/device.c
@@ -172,9 +172,13 @@ static int end_port(struct ib_device *de
*/
struct ib_device *ib_alloc_device(size_t size)
{
+ struct ib_device *ibdev;
BUG_ON(size < sizeof (struct ib_device));
- return kzalloc(size, GFP_KERNEL);
+ ibdev = kzalloc(size, GFP_KERNEL);
+ if (ibdev)
+ ib_xrc_rcv_qp_table_init(ibdev);
+ return ibdev;
}
EXPORT_SYMBOL(ib_alloc_device);
Index: infiniband/drivers/infiniband/core/verbs.c
===================================================================
--- infiniband.orig/drivers/infiniband/core/verbs.c
+++ infiniband/drivers/infiniband/core/verbs.c
@@ -39,6 +39,8 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/slab.h>
#include <rdma/ib_verbs.h>
#include <rdma/ib_cache.h>
@@ -1030,3 +1032,276 @@ int ib_dealloc_xrcd(struct ib_xrcd *xrcd
return xrcd->device->dealloc_xrcd(xrcd);
}
EXPORT_SYMBOL(ib_dealloc_xrcd);
+
+void ib_xrc_rcv_qp_table_init(struct ib_device *dev)
+{
+ spin_lock_init(&dev->xrc_rcv_qp_table_lock);
+ INIT_RADIX_TREE(&dev->xrc_rcv_qp_table, GFP_ATOMIC);
+}
+
+struct ib_xrc_rcv_qp_table_entry *
+ib_xrc_rcv_tbl_find(struct ib_device *dev, u32 qpn)
+{
+ return radix_tree_lookup(&dev->xrc_rcv_qp_table, qpn);
+}
+
+int ib_xrc_rcv_qp_table_new(struct ib_device *dev, void *event_handler,
+ u32 qpn, struct ib_xrcd *xrcd, void *context)
+{
+ struct ib_xrc_rcv_qp_table_entry *qp;
+ struct ib_xrc_rcv_reg_entry *reg_entry;
+ unsigned long flags;
+ int rc = -ENOMEM;
+
+ qp = kzalloc(sizeof *qp, GFP_ATOMIC);
+ if (!qp)
+ return -ENOMEM;
+
+ reg_entry = kzalloc(sizeof *reg_entry, GFP_KERNEL);
+ if (!reg_entry)
+ goto out_alloc;
+
+ INIT_LIST_HEAD(&qp->list);
+ qp->event_handler = event_handler;
+ qp->xrcd = xrcd;
+ qp->qpn = qpn;
+
+ reg_entry->context = context;
+ list_add_tail(&reg_entry->list, &qp->list);
+
+ spin_lock_irqsave(&dev->xrc_rcv_qp_table_lock, flags);
+ rc = radix_tree_insert(&dev->xrc_rcv_qp_table, qpn, qp);
+ spin_unlock_irqrestore(&dev->xrc_rcv_qp_table_lock, flags);
+ if (rc)
+ goto out_radix;
+ atomic_inc(&xrcd->usecnt);
+ return 0;
+
+out_radix:
+ kfree(reg_entry);
+out_alloc:
+ kfree(qp);
+ return rc;
+}
+
+
+int ib_xrc_rcv_qp_table_add_reg_entry(struct ib_device *dev, u32 qpn,
+ void *context)
+{
+ struct ib_xrc_rcv_reg_entry *reg_entry, *tmp;
+ struct ib_xrc_rcv_qp_table_entry *qp;
+ unsigned long flags;
+ int err = -EINVAL, found = 0;
+
+ reg_entry = kzalloc(sizeof *reg_entry, GFP_KERNEL);
+ if (!reg_entry)
+ return -ENOMEM;
+ reg_entry->context = context;
+
+ spin_lock_irqsave(&dev->xrc_rcv_qp_table_lock, flags);
+ qp = ib_xrc_rcv_tbl_find(dev, qpn);
+ if (unlikely(!qp))
+ goto free_out;
+ list_for_each_entry(tmp, &qp->list, list)
+ if (tmp->context == context) {
+ found = 1;
+ break;
+ }
+ /* add only a single entry per user context */
+ if (unlikely(found)) {
+ err = 0;
+ goto free_out;
+ }
+ atomic_inc(&qp->xrcd->usecnt);
+ list_add_tail(&reg_entry->list, &qp->list);
+ spin_unlock_irqrestore(&dev->xrc_rcv_qp_table_lock, flags);
+ return 0;
+
+free_out:
+ spin_unlock_irqrestore(&dev->xrc_rcv_qp_table_lock, flags);
+ kfree(reg_entry);
+ return err;
+}
+
+int ib_xrc_rcv_qp_table_remove_reg_entry(struct ib_device *dev, u32 qpn,
+ void *context)
+{
+ struct ib_xrc_rcv_reg_entry *uninitialized_var(reg_entry), *tmp;
+ struct ib_xrc_rcv_qp_table_entry *qp;
+ unsigned long flags;
+ int found = 0, destroy = 0;
+
+ spin_lock_irqsave(&dev->xrc_rcv_qp_table_lock, flags);
+ qp = ib_xrc_rcv_tbl_find(dev, qpn);
+ if (unlikely(!qp))
+ goto out;
+ if (!list_empty(&qp->list)) {
+ list_for_each_entry_safe(reg_entry, tmp,
+ &qp->list, list) {
+ if (reg_entry->context == context) {
+ list_del(&reg_entry->list);
+ found = 1;
+ atomic_dec(&qp->xrcd->usecnt);
+ break;
+ }
+ }
+
+ if (unlikely(!found))
+ goto out;
+ }
+
+ /* now, remove the entry if the list is empty */
+ if (unlikely(list_empty(&qp->list))) {
+ destroy = 1;
+ radix_tree_delete(&dev->xrc_rcv_qp_table, qpn);
+ }
+ spin_unlock_irqrestore(&dev->xrc_rcv_qp_table_lock, flags);
+ if (destroy) {
+ dev->destroy_xrc_rcv_qp(qp->xrcd, qpn);
+ kfree(qp);
+ }
+ if (found)
+ kfree(reg_entry);
+ return 0;
+
+out:
+ spin_unlock_irqrestore(&dev->xrc_rcv_qp_table_lock, flags);
+ return -EINVAL;
+}
+
+int ib_xrc_rcv_qp_table_remove(struct ib_device *dev, u32 qpn)
+{
+ struct ib_xrc_rcv_reg_entry *reg_entry, *tmp;
+ struct ib_xrc_rcv_qp_table_entry *qp;
+ struct list_head xrc_local;
+ unsigned long flags;
+
+ INIT_LIST_HEAD(&xrc_local);
+
+ spin_lock_irqsave(&dev->xrc_rcv_qp_table_lock, flags);
+
+ qp = ib_xrc_rcv_tbl_find(dev, qpn);
+ if (unlikely(!qp))
+ goto out;
+ /* ASSERT(!list_empty(&qp->list)); */
+ list_replace_init(&qp->list, &xrc_local);
+ radix_tree_delete(&dev->xrc_rcv_qp_table, qpn);
+ spin_unlock_irqrestore(&dev->xrc_rcv_qp_table_lock, flags);
+
+ list_for_each_entry_safe(reg_entry, tmp, &xrc_local, list) {
+ list_del(&reg_entry->list);
+ kfree(reg_entry);
+ atomic_dec(&qp->xrcd->usecnt);
+ }
+ dev->destroy_xrc_rcv_qp(qp->xrcd, qpn);
+ kfree(qp);
+ return 0;
+
+out:
+ spin_unlock_irqrestore(&dev->xrc_rcv_qp_table_lock, flags);
+ return -EINVAL;
+}
+
+/* cleanup. If context is NULL, clean up everything */
+void ib_xrc_rcv_qp_table_cleanup(struct ib_device *dev, void *context)
+{
+ struct ib_xrc_rcv_qp_table_entry *qp, *qp1, *qp2;
+ struct ib_xrc_rcv_reg_entry *reg_entry, *tmp;
+ struct list_head delete_list, qp_del_list;
+ unsigned long flags, next = 0;
+
+ INIT_LIST_HEAD(&delete_list);
+ INIT_LIST_HEAD(&qp_del_list);
+
+ spin_lock_irqsave(&dev->xrc_rcv_qp_table_lock, flags);
+ while (radix_tree_gang_lookup(&dev->xrc_rcv_qp_table, (void **) &qp,
+ next, 1)) {
+ next = qp->qpn + 1;
+ list_for_each_entry_safe(reg_entry, tmp, &qp->list, list) {
+ if (!context || reg_entry->context == context) {
+ atomic_dec(&qp->xrcd->usecnt);
+ list_move_tail(&reg_entry->list, &delete_list);
+ }
+ }
+ if (unlikely(list_empty(&qp->list))) {
+ qp = radix_tree_delete(&dev->xrc_rcv_qp_table, qp->qpn);
+ if (qp)
+ list_add(&qp->list, &qp_del_list);
+ }
+ }
+ spin_unlock_irqrestore(&dev->xrc_rcv_qp_table_lock, flags);
+
+ list_for_each_entry_safe(reg_entry, tmp, &delete_list, list) {
+ list_del(&reg_entry->list);
+ kfree(reg_entry);
+ }
+
+ list_for_each_entry_safe(qp1, qp2, &qp_del_list, list) {
+ list_del(&qp1->list);
+ dev->destroy_xrc_rcv_qp(qp1->xrcd, qp1->qpn);
+ kfree(qp1);
+ }
+}
+EXPORT_SYMBOL(ib_xrc_rcv_qp_table_cleanup);
+
+void ib_xrc_rcv_qp_event_handler(struct ib_event *event, void *context_ptr)
+{
+ struct ib_device *dev = context_ptr;
+ struct ib_xrc_rcv_qp_table_entry *qp;
+ struct ib_xrc_rcv_reg_entry *reg_entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->xrc_rcv_qp_table_lock, flags);
+ qp = ib_xrc_rcv_tbl_find(dev, event->element.xrc_qp_num);
+ if (unlikely(!qp))
+ goto out;
+
+ list_for_each_entry(reg_entry, &qp->list, list) {
+ qp->event_handler(event, reg_entry->context);
+ }
+
+out:
+ spin_unlock_irqrestore(&dev->xrc_rcv_qp_table_lock, flags);
+}
+
+int ib_create_xrc_rcv_qp(struct ib_xrcd *xrcd, struct ib_qp_init_attr *attr,
+ u32 *qp_num)
+{
+ struct ib_qp_init_attr init_attr;
+ int err;
+
+ memcpy(&init_attr, attr, sizeof(*attr));
+ init_attr.event_handler = ib_xrc_rcv_qp_event_handler;
+ init_attr.qp_context = xrcd->device;
+ err = xrcd->device->create_xrc_rcv_qp(&init_attr, qp_num);
+ if (err)
+ return err;
+
+ err = ib_xrc_rcv_qp_table_new(xrcd->device, attr->event_handler,
+ *qp_num, xrcd, attr->qp_context);
+ if (err)
+ xrcd->device->destroy_xrc_rcv_qp(xrcd, *qp_num);
+ return err;
+}
+EXPORT_SYMBOL(ib_create_xrc_rcv_qp);
+
+int ib_destroy_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num)
+{
+ return ib_xrc_rcv_qp_table_remove(xrcd->device, qp_num);
+}
+EXPORT_SYMBOL(ib_destroy_xrc_rcv_qp);
+
+int ib_reg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num)
+{
+ return ib_xrc_rcv_qp_table_add_reg_entry(xrcd->device, qp_num,
+ context);
+}
+EXPORT_SYMBOL(ib_reg_xrc_rcv_qp);
+
+int ib_unreg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num)
+{
+ return ib_xrc_rcv_qp_table_remove_reg_entry(xrcd->device, qp_num,
+ context);
+}
+EXPORT_SYMBOL(ib_unreg_xrc_rcv_qp);
+
Index: infiniband/include/rdma/ib_verbs.h
===================================================================
--- infiniband.orig/include/rdma/ib_verbs.h
+++ infiniband/include/rdma/ib_verbs.h
@@ -47,6 +47,7 @@
#include <linux/list.h>
#include <linux/rwsem.h>
#include <linux/scatterlist.h>
+#include <linux/radix-tree.h>
#include <asm/atomic.h>
#include <asm/uaccess.h>
@@ -344,12 +345,17 @@ enum ib_event_type {
IB_EVENT_CLIENT_REREGISTER
};
+enum ib_event_flags {
+ IB_XRC_QP_EVENT_FLAG = 0x80000000,
+};
+
struct ib_event {
struct ib_device *device;
union {
struct ib_cq *cq;
struct ib_qp *qp;
struct ib_srq *srq;
+ u32 xrc_qp_num;
u8 port_num;
} element;
enum ib_event_type event;
@@ -1154,6 +1160,23 @@ struct ib_device {
struct ib_ucontext *context,
struct ib_udata *udata);
int (*dealloc_xrcd)(struct ib_xrcd *xrcd);
+ int (*create_xrc_rcv_qp)(struct ib_qp_init_attr *init_attr,
+ u32 *qp_num);
+ int (*modify_xrc_rcv_qp)(struct ib_xrcd *xrcd,
+ u32 qp_num,
+ struct ib_qp_attr *attr,
+ int attr_mask);
+ int (*query_xrc_rcv_qp)(struct ib_xrcd *xrcd,
+ u32 qp_num,
+ struct ib_qp_attr *attr,
+ int attr_mask,
+ struct ib_qp_init_attr *init_attr);
+ int (*destroy_xrc_rcv_qp)(struct ib_xrcd *xrcd,
+ u32 qp_num);
+ int (*reg_xrc_rcv_qp)(struct ib_xrcd *xrcd, void *context,
+ u32 qp_num);
+ int (*unreg_xrc_rcv_qp)(struct ib_xrcd *xrcd, void *context,
+ u32 qp_num);
struct ib_dma_mapping_ops *dma_ops;
@@ -1161,6 +1184,8 @@ struct ib_device {
struct device dev;
struct kobject *ports_parent;
struct list_head port_list;
+ struct radix_tree_root xrc_rcv_qp_table;
+ spinlock_t xrc_rcv_qp_table_lock;
enum {
IB_DEV_UNINITIALIZED,
@@ -1178,6 +1203,18 @@ struct ib_device {
u8 phys_port_cnt;
};
+struct ib_xrc_rcv_qp_table_entry {
+ struct list_head list;
+ void (*event_handler)(struct ib_event *, void *);
+ struct ib_xrcd *xrcd;
+ u32 qpn;
+};
+
+struct ib_xrc_rcv_reg_entry {
+ struct list_head list;
+ void *context;
+};
+
struct ib_client {
char *name;
void (*add) (struct ib_device *);
@@ -2092,4 +2129,12 @@ struct ib_xrcd *ib_alloc_xrcd(struct ib_
*/
int ib_dealloc_xrcd(struct ib_xrcd *xrcd);
+void ib_xrc_rcv_qp_table_init(struct ib_device *dev);
+void ib_xrc_rcv_qp_table_cleanup(struct ib_device *dev, void *context);
+int ib_create_xrc_rcv_qp(struct ib_xrcd *xrcd, struct ib_qp_init_attr *attr,
+ u32 *qp_num);
+int ib_destroy_xrc_rcv_qp(struct ib_xrcd *xrcd, u32 qp_num);
+int ib_reg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num);
+int ib_unreg_xrc_rcv_qp(struct ib_xrcd *xrcd, void *context, u32 qp_num);
+
#endif /* IB_VERBS_H */
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* Re: [PATCH V2 1/4] ib_core: changes to support XRC RCV qp's
From: Jack Morgenstein @ 2010-05-19 6:30 UTC (permalink / raw)
To: Sean Hefty
Cc: rolandd-FYB4Gu1CFyUAvxtiuMwx3w, Tziporet Koren,
linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <21985B556C0D42658109996B406ADC15-Zpru7NauK7drdx17CPfAsdBPR1lH4CV8@public.gmane.org>
On Tuesday 18 May 2010 21:33, Sean Hefty wrote:
> >+struct ib_xrc_rcv_qp_table_entry *
> >+ib_xrc_rcv_tbl_find(struct ib_device *dev, u32 qpn)
> >+{
> >+ return radix_tree_lookup(&dev->xrc_rcv_qp_table, qpn);
> >+}
>
> nit - but do we need a wrapper around this single call?
I prefer it this way, so that we have a complete xrc_rcv_table interface.
> >+ list_for_each_entry(tmp, &qp->list, list)
> >+ if (tmp->context == context) {
> >+ found = 1;
> >+ break;
> >+ }
> >+ /* add only a single entry per user context */
> >+ if (unlikely(found)) {
> >+ err = 0;
> >+ goto free_out;
> >+ }
>
> Maybe this becomes clear later, but can a user add multiple entries? Can't we
> just consider this a usage error? Actually, why does it matter if the same
> context is added multiple times?
This list of entries is also used to disperse XRC RCV qp events to all registered
processes -- and we only want a single event dispatched per process.
> >+ dev->destroy_xrc_rcv_qp(qp->xrcd, qpn);
> >+ kfree(qp);
> >+ list_for_each_entry_safe(reg_entry, tmp, &xrc_local, list) {
> >+ list_del(&reg_entry->list);
> >+ kfree(reg_entry);
> >+ atomic_dec(&qp->xrcd->usecnt);
>
> qp was just freed a few lines up
Ouch! Thanks for the catch, Sean. I'll fix it before resubmitting.
Regards,
Jack
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply
* [PATCH v4 11/11] IB/qib: Add qib_verbs.h
From: Ralph Campbell @ 2010-05-19 0:27 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100519002647.11950.62870.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_verbs.h file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_verbs.h | 1100 +++++++++++++++++++++++++++++++++
1 files changed, 1100 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_verbs.h
diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
new file mode 100644
index 0000000..bd57c12
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_verbs.h
@@ -0,0 +1,1100 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009, 2010 QLogic Corporation.
+ * All rights reserved.
+ * Copyright (c) 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef QIB_VERBS_H
+#define QIB_VERBS_H
+
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/kref.h>
+#include <linux/workqueue.h>
+#include <rdma/ib_pack.h>
+#include <rdma/ib_user_verbs.h>
+
+struct qib_ctxtdata;
+struct qib_pportdata;
+struct qib_devdata;
+struct qib_verbs_txreq;
+
+#define QIB_MAX_RDMA_ATOMIC 16
+#define QIB_GUIDS_PER_PORT 5
+
+#define QPN_MAX (1 << 24)
+#define QPNMAP_ENTRIES (QPN_MAX / PAGE_SIZE / BITS_PER_BYTE)
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+#define QIB_UVERBS_ABI_VERSION 2
+
+/*
+ * Define an ib_cq_notify value that is not valid so we know when CQ
+ * notifications are armed.
+ */
+#define IB_CQ_NONE (IB_CQ_NEXT_COMP + 1)
+
+#define IB_SEQ_NAK (3 << 29)
+
+/* AETH NAK opcode values */
+#define IB_RNR_NAK 0x20
+#define IB_NAK_PSN_ERROR 0x60
+#define IB_NAK_INVALID_REQUEST 0x61
+#define IB_NAK_REMOTE_ACCESS_ERROR 0x62
+#define IB_NAK_REMOTE_OPERATIONAL_ERROR 0x63
+#define IB_NAK_INVALID_RD_REQUEST 0x64
+
+/* Flags for checking QP state (see ib_qib_state_ops[]) */
+#define QIB_POST_SEND_OK 0x01
+#define QIB_POST_RECV_OK 0x02
+#define QIB_PROCESS_RECV_OK 0x04
+#define QIB_PROCESS_SEND_OK 0x08
+#define QIB_PROCESS_NEXT_SEND_OK 0x10
+#define QIB_FLUSH_SEND 0x20
+#define QIB_FLUSH_RECV 0x40
+#define QIB_PROCESS_OR_FLUSH_SEND \
+ (QIB_PROCESS_SEND_OK | QIB_FLUSH_SEND)
+
+/* IB Performance Manager status values */
+#define IB_PMA_SAMPLE_STATUS_DONE 0x00
+#define IB_PMA_SAMPLE_STATUS_STARTED 0x01
+#define IB_PMA_SAMPLE_STATUS_RUNNING 0x02
+
+/* Mandatory IB performance counter select values. */
+#define IB_PMA_PORT_XMIT_DATA cpu_to_be16(0x0001)
+#define IB_PMA_PORT_RCV_DATA cpu_to_be16(0x0002)
+#define IB_PMA_PORT_XMIT_PKTS cpu_to_be16(0x0003)
+#define IB_PMA_PORT_RCV_PKTS cpu_to_be16(0x0004)
+#define IB_PMA_PORT_XMIT_WAIT cpu_to_be16(0x0005)
+
+#define QIB_VENDOR_IPG cpu_to_be16(0xFFA0)
+
+#define IB_BTH_REQ_ACK (1 << 31)
+#define IB_BTH_SOLICITED (1 << 23)
+#define IB_BTH_MIG_REQ (1 << 22)
+
+/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */
+#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26)
+
+#define IB_GRH_VERSION 6
+#define IB_GRH_VERSION_MASK 0xF
+#define IB_GRH_VERSION_SHIFT 28
+#define IB_GRH_TCLASS_MASK 0xFF
+#define IB_GRH_TCLASS_SHIFT 20
+#define IB_GRH_FLOW_MASK 0xFFFFF
+#define IB_GRH_FLOW_SHIFT 0
+#define IB_GRH_NEXT_HDR 0x1B
+
+#define IB_DEFAULT_GID_PREFIX cpu_to_be64(0xfe80000000000000ULL)
+
+/* Values for set/get portinfo VLCap OperationalVLs */
+#define IB_VL_VL0 1
+#define IB_VL_VL0_1 2
+#define IB_VL_VL0_3 3
+#define IB_VL_VL0_7 4
+#define IB_VL_VL0_14 5
+
+static inline int qib_num_vls(int vls)
+{
+ switch (vls) {
+ default:
+ case IB_VL_VL0:
+ return 1;
+ case IB_VL_VL0_1:
+ return 2;
+ case IB_VL_VL0_3:
+ return 4;
+ case IB_VL_VL0_7:
+ return 8;
+ case IB_VL_VL0_14:
+ return 15;
+ }
+}
+
+struct ib_reth {
+ __be64 vaddr;
+ __be32 rkey;
+ __be32 length;
+} __attribute__ ((packed));
+
+struct ib_atomic_eth {
+ __be32 vaddr[2]; /* unaligned so access as 2 32-bit words */
+ __be32 rkey;
+ __be64 swap_data;
+ __be64 compare_data;
+} __attribute__ ((packed));
+
+struct qib_other_headers {
+ __be32 bth[3];
+ union {
+ struct {
+ __be32 deth[2];
+ __be32 imm_data;
+ } ud;
+ struct {
+ struct ib_reth reth;
+ __be32 imm_data;
+ } rc;
+ struct {
+ __be32 aeth;
+ __be32 atomic_ack_eth[2];
+ } at;
+ __be32 imm_data;
+ __be32 aeth;
+ struct ib_atomic_eth atomic_eth;
+ } u;
+} __attribute__ ((packed));
+
+/*
+ * Note that UD packets with a GRH header are 8+40+12+8 = 68 bytes
+ * long (72 w/ imm_data). Only the first 56 bytes of the IB header
+ * will be in the eager header buffer. The remaining 12 or 16 bytes
+ * are in the data buffer.
+ */
+struct qib_ib_header {
+ __be16 lrh[4];
+ union {
+ struct {
+ struct ib_grh grh;
+ struct qib_other_headers oth;
+ } l;
+ struct qib_other_headers oth;
+ } u;
+} __attribute__ ((packed));
+
+struct qib_pio_header {
+ __le32 pbc[2];
+ struct qib_ib_header hdr;
+} __attribute__ ((packed));
+
+/*
+ * There is one struct qib_mcast for each multicast GID.
+ * All attached QPs are then stored as a list of
+ * struct qib_mcast_qp.
+ */
+struct qib_mcast_qp {
+ struct list_head list;
+ struct qib_qp *qp;
+};
+
+struct qib_mcast {
+ struct rb_node rb_node;
+ union ib_gid mgid;
+ struct list_head qp_list;
+ wait_queue_head_t wait;
+ atomic_t refcount;
+ int n_attached;
+};
+
+/* Protection domain */
+struct qib_pd {
+ struct ib_pd ibpd;
+ int user; /* non-zero if created from user space */
+};
+
+/* Address Handle */
+struct qib_ah {
+ struct ib_ah ibah;
+ struct ib_ah_attr attr;
+ atomic_t refcount;
+};
+
+/*
+ * This structure is used by qib_mmap() to validate an offset
+ * when an mmap() request is made. The vm_area_struct then uses
+ * this as its vm_private_data.
+ */
+struct qib_mmap_info {
+ struct list_head pending_mmaps;
+ struct ib_ucontext *context;
+ void *obj;
+ __u64 offset;
+ struct kref ref;
+ unsigned size;
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and completion queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ */
+struct qib_cq_wc {
+ u32 head; /* index of next entry to fill */
+ u32 tail; /* index of next ib_poll_cq() entry */
+ union {
+ /* these are actually size ibcq.cqe + 1 */
+ struct ib_uverbs_wc uqueue[0];
+ struct ib_wc kqueue[0];
+ };
+};
+
+/*
+ * The completion queue structure.
+ */
+struct qib_cq {
+ struct ib_cq ibcq;
+ struct work_struct comptask;
+ spinlock_t lock; /* protect changes in this struct */
+ u8 notify;
+ u8 triggered;
+ struct qib_cq_wc *queue;
+ struct qib_mmap_info *ip;
+};
+
+/*
+ * A segment is a linear region of low physical memory.
+ * XXX Maybe we should use phys addr here and kmap()/kunmap().
+ * Used by the verbs layer.
+ */
+struct qib_seg {
+ void *vaddr;
+ size_t length;
+};
+
+/* The number of qib_segs that fit in a page. */
+#define QIB_SEGSZ (PAGE_SIZE / sizeof(struct qib_seg))
+
+struct qib_segarray {
+ struct qib_seg segs[QIB_SEGSZ];
+};
+
+struct qib_mregion {
+ struct ib_pd *pd; /* shares refcnt of ibmr.pd */
+ u64 user_base; /* User's address for this region */
+ u64 iova; /* IB start address of this region */
+ size_t length;
+ u32 lkey;
+ u32 offset; /* offset (bytes) to start of region */
+ int access_flags;
+ u32 max_segs; /* number of qib_segs in all the arrays */
+ u32 mapsz; /* size of the map array */
+ atomic_t refcount;
+ struct qib_segarray *map[0]; /* the segments */
+};
+
+/*
+ * These keep track of the copy progress within a memory region.
+ * Used by the verbs layer.
+ */
+struct qib_sge {
+ struct qib_mregion *mr;
+ void *vaddr; /* kernel virtual address of segment */
+ u32 sge_length; /* length of the SGE */
+ u32 length; /* remaining length of the segment */
+ u16 m; /* current index: mr->map[m] */
+ u16 n; /* current index: mr->map[m]->segs[n] */
+};
+
+/* Memory region */
+struct qib_mr {
+ struct ib_mr ibmr;
+ struct ib_umem *umem;
+ struct qib_mregion mr; /* must be last */
+};
+
+/*
+ * Send work request queue entry.
+ * The size of the sg_list is determined when the QP is created and stored
+ * in qp->s_max_sge.
+ */
+struct qib_swqe {
+ struct ib_send_wr wr; /* don't use wr.sg_list */
+ u32 psn; /* first packet sequence number */
+ u32 lpsn; /* last packet sequence number */
+ u32 ssn; /* send sequence number */
+ u32 length; /* total length of data in sg_list */
+ struct qib_sge sg_list[0];
+};
+
+/*
+ * Receive work request queue entry.
+ * The size of the sg_list is determined when the QP (or SRQ) is created
+ * and stored in qp->r_rq.max_sge (or srq->rq.max_sge).
+ */
+struct qib_rwqe {
+ u64 wr_id;
+ u8 num_sge;
+ struct ib_sge sg_list[0];
+};
+
+/*
+ * This structure is used to contain the head pointer, tail pointer,
+ * and receive work queue entries as a single memory allocation so
+ * it can be mmap'ed into user space.
+ * Note that the wq array elements are variable size so you can't
+ * just index into the array to get the N'th element;
+ * use get_rwqe_ptr() instead.
+ */
+struct qib_rwq {
+ u32 head; /* new work requests posted to the head */
+ u32 tail; /* receives pull requests from here. */
+ struct qib_rwqe wq[0];
+};
+
+struct qib_rq {
+ struct qib_rwq *wq;
+ spinlock_t lock; /* protect changes in this struct */
+ u32 size; /* size of RWQE array */
+ u8 max_sge;
+};
+
+struct qib_srq {
+ struct ib_srq ibsrq;
+ struct qib_rq rq;
+ struct qib_mmap_info *ip;
+ /* send signal when number of RWQEs < limit */
+ u32 limit;
+};
+
+struct qib_sge_state {
+ struct qib_sge *sg_list; /* next SGE to be used if any */
+ struct qib_sge sge; /* progress state for the current SGE */
+ u32 total_len;
+ u8 num_sge;
+};
+
+/*
+ * This structure holds the information that the send tasklet needs
+ * to send a RDMA read response or atomic operation.
+ */
+struct qib_ack_entry {
+ u8 opcode;
+ u8 sent;
+ u32 psn;
+ u32 lpsn;
+ union {
+ struct qib_sge rdma_sge;
+ u64 atomic_data;
+ };
+};
+
+/*
+ * Variables prefixed with s_ are for the requester (sender).
+ * Variables prefixed with r_ are for the responder (receiver).
+ * Variables prefixed with ack_ are for responder replies.
+ *
+ * Common variables are protected by both r_rq.lock and s_lock in that order
+ * which only happens in modify_qp() or changing the QP 'state'.
+ */
+struct qib_qp {
+ struct ib_qp ibqp;
+ struct qib_qp *next; /* link list for QPN hash table */
+ struct qib_qp *timer_next; /* link list for qib_ib_timer() */
+ struct list_head iowait; /* link for wait PIO buf */
+ struct list_head rspwait; /* link for waiting to respond */
+ struct ib_ah_attr remote_ah_attr;
+ struct ib_ah_attr alt_ah_attr;
+ struct qib_ib_header s_hdr; /* next packet header to send */
+ atomic_t refcount;
+ wait_queue_head_t wait;
+ wait_queue_head_t wait_dma;
+ struct timer_list s_timer;
+ struct work_struct s_work;
+ struct qib_mmap_info *ip;
+ struct qib_sge_state *s_cur_sge;
+ struct qib_verbs_txreq *s_tx;
+ struct qib_mregion *s_rdma_mr;
+ struct qib_sge_state s_sge; /* current send request data */
+ struct qib_ack_entry s_ack_queue[QIB_MAX_RDMA_ATOMIC + 1];
+ struct qib_sge_state s_ack_rdma_sge;
+ struct qib_sge_state s_rdma_read_sge;
+ struct qib_sge_state r_sge; /* current receive data */
+ spinlock_t r_lock; /* used for APM */
+ spinlock_t s_lock;
+ atomic_t s_dma_busy;
+ unsigned processor_id; /* Processor ID QP is bound to */
+ u32 s_flags;
+ u32 s_cur_size; /* size of send packet in bytes */
+ u32 s_len; /* total length of s_sge */
+ u32 s_rdma_read_len; /* total length of s_rdma_read_sge */
+ u32 s_next_psn; /* PSN for next request */
+ u32 s_last_psn; /* last response PSN processed */
+ u32 s_sending_psn; /* lowest PSN that is being sent */
+ u32 s_sending_hpsn; /* highest PSN that is being sent */
+ u32 s_psn; /* current packet sequence number */
+ u32 s_ack_rdma_psn; /* PSN for sending RDMA read responses */
+ u32 s_ack_psn; /* PSN for acking sends and RDMA writes */
+ u32 s_rnr_timeout; /* number of milliseconds for RNR timeout */
+ u32 r_ack_psn; /* PSN for next ACK or atomic ACK */
+ u64 r_wr_id; /* ID for current receive WQE */
+ unsigned long r_aflags;
+ u32 r_len; /* total length of r_sge */
+ u32 r_rcv_len; /* receive data len processed */
+ u32 r_psn; /* expected rcv packet sequence number */
+ u32 r_msn; /* message sequence number */
+ u16 s_hdrwords; /* size of s_hdr in 32 bit words */
+ u16 s_rdma_ack_cnt;
+ u8 state; /* QP state */
+ u8 s_state; /* opcode of last packet sent */
+ u8 s_ack_state; /* opcode of packet to ACK */
+ u8 s_nak_state; /* non-zero if NAK is pending */
+ u8 r_state; /* opcode of last packet received */
+ u8 r_nak_state; /* non-zero if NAK is pending */
+ u8 r_min_rnr_timer; /* retry timeout value for RNR NAKs */
+ u8 r_flags;
+ u8 r_max_rd_atomic; /* max number of RDMA read/atomic to receive */
+ u8 r_head_ack_queue; /* index into s_ack_queue[] */
+ u8 qp_access_flags;
+ u8 s_max_sge; /* size of s_wq->sg_list */
+ u8 s_retry_cnt; /* number of times to retry */
+ u8 s_rnr_retry_cnt;
+ u8 s_retry; /* requester retry counter */
+ u8 s_rnr_retry; /* requester RNR retry counter */
+ u8 s_pkey_index; /* PKEY index to use */
+ u8 s_alt_pkey_index; /* Alternate path PKEY index to use */
+ u8 s_max_rd_atomic; /* max number of RDMA read/atomic to send */
+ u8 s_num_rd_atomic; /* number of RDMA read/atomic pending */
+ u8 s_tail_ack_queue; /* index into s_ack_queue[] */
+ u8 s_srate;
+ u8 s_draining;
+ u8 s_mig_state;
+ u8 timeout; /* Timeout for this QP */
+ u8 alt_timeout; /* Alternate path timeout for this QP */
+ u8 port_num;
+ enum ib_mtu path_mtu;
+ u32 remote_qpn;
+ u32 qkey; /* QKEY for this QP (for UD or RD) */
+ u32 s_size; /* send work queue size */
+ u32 s_head; /* new entries added here */
+ u32 s_tail; /* next entry to process */
+ u32 s_cur; /* current work queue entry */
+ u32 s_acked; /* last un-ACK'ed entry */
+ u32 s_last; /* last completed entry */
+ u32 s_ssn; /* SSN of tail entry */
+ u32 s_lsn; /* limit sequence number (credit) */
+ struct qib_swqe *s_wq; /* send work queue */
+ struct qib_swqe *s_wqe;
+ struct qib_rq r_rq; /* receive work queue */
+ struct qib_sge r_sg_list[0]; /* verified SGEs */
+};
+
+/*
+ * Atomic bit definitions for r_aflags.
+ */
+#define QIB_R_WRID_VALID 0
+#define QIB_R_REWIND_SGE 1
+
+/*
+ * Bit definitions for r_flags.
+ */
+#define QIB_R_REUSE_SGE 0x01
+#define QIB_R_RDMAR_SEQ 0x02
+#define QIB_R_RSP_NAK 0x04
+#define QIB_R_RSP_SEND 0x08
+#define QIB_R_COMM_EST 0x10
+
+/*
+ * Bit definitions for s_flags.
+ *
+ * QIB_S_SIGNAL_REQ_WR - set if QP send WRs contain completion signaled
+ * QIB_S_BUSY - send tasklet is processing the QP
+ * QIB_S_TIMER - the RC retry timer is active
+ * QIB_S_ACK_PENDING - an ACK is waiting to be sent after RDMA read/atomics
+ * QIB_S_WAIT_FENCE - waiting for all prior RDMA read or atomic SWQEs
+ * before processing the next SWQE
+ * QIB_S_WAIT_RDMAR - waiting for a RDMA read or atomic SWQE to complete
+ * before processing the next SWQE
+ * QIB_S_WAIT_RNR - waiting for RNR timeout
+ * QIB_S_WAIT_SSN_CREDIT - waiting for RC credits to process next SWQE
+ * QIB_S_WAIT_DMA - waiting for send DMA queue to drain before generating
+ * next send completion entry not via send DMA
+ * QIB_S_WAIT_PIO - waiting for a send buffer to be available
+ * QIB_S_WAIT_TX - waiting for a struct qib_verbs_txreq to be available
+ * QIB_S_WAIT_DMA_DESC - waiting for DMA descriptors to be available
+ * QIB_S_WAIT_KMEM - waiting for kernel memory to be available
+ * QIB_S_WAIT_PSN - waiting for a packet to exit the send DMA queue
+ * QIB_S_WAIT_ACK - waiting for an ACK packet before sending more requests
+ * QIB_S_SEND_ONE - send one packet, request ACK, then wait for ACK
+ */
+#define QIB_S_SIGNAL_REQ_WR 0x0001
+#define QIB_S_BUSY 0x0002
+#define QIB_S_TIMER 0x0004
+#define QIB_S_RESP_PENDING 0x0008
+#define QIB_S_ACK_PENDING 0x0010
+#define QIB_S_WAIT_FENCE 0x0020
+#define QIB_S_WAIT_RDMAR 0x0040
+#define QIB_S_WAIT_RNR 0x0080
+#define QIB_S_WAIT_SSN_CREDIT 0x0100
+#define QIB_S_WAIT_DMA 0x0200
+#define QIB_S_WAIT_PIO 0x0400
+#define QIB_S_WAIT_TX 0x0800
+#define QIB_S_WAIT_DMA_DESC 0x1000
+#define QIB_S_WAIT_KMEM 0x2000
+#define QIB_S_WAIT_PSN 0x4000
+#define QIB_S_WAIT_ACK 0x8000
+#define QIB_S_SEND_ONE 0x10000
+#define QIB_S_UNLIMITED_CREDIT 0x20000
+
+/*
+ * Wait flags that would prevent any packet type from being sent.
+ */
+#define QIB_S_ANY_WAIT_IO (QIB_S_WAIT_PIO | QIB_S_WAIT_TX | \
+ QIB_S_WAIT_DMA_DESC | QIB_S_WAIT_KMEM)
+
+/*
+ * Wait flags that would prevent send work requests from making progress.
+ */
+#define QIB_S_ANY_WAIT_SEND (QIB_S_WAIT_FENCE | QIB_S_WAIT_RDMAR | \
+ QIB_S_WAIT_RNR | QIB_S_WAIT_SSN_CREDIT | QIB_S_WAIT_DMA | \
+ QIB_S_WAIT_PSN | QIB_S_WAIT_ACK)
+
+#define QIB_S_ANY_WAIT (QIB_S_ANY_WAIT_IO | QIB_S_ANY_WAIT_SEND)
+
+#define QIB_PSN_CREDIT 16
+
+/*
+ * Since struct qib_swqe is not a fixed size, we can't simply index into
+ * struct qib_qp.s_wq. This function does the array index computation.
+ */
+static inline struct qib_swqe *get_swqe_ptr(struct qib_qp *qp,
+ unsigned n)
+{
+ return (struct qib_swqe *)((char *)qp->s_wq +
+ (sizeof(struct qib_swqe) +
+ qp->s_max_sge *
+ sizeof(struct qib_sge)) * n);
+}
+
+/*
+ * Since struct qib_rwqe is not a fixed size, we can't simply index into
+ * struct qib_rwq.wq. This function does the array index computation.
+ */
+static inline struct qib_rwqe *get_rwqe_ptr(struct qib_rq *rq, unsigned n)
+{
+ return (struct qib_rwqe *)
+ ((char *) rq->wq->wq +
+ (sizeof(struct qib_rwqe) +
+ rq->max_sge * sizeof(struct ib_sge)) * n);
+}
+
+/*
+ * QPN-map pages start out as NULL, they get allocated upon
+ * first use and are never deallocated. This way,
+ * large bitmaps are not allocated unless large numbers of QPs are used.
+ */
+struct qpn_map {
+ void *page;
+};
+
+struct qib_qpn_table {
+ spinlock_t lock; /* protect changes in this struct */
+ unsigned flags; /* flags for QP0/1 allocated for each port */
+ u32 last; /* last QP number allocated */
+ u32 nmaps; /* size of the map table */
+ u16 limit;
+ u16 mask;
+ /* bit map of free QP numbers other than 0/1 */
+ struct qpn_map map[QPNMAP_ENTRIES];
+};
+
+struct qib_lkey_table {
+ spinlock_t lock; /* protect changes in this struct */
+ u32 next; /* next unused index (speeds search) */
+ u32 gen; /* generation count */
+ u32 max; /* size of the table */
+ struct qib_mregion **table;
+};
+
+struct qib_opcode_stats {
+ u64 n_packets; /* number of packets */
+ u64 n_bytes; /* total number of bytes */
+};
+
+struct qib_ibport { /* verbs-layer state and counters kept per IB port */
+ struct qib_qp *qp0; /* NOTE(review): presumably the port's QP0 -- confirm */
+ struct qib_qp *qp1; /* NOTE(review): presumably the port's QP1 -- confirm */
+ struct ib_mad_agent *send_agent; /* agent for SMI (traps) */
+ struct qib_ah *sm_ah;
+ struct qib_ah *smi_ah;
+ struct rb_root mcast_tree; /* presumably the tree searched by qib_mcast_find() -- confirm */
+ spinlock_t lock; /* protect changes in this struct */
+
+ /* non-zero when timer is set */
+ unsigned long mkey_lease_timeout;
+ unsigned long trap_timeout;
+ __be64 gid_prefix; /* in network order */
+ __be64 mkey;
+ __be64 guids[QIB_GUIDS_PER_PORT - 1]; /* writable GUIDs */
+ u64 tid; /* TID for traps */
+ u64 n_unicast_xmit; /* total unicast packets sent */
+ u64 n_unicast_rcv; /* total unicast packets received */
+ u64 n_multicast_xmit; /* total multicast packets sent */
+ u64 n_multicast_rcv; /* total multicast packets received */
+ u64 z_symbol_error_counter; /* starting count for PMA */
+ u64 z_link_error_recovery_counter; /* starting count for PMA */
+ u64 z_link_downed_counter; /* starting count for PMA */
+ u64 z_port_rcv_errors; /* starting count for PMA */
+ u64 z_port_rcv_remphys_errors; /* starting count for PMA */
+ u64 z_port_xmit_discards; /* starting count for PMA */
+ u64 z_port_xmit_data; /* starting count for PMA */
+ u64 z_port_rcv_data; /* starting count for PMA */
+ u64 z_port_xmit_packets; /* starting count for PMA */
+ u64 z_port_rcv_packets; /* starting count for PMA */
+ u32 z_local_link_integrity_errors; /* starting count for PMA */
+ u32 z_excessive_buffer_overrun_errors; /* starting count for PMA */
+ u32 z_vl15_dropped; /* starting count for PMA */
+ u32 n_rc_resends; /* NOTE(review): the n_* fields from here on are event counters by name; increment sites are outside this chunk */
+ u32 n_rc_acks;
+ u32 n_rc_qacks;
+ u32 n_rc_delayed_comp;
+ u32 n_seq_naks;
+ u32 n_rdma_seq;
+ u32 n_rnr_naks;
+ u32 n_other_naks;
+ u32 n_loop_pkts;
+ u32 n_pkt_drops;
+ u32 n_vl15_dropped;
+ u32 n_rc_timeouts;
+ u32 n_dmawait;
+ u32 n_unaligned;
+ u32 n_rc_dupreq;
+ u32 n_rc_seqnak;
+ u32 port_cap_flags;
+ u32 pma_sample_start;
+ u32 pma_sample_interval;
+ __be16 pma_counter_select[5];
+ u16 pma_tag;
+ u16 pkey_violations;
+ u16 qkey_violations;
+ u16 mkey_violations;
+ u16 mkey_lease_period;
+ u16 sm_lid;
+ u16 repress_traps;
+ u8 sm_sl;
+ u8 mkeyprot;
+ u8 subnet_timeout;
+ u8 vl_high_limit;
+ u8 sl_to_vl[16]; /* presumably an SL -> VL map with one entry per SL -- confirm */
+
+ struct qib_opcode_stats opstats[128]; /* packet/byte counts; presumably indexed by opcode -- confirm */
+};
+
+struct qib_ibdev { /* driver wrapper around struct ib_device */
+ struct ib_device ibdev; /* embedded core device; to_idev() maps back with container_of() */
+ struct list_head pending_mmaps;
+ spinlock_t mmap_offset_lock; /* protect mmap_offset */
+ u32 mmap_offset;
+ struct qib_mregion *dma_mr;
+
+ /* QP numbers are shared by all IB ports */
+ struct qib_qpn_table qpn_table;
+ struct qib_lkey_table lk_table;
+ struct list_head piowait; /* list for wait PIO buf */
+ struct list_head dmawait; /* list for wait DMA */
+ struct list_head txwait; /* list for wait qib_verbs_txreq */
+ struct list_head memwait; /* list for wait kernel memory */
+ struct list_head txreq_free;
+ struct timer_list mem_timer;
+ struct qib_qp **qp_table;
+ struct qib_pio_header *pio_hdrs;
+ dma_addr_t pio_hdrs_phys;
+ /* NOTE(review): this comment read "list of QPs waiting for RNR timer", but no such list is declared here */
+ spinlock_t pending_lock; /* protect wait lists, PMA counters, etc. */
+ unsigned qp_table_size; /* size of the hash table */
+ spinlock_t qpt_lock; /* NOTE(review): presumably protects qp_table -- confirm */
+
+ u32 n_piowait;
+ u32 n_txwait;
+
+ u32 n_pds_allocated; /* number of PDs allocated for device */
+ spinlock_t n_pds_lock; /* NOTE(review): each n_*_lock presumably guards the counter declared just before it -- confirm */
+ u32 n_ahs_allocated; /* number of AHs allocated for device */
+ spinlock_t n_ahs_lock;
+ u32 n_cqs_allocated; /* number of CQs allocated for device */
+ spinlock_t n_cqs_lock;
+ u32 n_qps_allocated; /* number of QPs allocated for device */
+ spinlock_t n_qps_lock;
+ u32 n_srqs_allocated; /* number of SRQs allocated for device */
+ spinlock_t n_srqs_lock;
+ u32 n_mcast_grps_allocated; /* number of mcast groups allocated */
+ spinlock_t n_mcast_grps_lock;
+};
+
+struct qib_verbs_counters { /* counter set passed to qib_get_counters(); names parallel the z_* baselines in struct qib_ibport */
+ u64 symbol_error_counter;
+ u64 link_error_recovery_counter;
+ u64 link_downed_counter;
+ u64 port_rcv_errors;
+ u64 port_rcv_remphys_errors;
+ u64 port_xmit_discards;
+ u64 port_xmit_data;
+ u64 port_rcv_data;
+ u64 port_xmit_packets;
+ u64 port_rcv_packets;
+ u32 local_link_integrity_errors;
+ u32 excessive_buffer_overrun_errors;
+ u32 vl15_dropped;
+};
+
+static inline struct qib_mr *to_imr(struct ib_mr *ibmr)
+{
+	/* Step back from the embedded "ibmr" member to its qib_mr. */
+	struct qib_mr *mr = container_of(ibmr, struct qib_mr, ibmr);
+
+	return mr;
+}
+
+static inline struct qib_pd *to_ipd(struct ib_pd *ibpd)
+{
+	/* Step back from the embedded "ibpd" member to its qib_pd. */
+	struct qib_pd *pd = container_of(ibpd, struct qib_pd, ibpd);
+
+	return pd;
+}
+
+static inline struct qib_ah *to_iah(struct ib_ah *ibah)
+{
+	/* Step back from the embedded "ibah" member to its qib_ah. */
+	struct qib_ah *ah = container_of(ibah, struct qib_ah, ibah);
+
+	return ah;
+}
+
+static inline struct qib_cq *to_icq(struct ib_cq *ibcq)
+{
+	/* Step back from the embedded "ibcq" member to its qib_cq. */
+	struct qib_cq *cq = container_of(ibcq, struct qib_cq, ibcq);
+
+	return cq;
+}
+
+static inline struct qib_srq *to_isrq(struct ib_srq *ibsrq)
+{
+	/* Step back from the embedded "ibsrq" member to its qib_srq. */
+	struct qib_srq *srq = container_of(ibsrq, struct qib_srq, ibsrq);
+
+	return srq;
+}
+
+static inline struct qib_qp *to_iqp(struct ib_qp *ibqp)
+{
+	/* Step back from the embedded "ibqp" member to its qib_qp. */
+	struct qib_qp *qp = container_of(ibqp, struct qib_qp, ibqp);
+
+	return qp;
+}
+
+static inline struct qib_ibdev *to_idev(struct ib_device *ibdev)
+{
+	/* Step back from the embedded "ibdev" member to its qib_ibdev. */
+	struct qib_ibdev *dev = container_of(ibdev, struct qib_ibdev, ibdev);
+
+	return dev;
+}
+
+/*
+ * Send if not busy or waiting for I/O and either
+ * a RC response is pending or we can process send work requests.
+ */
+static inline int qib_send_ok(struct qib_qp *qp)
+{
+	/* Busy, or waiting on I/O: never OK to send. */
+	if (qp->s_flags & (QIB_S_BUSY | QIB_S_ANY_WAIT_IO))
+		return 0;
+
+	/* A header is already built, or an RC response is pending. */
+	if (qp->s_hdrwords)
+		return 1;
+	if (qp->s_flags & QIB_S_RESP_PENDING)
+		return 1;
+
+	/* Otherwise OK only if no send-side wait flag is set. */
+	return !(qp->s_flags & QIB_S_ANY_WAIT_SEND);
+}
+
+extern struct workqueue_struct *qib_wq;
+extern struct workqueue_struct *qib_cq_wq;
+
+/*
+ * This must be called with s_lock held.
+ */
+static inline void qib_schedule_send(struct qib_qp *qp)
+{
+	if (!qib_send_ok(qp))
+		return;
+
+	/* Queue on the QP's recorded CPU; plain queue_work() if already there. */
+	if (qp->processor_id != smp_processor_id())
+		queue_work_on(qp->processor_id, qib_wq, &qp->s_work);
+	else
+		queue_work(qib_wq, &qp->s_work);
+}
+
+static inline int qib_pkey_ok(u16 pkey1, u16 pkey2)
+{
+	u16 low1 = pkey1 & 0x7FFF;
+	u16 low2 = pkey2 & 0x7FFF;
+
+	/* Low 15 bits must be non-zero and match. */
+	if (!low1 || low1 != low2)
+		return 0;
+
+	/* At least one key must be a full member (bit 15 set). */
+	return ((pkey1 | pkey2) & 0x8000) != 0;
+}
+
+void qib_bad_pqkey(struct qib_ibport *ibp, __be16 trap_num, u32 key, u32 sl,
+ u32 qp1, u32 qp2, __be16 lid1, __be16 lid2);
+void qib_cap_mask_chg(struct qib_ibport *ibp);
+void qib_sys_guid_chg(struct qib_ibport *ibp);
+void qib_node_desc_chg(struct qib_ibport *ibp);
+int qib_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
+ struct ib_wc *in_wc, struct ib_grh *in_grh,
+ struct ib_mad *in_mad, struct ib_mad *out_mad);
+int qib_create_agents(struct qib_ibdev *dev);
+void qib_free_agents(struct qib_ibdev *dev);
+
+/*
+ * Compare the lower 24 bits of the two values.
+ * Returns an integer <, ==, or > than zero.
+ *
+ * The subtraction and the shift are carried out on u32 so that wraparound
+ * and shifting a bit into the sign position are well defined; doing them
+ * on signed int would be undefined behaviour in C whenever the difference
+ * is negative or overflows.  The shift by 8 discards the upper 8 bits of
+ * the difference, so only the low 24 bits of @a and @b affect the result,
+ * and bit 23 of the difference becomes the sign of the return value.
+ */
+static inline int qib_cmp24(u32 a, u32 b)
+{
+	return (int)((a - b) << 8);
+}
+
+struct qib_mcast *qib_mcast_find(struct qib_ibport *ibp, union ib_gid *mgid);
+
+int qib_snapshot_counters(struct qib_pportdata *ppd, u64 *swords,
+ u64 *rwords, u64 *spkts, u64 *rpkts,
+ u64 *xmit_wait);
+
+int qib_get_counters(struct qib_pportdata *ppd,
+ struct qib_verbs_counters *cntrs);
+
+int qib_multicast_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+
+int qib_multicast_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid);
+
+int qib_mcast_tree_empty(struct qib_ibport *ibp);
+
+__be32 qib_compute_aeth(struct qib_qp *qp);
+
+struct qib_qp *qib_lookup_qpn(struct qib_ibport *ibp, u32 qpn);
+
+struct ib_qp *qib_create_qp(struct ib_pd *ibpd,
+ struct ib_qp_init_attr *init_attr,
+ struct ib_udata *udata);
+
+int qib_destroy_qp(struct ib_qp *ibqp);
+
+int qib_error_qp(struct qib_qp *qp, enum ib_wc_status err);
+
+int qib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_udata *udata);
+
+int qib_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+ int attr_mask, struct ib_qp_init_attr *init_attr);
+
+unsigned qib_free_all_qps(struct qib_devdata *dd);
+
+void qib_init_qpn_table(struct qib_devdata *dd, struct qib_qpn_table *qpt);
+
+void qib_free_qpn_table(struct qib_qpn_table *qpt);
+
+void qib_get_credit(struct qib_qp *qp, u32 aeth);
+
+unsigned qib_pkt_delay(u32 plen, u8 snd_mult, u8 rcv_mult);
+
+void qib_verbs_sdma_desc_avail(struct qib_pportdata *ppd, unsigned avail);
+
+void qib_put_txreq(struct qib_verbs_txreq *tx);
+
+int qib_verbs_send(struct qib_qp *qp, struct qib_ib_header *hdr,
+ u32 hdrwords, struct qib_sge_state *ss, u32 len);
+
+void qib_copy_sge(struct qib_sge_state *ss, void *data, u32 length,
+ int release);
+
+void qib_skip_sge(struct qib_sge_state *ss, u32 length, int release);
+
+void qib_uc_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+
+void qib_rc_rcv(struct qib_ctxtdata *rcd, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+
+int qib_check_ah(struct ib_device *ibdev, struct ib_ah_attr *ah_attr);
+
+void qib_rc_rnr_retry(unsigned long arg);
+
+void qib_rc_send_complete(struct qib_qp *qp, struct qib_ib_header *hdr);
+
+void qib_rc_error(struct qib_qp *qp, enum ib_wc_status err);
+
+int qib_post_ud_send(struct qib_qp *qp, struct ib_send_wr *wr);
+
+void qib_ud_rcv(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+ int has_grh, void *data, u32 tlen, struct qib_qp *qp);
+
+int qib_alloc_lkey(struct qib_lkey_table *rkt, struct qib_mregion *mr);
+
+int qib_free_lkey(struct qib_ibdev *dev, struct qib_mregion *mr);
+
+int qib_lkey_ok(struct qib_lkey_table *rkt, struct qib_pd *pd,
+ struct qib_sge *isge, struct ib_sge *sge, int acc);
+
+int qib_rkey_ok(struct qib_qp *qp, struct qib_sge *sge,
+ u32 len, u64 vaddr, u32 rkey, int acc);
+
+int qib_post_srq_receive(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
+ struct ib_recv_wr **bad_wr);
+
+struct ib_srq *qib_create_srq(struct ib_pd *ibpd,
+ struct ib_srq_init_attr *srq_init_attr,
+ struct ib_udata *udata);
+
+int qib_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
+ enum ib_srq_attr_mask attr_mask,
+ struct ib_udata *udata);
+
+int qib_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr);
+
+int qib_destroy_srq(struct ib_srq *ibsrq);
+
+void qib_cq_enter(struct qib_cq *cq, struct ib_wc *entry, int sig);
+
+int qib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *entry);
+
+struct ib_cq *qib_create_cq(struct ib_device *ibdev, int entries,
+ int comp_vector, struct ib_ucontext *context,
+ struct ib_udata *udata);
+
+int qib_destroy_cq(struct ib_cq *ibcq);
+
+int qib_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags notify_flags);
+
+int qib_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata);
+
+struct ib_mr *qib_get_dma_mr(struct ib_pd *pd, int acc);
+
+struct ib_mr *qib_reg_phys_mr(struct ib_pd *pd,
+ struct ib_phys_buf *buffer_list,
+ int num_phys_buf, int acc, u64 *iova_start);
+
+struct ib_mr *qib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ u64 virt_addr, int mr_access_flags,
+ struct ib_udata *udata);
+
+int qib_dereg_mr(struct ib_mr *ibmr);
+
+struct ib_mr *qib_alloc_fast_reg_mr(struct ib_pd *pd, int max_page_list_len);
+
+struct ib_fast_reg_page_list *qib_alloc_fast_reg_page_list(
+ struct ib_device *ibdev, int page_list_len);
+
+void qib_free_fast_reg_page_list(struct ib_fast_reg_page_list *pl);
+
+int qib_fast_reg_mr(struct qib_qp *qp, struct ib_send_wr *wr);
+
+struct ib_fmr *qib_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
+ struct ib_fmr_attr *fmr_attr);
+
+int qib_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
+ int list_len, u64 iova);
+
+int qib_unmap_fmr(struct list_head *fmr_list);
+
+int qib_dealloc_fmr(struct ib_fmr *ibfmr);
+
+void qib_release_mmap_info(struct kref *ref);
+
+struct qib_mmap_info *qib_create_mmap_info(struct qib_ibdev *dev, u32 size,
+ struct ib_ucontext *context,
+ void *obj);
+
+void qib_update_mmap_info(struct qib_ibdev *dev, struct qib_mmap_info *ip,
+ u32 size, void *obj);
+
+int qib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma);
+
+int qib_get_rwqe(struct qib_qp *qp, int wr_id_only);
+
+void qib_migrate_qp(struct qib_qp *qp);
+
+int qib_ruc_check_hdr(struct qib_ibport *ibp, struct qib_ib_header *hdr,
+ int has_grh, struct qib_qp *qp, u32 bth0);
+
+u32 qib_make_grh(struct qib_ibport *ibp, struct ib_grh *hdr,
+ struct ib_global_route *grh, u32 hwords, u32 nwords);
+
+void qib_make_ruc_header(struct qib_qp *qp, struct qib_other_headers *ohdr,
+ u32 bth0, u32 bth2);
+
+void qib_do_send(struct work_struct *work);
+
+void qib_send_complete(struct qib_qp *qp, struct qib_swqe *wqe,
+ enum ib_wc_status status);
+
+void qib_send_rc_ack(struct qib_qp *qp);
+
+int qib_make_rc_req(struct qib_qp *qp);
+
+int qib_make_uc_req(struct qib_qp *qp);
+
+int qib_make_ud_req(struct qib_qp *qp);
+
+int qib_register_ib_device(struct qib_devdata *);
+
+void qib_unregister_ib_device(struct qib_devdata *);
+
+void qib_ib_rcv(struct qib_ctxtdata *, void *, void *, u32);
+
+void qib_ib_piobufavail(struct qib_devdata *);
+
+unsigned qib_get_npkeys(struct qib_devdata *);
+
+unsigned qib_get_pkey(struct qib_ibport *, unsigned);
+
+extern const enum ib_wc_opcode ib_qib_wc_opcode[];
+
+/*
+ * Below are the HCA-independent IB PhysPortState values, as returned
+ * by the f_ibphys_portstate() routine.
+ */
+#define IB_PHYSPORTSTATE_SLEEP 1
+#define IB_PHYSPORTSTATE_POLL 2
+#define IB_PHYSPORTSTATE_DISABLED 3
+#define IB_PHYSPORTSTATE_CFG_TRAIN 4
+#define IB_PHYSPORTSTATE_LINKUP 5
+#define IB_PHYSPORTSTATE_LINK_ERR_RECOVER 6
+#define IB_PHYSPORTSTATE_CFG_DEBOUNCE 8
+#define IB_PHYSPORTSTATE_CFG_IDLE 0xB
+#define IB_PHYSPORTSTATE_RECOVERY_RETRAIN 0xC
+#define IB_PHYSPORTSTATE_RECOVERY_WAITRMT 0xE
+#define IB_PHYSPORTSTATE_RECOVERY_IDLE 0xF
+#define IB_PHYSPORTSTATE_CFG_ENH 0x10
+#define IB_PHYSPORTSTATE_CFG_WAIT_ENH 0x13
+
+extern const int ib_qib_state_ops[];
+
+extern __be64 ib_qib_sys_image_guid; /* in network order */
+
+extern unsigned int ib_qib_lkey_table_size;
+
+extern unsigned int ib_qib_max_cqes;
+
+extern unsigned int ib_qib_max_cqs;
+
+extern unsigned int ib_qib_max_qp_wrs;
+
+extern unsigned int ib_qib_max_qps;
+
+extern unsigned int ib_qib_max_sges;
+
+extern unsigned int ib_qib_max_mcast_grps;
+
+extern unsigned int ib_qib_max_mcast_qp_attached;
+
+extern unsigned int ib_qib_max_srqs;
+
+extern unsigned int ib_qib_max_srq_sges;
+
+extern unsigned int ib_qib_max_srq_wrs;
+
+extern const u32 ib_qib_rnr_table[];
+
+extern struct ib_dma_mapping_ops qib_dma_mapping_ops;
+
+#endif /* QIB_VERBS_H */
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
* [PATCH v4 10/11] IB/qib: Add qib_sd7220.c
From: Ralph Campbell @ 2010-05-19 0:27 UTC (permalink / raw)
To: Roland Dreier; +Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20100519002647.11950.62870.stgit-/vjeY7uYZjrPXfVEPVhPGq6RkeBMCJyt@public.gmane.org>
creates the qib_sd7220.c file.
Signed-off-by: Ralph Campbell <ralph.campbell-h88ZbnxC6KDQT0dZR+AlfA@public.gmane.org>
---
drivers/infiniband/hw/qib/qib_sd7220.c | 1413 ++++++++++++++++++++++++++++++++
1 files changed, 1413 insertions(+), 0 deletions(-)
create mode 100644 drivers/infiniband/hw/qib/qib_sd7220.c
diff --git a/drivers/infiniband/hw/qib/qib_sd7220.c b/drivers/infiniband/hw/qib/qib_sd7220.c
new file mode 100644
index 0000000..0aeed0e
--- /dev/null
+++ b/drivers/infiniband/hw/qib/qib_sd7220.c
@@ -0,0 +1,1413 @@
+/*
+ * Copyright (c) 2006, 2007, 2008, 2009 QLogic Corporation. All rights reserved.
+ * Copyright (c) 2003, 2004, 2005, 2006 PathScale, Inc. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses. You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+/*
+ * This file contains all of the code that is specific to the SerDes
+ * on the QLogic_IB 7220 chip.
+ */
+
+#include <linux/pci.h>
+#include <linux/delay.h>
+
+#include "qib.h"
+#include "qib_7220.h"
+
+/*
+ * Same as in qib_iba7220.c, but just the registers needed here.
+ * Could move whole set to qib_7220.h, but decided better to keep
+ * local.
+ */
+#define KREG_IDX(regname) (QIB_7220_##regname##_OFFS / sizeof(u64))
+#define kr_hwerrclear KREG_IDX(HwErrClear)
+#define kr_hwerrmask KREG_IDX(HwErrMask)
+#define kr_hwerrstatus KREG_IDX(HwErrStatus)
+#define kr_ibcstatus KREG_IDX(IBCStatus)
+#define kr_ibserdesctrl KREG_IDX(IBSerDesCtrl)
+#define kr_scratch KREG_IDX(Scratch)
+#define kr_xgxs_cfg KREG_IDX(XGXSCfg)
+/* these are used only here, not in qib_iba7220.c */
+#define kr_ibsd_epb_access_ctrl KREG_IDX(ibsd_epb_access_ctrl)
+#define kr_ibsd_epb_transaction_reg KREG_IDX(ibsd_epb_transaction_reg)
+#define kr_pciesd_epb_transaction_reg KREG_IDX(pciesd_epb_transaction_reg)
+#define kr_pciesd_epb_access_ctrl KREG_IDX(pciesd_epb_access_ctrl)
+#define kr_serdes_ddsrxeq0 KREG_IDX(SerDes_DDSRXEQ0)
+
+/*
+ * The IBSerDesMappTable is a memory that holds values to be stored in
+ * various SerDes registers by IBC.
+ */
+#define kr_serdes_maptable KREG_IDX(IBSerDesMappTable)
+
+/*
+ * Below used for sdnum parameter, selecting one of the two sections
+ * used for PCIe, or the single SerDes used for IB.
+ */
+#define PCIE_SERDES0 0
+#define PCIE_SERDES1 1
+
+/*
+ * The EPB requires addressing in a particular form. EPB_LOC() is intended
+ * to make #definitions a little more readable.
+ */
+#define EPB_ADDR_SHF 8
+/* Each argument is parenthesized so that expressions may be passed safely. */
+#define EPB_LOC(chn, elt, reg) \
+	((((elt) & 0xf) | (((chn) & 7) << 4) | (((reg) & 0x3f) << 9)) << \
+	 EPB_ADDR_SHF)
+#define EPB_IB_QUAD0_CS_SHF (25)
+#define EPB_IB_QUAD0_CS (1U << EPB_IB_QUAD0_CS_SHF)
+#define EPB_IB_UC_CS_SHF (26)
+#define EPB_PCIE_UC_CS_SHF (27)
+#define EPB_GLOBAL_WR (1U << (EPB_ADDR_SHF + 8))
+
+/* Forward declarations. */
+static int qib_sd7220_reg_mod(struct qib_devdata *dd, int sdnum, u32 loc,
+ u32 data, u32 mask);
+static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
+ int mask);
+static int qib_sd_trimdone_poll(struct qib_devdata *dd);
+static void qib_sd_trimdone_monitor(struct qib_devdata *dd, const char *where);
+static int qib_sd_setvals(struct qib_devdata *dd);
+static int qib_sd_early(struct qib_devdata *dd);
+static int qib_sd_dactrim(struct qib_devdata *dd);
+static int qib_internal_presets(struct qib_devdata *dd);
+/* Tweak the register (CMUCTRL5) that contains the TRIMSELF controls */
+static int qib_sd_trimself(struct qib_devdata *dd, int val);
+static int epb_access(struct qib_devdata *dd, int sdnum, int claim);
+
+/*
+ * Below keeps track of whether the "once per power-on" initialization has
+ * been done, because uC code Version 1.32.17 or higher allows the uC to
+ * be reset at will, and Automatic Equalization may require it. So the
+ * state of the reset "pin", is no longer valid. Instead, we check for the
+ * actual uC code having been loaded.
+ */
+static int qib_ibsd_ucode_loaded(struct qib_pportdata *ppd)
+{
+	struct qib_devdata *dd = ppd->dd;
+
+	/* Flag already set: report it without running the verify pass. */
+	if (dd->cspec->serdes_first_init_done)
+		return dd->cspec->serdes_first_init_done;
+
+	/* A positive verify result is taken to mean the uC code is present. */
+	if (qib_sd7220_ib_vfy(dd) > 0)
+		dd->cspec->serdes_first_init_done = 1;
+
+	return dd->cspec->serdes_first_init_done;
+}
+
+/* repeat #define for local use. "Real" #define is in qib_iba7220.c */
+#define QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR 0x0000004000000000ULL
+#define IB_MPREG5 (EPB_LOC(6, 0, 0xE) | (1U << EPB_IB_UC_CS_SHF)) /* 1U, matching IB_MPREG6 */
+#define IB_MPREG6 (EPB_LOC(6, 0, 0xF) | (1U << EPB_IB_UC_CS_SHF))
+#define UC_PAR_CLR_D 8
+#define UC_PAR_CLR_M 0xC
+#define IB_CTRL2(chn) (EPB_LOC(chn, 7, 3) | EPB_IB_QUAD0_CS)
+#define START_EQ1(chan) EPB_LOC(chan, 7, 0x27)
+
+void qib_sd7220_clr_ibpar(struct qib_devdata *dd)
+{
+	int rc;
+
+	/* clear, then re-enable parity errs */
+	rc = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6,
+				UC_PAR_CLR_D, UC_PAR_CLR_M);
+	if (rc < 0) {
+		qib_dev_err(dd, "Failed clearing IBSerDes Parity err\n");
+		return;
+	}
+
+	/* Second write zeroes the UC_PAR_CLR_M field; its result is not checked. */
+	qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0, UC_PAR_CLR_M);
+
+	/* Scratch read, short delay, then write the parity bit to HwErrClear. */
+	qib_read_kreg32(dd, kr_scratch);
+	udelay(4);
+	qib_write_kreg(dd, kr_hwerrclear,
+		       QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR);
+	qib_read_kreg32(dd, kr_scratch);
+}
+
+/*
+ * After a reset or other unusual event, the epb interface may need
+ * to be re-synchronized, between the host and the uC.
+ * returns <0 for failure to resync within IBSD_RESYNC_TRIES (not expected)
+ */
+#define IBSD_RESYNC_TRIES 3
+#define IB_PGUDP(chn) (EPB_LOC((chn), 2, 1) | EPB_IB_QUAD0_CS)
+#define IB_CMUDONE(chn) (EPB_LOC((chn), 7, 0xF) | EPB_IB_QUAD0_CS)
+
+static int qib_resync_ibepb(struct qib_devdata *dd)
+{
+	int ret, pat, tries, chn;
+	u32 loc;
+
+	/*
+	 * Walk channels 0..3.  For each one: read IB_PGUDP, write back the
+	 * value XORed with 0xA5, re-read and compare, then check IB_CMUDONE.
+	 * Any failed step retries the same channel; the total number of
+	 * attempts across all channels is 4 * IBSD_RESYNC_TRIES.
+	 */
+	ret = -1;
+	chn = 0;
+	for (tries = 0; tries < (4 * IBSD_RESYNC_TRIES); ++tries) {
+		loc = IB_PGUDP(chn);
+		ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+		if (ret < 0) {
+			qib_dev_err(dd, "Failed read in resync\n");
+			continue;
+		}
+		if (ret != 0xF0 && ret != 0x55 && tries == 0)
+			qib_dev_err(dd, "unexpected pattern in resync\n");
+		pat = ret ^ 0xA5; /* alternate F0 and 55 */
+		ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, pat, 0xFF);
+		if (ret < 0) {
+			qib_dev_err(dd, "Failed write in resync\n");
+			continue;
+		}
+		ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+		if (ret < 0) {
+			qib_dev_err(dd, "Failed re-read in resync\n");
+			continue;
+		}
+		if (ret != pat) {
+			qib_dev_err(dd, "Failed compare1 in resync\n");
+			continue;
+		}
+		loc = IB_CMUDONE(chn);
+		ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, 0, 0);
+		if (ret < 0) {
+			qib_dev_err(dd, "Failed CMUDONE rd in resync\n");
+			continue;
+		}
+		if ((ret & 0x70) != ((chn << 4) | 0x40)) {
+			qib_dev_err(dd, "Bad CMUDONE value %02X, chn %d\n",
+				    ret, chn);
+			continue;
+		}
+		if (++chn == 4)
+			break;	/* Success */
+	}
+
+	/* Success only if every channel passed. */
+	if (chn == 4)
+		return 0;
+
+	/*
+	 * Out of tries.  After a compare or CMUDONE mismatch "ret" holds a
+	 * non-negative register value, so it cannot be used as the status;
+	 * report a negative value in every failure case.
+	 */
+	return (ret < 0) ? ret : -1;
+}
+
+/*
+ * Localize the stuff that should be done to change IB uC reset
+ * returns <0 for errors.
+ *
+ * assert_rst non-zero: set bit 0 of IBSerDesCtrl while holding the EPB,
+ * with the uC memory-parity error masked in the chip-level HwErrMask.
+ * This path always returns 0, does not check the epb_access() results,
+ * and does not write HwErrMask back afterwards.
+ *
+ * assert_rst zero: clear bit 0 of IBSerDesCtrl after re-syncing the EPB
+ * and setting the parity/watchdog suppression bits in MPREG5/MPREG6.
+ * A negative qib_sd7220_reg_mod() result is returned through "bail", in
+ * which case neither IBSerDesCtrl nor HwErrMask is rewritten.  When no
+ * error occurs the value returned is the (non-negative) result of the
+ * last qib_sd7220_reg_mod() call, not necessarily 0.
+ */
+static int qib_ibsd_reset(struct qib_devdata *dd, int assert_rst)
+{
+ u64 rst_val;
+ int ret = 0;
+ unsigned long flags;
+
+ rst_val = qib_read_kreg64(dd, kr_ibserdesctrl);
+ if (assert_rst) {
+ /*
+ * Vendor recommends "interrupting" uC before reset, to
+ * minimize possible glitches.
+ * Here that is done by claiming the EPB under sdepb_lock.
+ */
+ spin_lock_irqsave(&dd->cspec->sdepb_lock, flags);
+ epb_access(dd, IB_7220_SERDES, 1);
+ rst_val |= 1ULL;
+ /* Squelch possible parity error from _asserting_ reset */
+ qib_write_kreg(dd, kr_hwerrmask,
+ dd->cspec->hwerrmask &
+ ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR);
+ qib_write_kreg(dd, kr_ibserdesctrl, rst_val);
+ /* flush write, delay to ensure it took effect */
+ qib_read_kreg32(dd, kr_scratch);
+ udelay(2);
+ /* once it's reset, can remove interrupt */
+ epb_access(dd, IB_7220_SERDES, -1);
+ spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
+ } else {
+ /*
+ * Before we de-assert reset, we need to deal with
+ * possible glitch on the Parity-error line.
+ * Suppress it around the reset, both in chip-level
+ * hwerrmask and in IB uC control reg. uC will allow
+ * it again during startup.
+ */
+ u64 val;
+ rst_val &= ~(1ULL);
+ qib_write_kreg(dd, kr_hwerrmask,
+ dd->cspec->hwerrmask &
+ ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR);
+
+ ret = qib_resync_ibepb(dd);
+ if (ret < 0) /* logged only: ret is overwritten by the next call */
+ qib_dev_err(dd, "unable to re-sync IB EPB\n");
+
+ /* set uC control regs to suppress parity errs */
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG5, 1, 1);
+ if (ret < 0) /* no message is logged for this failure */
+ goto bail;
+ /* IB uC code past Version 1.32.17 allow suppression of wdog */
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80,
+ 0x80);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed to set WDOG disable\n");
+ goto bail;
+ }
+ qib_write_kreg(dd, kr_ibserdesctrl, rst_val);
+ /* flush write, delay for startup */
+ qib_read_kreg32(dd, kr_scratch);
+ udelay(1);
+ /* clear, then re-enable parity errs */
+ qib_sd7220_clr_ibpar(dd);
+ val = qib_read_kreg64(dd, kr_hwerrstatus);
+ if (val & QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR) {
+ qib_dev_err(dd, "IBUC Parity still set after RST\n");
+ dd->cspec->hwerrmask &= /* drop the bit from the saved mask too */
+ ~QLOGIC_IB_HWE_IB_UC_MEMORYPARITYERR;
+ }
+ qib_write_kreg(dd, kr_hwerrmask, /* write the saved mask back to the chip */
+ dd->cspec->hwerrmask);
+ }
+
+bail:
+ return ret;
+}
+
+/*
+ * Diagnostic pass over the IB SerDes TRIMDONE state.  Every message is
+ * tagged with @where; a NULL @where is reported as "?".
+ *
+ * Sequence: short settle delay, EPB resync, one throw-away read of CTRL2
+ * on channel 0, check of bit 11 of IBCStatus, a read/modify/write of
+ * MPREG6, then a walk over channels 3..0 reading CTRL2.  A channel whose
+ * read-back lacks bit 0x10 is logged, probed, has the bit written back,
+ * and is remembered so that a second walk writes the bit once more.
+ * Failures are only logged; nothing is returned.
+ */
+static void qib_sd_trimdone_monitor(struct qib_devdata *dd,
+				    const char *where)
+{
+	const char *tag = where ? where : "?";
+	int bad_chans = 0;
+	int rc, chn;
+	u64 ibcstat;
+
+	/* give time for reset to settle out in EPB */
+	udelay(2);
+
+	rc = qib_resync_ibepb(dd);
+	if (rc < 0)
+		qib_dev_err(dd, "not able to re-sync IB EPB (%s)\n", tag);
+
+	/* Do "sacrificial read" to get EPB in sane state after reset */
+	rc = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_CTRL2(0), 0, 0);
+	if (rc < 0)
+		qib_dev_err(dd, "Failed TRIMDONE 1st read, (%s)\n", tag);
+
+	/* Check/show "summary" Trim-done bit in IBCStatus */
+	ibcstat = qib_read_kreg64(dd, kr_ibcstatus);
+	if (!(ibcstat & (1ULL << 11)))
+		qib_dev_err(dd, "IBCS TRIMDONE clear (%s)\n", tag);
+
+	/*
+	 * Do "dummy read/mod/wr" to get EPB in sane state after reset
+	 * The default value for MPREG6 is 0.
+	 */
+	udelay(2);
+
+	rc = qib_sd7220_reg_mod(dd, IB_7220_SERDES, IB_MPREG6, 0x80, 0x80);
+	if (rc < 0)
+		qib_dev_err(dd, "Failed Dummy RMW, (%s)\n", tag);
+	udelay(10);
+
+	/* First walk, channels 3 down to 0: read CTRL2 and check bit 0x10. */
+	chn = 4;
+	while (chn-- > 0) {
+		int probe;
+
+		rc = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+					IB_CTRL2(chn), 0, 0);
+		if (rc < 0)
+			qib_dev_err(dd, "Failed checking TRIMDONE, chn %d"
+				    " (%s)\n", chn, tag);
+
+		if (rc & 0x10)
+			continue;
+
+		bad_chans |= (1 << chn);
+		qib_dev_err(dd, "TRIMDONE cleared on chn %d (%02X)."
+			    " (%s)\n", chn, rc, tag);
+		probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+					   IB_PGUDP(0), 0, 0);
+		qib_dev_err(dd, "probe is %d (%02X)\n",
+			    probe, probe);
+		probe = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+					   IB_CTRL2(chn), 0, 0);
+		qib_dev_err(dd, "re-read: %d (%02X)\n",
+			    probe, probe);
+		rc = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+					IB_CTRL2(chn), 0x10, 0x10);
+		if (rc < 0)
+			qib_dev_err(dd,
+				    "Err on TRIMDONE rewrite1\n");
+	}
+
+	/* Second walk: write bit 0x10 again on every channel flagged above. */
+	chn = 4;
+	while (chn-- > 0) {
+		if (!(bad_chans & (1 << chn)))
+			continue;
+
+		qib_dev_err(dd,
+			    "Reseting TRIMDONE on chn %d (%s)\n",
+			    chn, tag);
+		rc = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+					IB_CTRL2(chn), 0x10, 0x10);
+		if (rc < 0)
+			qib_dev_err(dd, "Failed re-setting "
+				    "TRIMDONE, chn %d (%s)\n",
+				    chn, tag);
+	}
+}
+
+/*
+ * Below is portion of IBA7220-specific bringup_serdes() that actually
+ * deals with registers and memory within the SerDes itself.
+ * Post IB uC code version 1.32.17, was_reset being 1 is not really
+ * informative, so we double-check.
+ *
+ * Returns 0 on success and 1 on any failure: every "goto bail" ends at
+ * the statement that sets ret = 1.  set_7220_relock_poll(dd, -1) is
+ * called on both the success and the failure path.
+ */
+int qib_sd7220_init(struct qib_devdata *dd)
+{
+ int ret = 1; /* default to failure */
+ int first_reset, was_reset;
+
+ /* SERDES MPU reset recorded in D0 */
+ was_reset = (qib_read_kreg64(dd, kr_ibserdesctrl) & 1);
+ if (!was_reset) {
+ /* entered with reset not asserted, we need to do it */
+ qib_ibsd_reset(dd, 1);
+ qib_sd_trimdone_monitor(dd, "Driver-reload");
+ }
+ /* Substitute our deduced value for was_reset */
+ ret = qib_ibsd_ucode_loaded(dd->pport);
+ if (ret < 0) /* NOTE(review): qib_ibsd_ucode_loaded() returns the serdes_first_init_done flag; a negative value looks unreachable -- confirm */
+ goto bail;
+
+ first_reset = !ret; /* First reset if IBSD uCode not yet loaded */
+ /*
+ * Alter some regs per vendor latest doc, reset-defaults
+ * are not right for IB.
+ */
+ ret = qib_sd_early(dd);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed to set IB SERDES early defaults\n");
+ goto bail;
+ }
+ /*
+ * Set DAC manual trim IB.
+ * We only do this once after chip has been reset (usually
+ * same as once per system boot).
+ */
+ if (first_reset) {
+ ret = qib_sd_dactrim(dd);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed IB SERDES DAC trim\n");
+ goto bail;
+ }
+ }
+ /*
+ * Set various registers (DDS and RXEQ) that will be
+ * controlled by IBC (in 1.2 mode) to reasonable preset values
+ * Calling the "internal" version avoids the "check for needed"
+ * and "trimdone monitor" that might be counter-productive.
+ */
+ ret = qib_internal_presets(dd);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed to set IB SERDES presets\n");
+ goto bail;
+ }
+ ret = qib_sd_trimself(dd, 0x80);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed to set IB SERDES TRIMSELF\n");
+ goto bail;
+ }
+
+ /* Load image, then try to verify */
+ ret = 0; /* Assume success */
+ if (first_reset) {
+ int vfy;
+ int trim_done;
+
+ ret = qib_sd7220_ib_load(dd);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed to load IB SERDES image\n");
+ goto bail;
+ } else {
+ /* Loaded image, try to verify: vfy must equal the load's return value */
+ vfy = qib_sd7220_ib_vfy(dd);
+ if (vfy != ret) {
+ qib_dev_err(dd, "SERDES PRAM VFY failed\n");
+ goto bail;
+ } /* end if verified */
+ } /* end if loaded */
+
+ /*
+ * Loaded and verified. Almost good...
+ * hold "success" in ret
+ */
+ ret = 0;
+ /*
+ * Prev steps all worked, continue bringup
+ * De-assert RESET to uC, only in first reset, to allow
+ * trimming.
+ *
+ * Since our default setup sets START_EQ1 to
+ * PRESET, we need to clear that for this very first run.
+ */
+ ret = ibsd_mod_allchnls(dd, START_EQ1(0), 0, 0x38);
+ if (ret < 0) {
+ qib_dev_err(dd, "Failed clearing START_EQ1\n");
+ goto bail;
+ }
+
+ qib_ibsd_reset(dd, 0); /* return value is not checked here */
+ /*
+ * If this is not the first reset, trimdone should be set
+ * already. We may need to check about this.
+ */
+ trim_done = qib_sd_trimdone_poll(dd);
+ /*
+ * Whether or not trimdone succeeded, we need to put the
+ * uC back into reset to avoid a possible fight with the
+ * IBC state-machine.
+ */
+ qib_ibsd_reset(dd, 1);
+
+ if (!trim_done) {
+ qib_dev_err(dd, "No TRIMDONE seen\n");
+ goto bail;
+ }
+ /*
+ * DEBUG: check each time we reset if trimdone bits have
+ * gotten cleared, and re-set them.
+ */
+ qib_sd_trimdone_monitor(dd, "First-reset");
+ /* Remember so we do not re-do the load, dactrim, etc. */
+ dd->cspec->serdes_first_init_done = 1;
+ }
+ /*
+ * setup for channel training and load values for
+ * RxEq and DDS in tables used by IBC in IB1.2 mode
+ */
+ ret = 0;
+ if (qib_sd_setvals(dd) >= 0) /* success skips the failure assignment below */
+ goto done;
+bail:
+ ret = 1;
+done:
+ /* start relock timer regardless, but start at 1 second */
+ set_7220_relock_poll(dd, -1);
+ return ret;
+}
+
+#define EPB_ACC_REQ 1
+#define EPB_ACC_GNT 0x100
+#define EPB_DATA_MASK 0xFF
+#define EPB_RD (1ULL << 24)
+#define EPB_TRANS_RDY (1ULL << 31)
+#define EPB_TRANS_ERR (1ULL << 30)
+#define EPB_TRANS_TRIES 5
+
+/*
+ * query, claim, release ownership of the EPB (External Parallel Bus)
+ * for a specified SERDES.
+ * the "claim" parameter is >0 to claim, <0 to release, 0 to query.
+ * Returns <0 for errors, >0 if we had ownership, else 0.
+ *
+ * Details visible in the code: the grant bit (EPB_ACC_GNT) is sampled
+ * before any write, and that earlier state is what a successful claim,
+ * release or query returns.  -1 is returned when the grant bit is still
+ * set after a release or still clear after a claim.  An unrecognized
+ * sdnum returns 0 without touching the hardware.
+ */
+static int epb_access(struct qib_devdata *dd, int sdnum, int claim)
+{
+ u16 acc;
+ u64 accval;
+ int owned = 0;
+ u64 oct_sel = 0;
+
+ switch (sdnum) {
+ case IB_7220_SERDES:
+ /*
+ * The IB SERDES "ownership" is fairly simple: a single
+ * request bit and a single grant bit.
+ */
+ acc = kr_ibsd_epb_access_ctrl;
+ break;
+
+ case PCIE_SERDES0:
+ case PCIE_SERDES1:
+ /* PCIe SERDES has two "octants", need to select which */
+ acc = kr_pciesd_epb_access_ctrl;
+ oct_sel = (2 << (sdnum - PCIE_SERDES0)); /* 2 for SERDES0, 4 for SERDES1 */
+ break;
+
+ default:
+ return 0;
+ }
+
+ /* Make sure any outstanding transaction was seen */
+ qib_read_kreg32(dd, kr_scratch);
+ udelay(15);
+
+ accval = qib_read_kreg32(dd, acc);
+
+ owned = !!(accval & EPB_ACC_GNT); /* grant state before this call's write */
+ if (claim < 0) {
+ /* Need to release */
+ u64 pollval;
+ /*
+ * The only writeable bits are the request and CS.
+ * Both should be clear
+ */
+ u64 newval = 0;
+ qib_write_kreg(dd, acc, newval);
+ /* First read after write is not trustworthy */
+ pollval = qib_read_kreg32(dd, acc);
+ udelay(5);
+ pollval = qib_read_kreg32(dd, acc);
+ if (pollval & EPB_ACC_GNT) /* still granted: release failed */
+ owned = -1;
+ } else if (claim > 0) {
+ /* Need to claim */
+ u64 pollval;
+ u64 newval = EPB_ACC_REQ | oct_sel;
+ qib_write_kreg(dd, acc, newval);
+ /* First read after write is not trustworthy */
+ pollval = qib_read_kreg32(dd, acc);
+ udelay(5);
+ pollval = qib_read_kreg32(dd, acc);
+ if (!(pollval & EPB_ACC_GNT)) /* not granted: claim failed */
+ owned = -1;
+ }
+ return owned;
+}
+
+/*
+ * Helper to deal with the race condition of write..read to EPB regs.
+ *
+ * Writes @i_val to @reg, discards one read, then polls up to
+ * EPB_TRANS_TRIES times for EPB_TRANS_RDY.
+ *
+ * Returns -1 if the last value read has EPB_TRANS_ERR set, 0 if the
+ * ready bit never showed up, otherwise the positive number of tries
+ * that were left.  On a positive return the value read is stored
+ * through @o_vp when @o_vp is non-NULL.
+ */
+static int epb_trans(struct qib_devdata *dd, u16 reg, u64 i_val, u64 *o_vp)
+{
+	int remaining = EPB_TRANS_TRIES;
+	u64 readback;
+
+	qib_write_kreg(dd, reg, i_val);
+	/* Throw away first read, as RDY bit may be stale */
+	readback = qib_read_kreg64(dd, reg);
+
+	while (remaining) {
+		readback = qib_read_kreg32(dd, reg);
+		if (readback & EPB_TRANS_RDY)
+			break;
+		udelay(5);
+		--remaining;
+	}
+
+	if (readback & EPB_TRANS_ERR)
+		return -1;
+	if (remaining > 0 && o_vp)
+		*o_vp = readback;
+	return remaining;
+}
+
+/**
+ * qib_sd7220_reg_mod - modify SERDES register
+ * @dd: the qlogic_ib device
+ * @sdnum: which SERDES to access
+ * @loc: location - channel, element, register, as packed by EPB_LOC() macro.
+ * @wd: Write Data - value to set in register
+ * @mask: ones where data should be spliced into reg.
+ *
+ * Basic register read/modify/write, with un-needed accesses elided. That is,
+ * a mask of zero will prevent write, while a mask of 0xFF will prevent read.
+ * returns current (presumed, if a write was done) contents of selected
+ * register, or <0 if errors.
+ *
+ * Errors reported as -1 are: unknown @sdnum, failure to claim or release
+ * the EPB, the transaction register never showing ready, or a failed
+ * read/write transaction.
+ */
+static int qib_sd7220_reg_mod(struct qib_devdata *dd, int sdnum, u32 loc,
+ u32 wd, u32 mask)
+{
+ u16 trans;
+ u64 transval;
+ int owned;
+ int tries, ret;
+ unsigned long flags;
+
+ /* Pick the transaction register that belongs to this SERDES */
+ switch (sdnum) {
+ case IB_7220_SERDES:
+ trans = kr_ibsd_epb_transaction_reg;
+ break;
+
+ case PCIE_SERDES0:
+ case PCIE_SERDES1:
+ trans = kr_pciesd_epb_transaction_reg;
+ break;
+
+ default:
+ return -1;
+ }
+
+ /*
+ * All access is locked in software (vs other host threads) and
+ * hardware (vs uC access).
+ */
+ spin_lock_irqsave(&dd->cspec->sdepb_lock, flags);
+
+ owned = epb_access(dd, sdnum, 1);
+ if (owned < 0) {
+ spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
+ return -1;
+ }
+ ret = 0;
+ /* Wait for the transaction register to report ready before using it */
+ for (tries = EPB_TRANS_TRIES; tries; --tries) {
+ transval = qib_read_kreg32(dd, trans);
+ if (transval & EPB_TRANS_RDY)
+ break;
+ udelay(5);
+ }
+
+ if (tries > 0) {
+ tries = 1; /* to make read-skip work */
+ if (mask != 0xFF) {
+ /*
+ * Not a pure write, so need to read.
+ * loc encodes chip-select as well as address
+ */
+ transval = loc | EPB_RD;
+ tries = epb_trans(dd, trans, transval, &transval);
+ }
+ if (tries > 0 && mask != 0) {
+ /*
+ * Not a pure read, so need to write.
+ */
+ /* Splice new bits over the value read (or polled) above */
+ wd = (wd & mask) | (transval & ~mask);
+ transval = loc | (wd & EPB_DATA_MASK);
+ tries = epb_trans(dd, trans, transval, &transval);
+ }
+ }
+ /* else, failed to see ready, what error-handling? */
+
+ /*
+ * Release bus. Failure is an error.
+ */
+ if (epb_access(dd, sdnum, -1) < 0)
+ ret = -1;
+ else
+ ret = transval & EPB_DATA_MASK;
+
+ spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
+ /* A failed or timed-out transaction overrides the data result */
+ if (tries <= 0)
+ ret = -1;
+ return ret;
+}
+
+#define EPB_ROM_R (2)
+#define EPB_ROM_W (1)
+/*
+ * Below, all uC-related, use appropriate UC_CS, depending
+ * on which SerDes is used.
+ */
+#define EPB_UC_CTL EPB_LOC(6, 0, 0)
+#define EPB_MADDRL EPB_LOC(6, 0, 2)
+#define EPB_MADDRH EPB_LOC(6, 0, 3)
+#define EPB_ROMDATA EPB_LOC(6, 0, 4)
+#define EPB_RAMDATA EPB_LOC(6, 0, 5)
+
+/*
+ * Transfer data to/from uC Program RAM of IB or PCIe SerDes.
+ *
+ * @dd: the qlogic_ib device
+ * @sdnum: which SERDES to access
+ * @loc: location; only the low 13 bits (address within the 8KB memory)
+ *	are used
+ * @buf: source (write) or destination (read) buffer of at least @cnt bytes
+ * @cnt: number of bytes to transfer
+ * @rd_notwr: non-zero to read from the uC RAM, zero to write to it
+ *
+ * Returns the number of bytes transferred, or -1 if @sdnum is unknown,
+ * the EPB could not be claimed or released, the transaction register
+ * never reported ready, or the final control-clearing transaction
+ * failed.  A data transaction that fails mid-transfer shows up as a
+ * short count when the final control-clear succeeds, so callers must
+ * compare the return value against @cnt.
+ */
+static int qib_sd7220_ram_xfer(struct qib_devdata *dd, int sdnum, u32 loc,
+			       u8 *buf, int cnt, int rd_notwr)
+{
+	u16 trans;
+	u64 transval;
+	u64 csbit;
+	int owned;
+	int tries;
+	int sofar;
+	int addr;
+	int ret;
+	unsigned long flags;
+
+	/* Pick appropriate transaction reg and "Chip select" for this serdes */
+	switch (sdnum) {
+	case IB_7220_SERDES:
+		csbit = 1ULL << EPB_IB_UC_CS_SHF;
+		trans = kr_ibsd_epb_transaction_reg;
+		break;
+
+	case PCIE_SERDES0:
+	case PCIE_SERDES1:
+		/* PCIe SERDES has uC "chip select" in different bit, too */
+		csbit = 1ULL << EPB_PCIE_UC_CS_SHF;
+		trans = kr_pciesd_epb_transaction_reg;
+		break;
+
+	default:
+		return -1;
+	}
+
+	spin_lock_irqsave(&dd->cspec->sdepb_lock, flags);
+
+	owned = epb_access(dd, sdnum, 1);
+	if (owned < 0) {
+		spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
+		return -1;
+	}
+
+	/*
+	 * In future code, we may need to distinguish several address ranges,
+	 * and select various memories based on this. For now, just trim
+	 * "loc" (location including address and memory select) to
+	 * "addr" (address within memory). We will only support PRAM.
+	 * The memory is 8KB.
+	 */
+	addr = loc & 0x1FFF;
+
+	/* Wait for the transaction register to report ready */
+	for (tries = EPB_TRANS_TRIES; tries; --tries) {
+		transval = qib_read_kreg32(dd, trans);
+		if (transval & EPB_TRANS_RDY)
+			break;
+		udelay(5);
+	}
+
+	sofar = 0;
+	if (tries > 0) {
+		/*
+		 * Every "memory" access is doubly-indirect.
+		 * We set two bytes of address, then read/write
+		 * one or more bytes of data.
+		 */
+
+		/* First, we set control to "Read" or "Write" */
+		transval = csbit | EPB_UC_CTL |
+			(rd_notwr ? EPB_ROM_R : EPB_ROM_W);
+		tries = epb_trans(dd, trans, transval, &transval);
+		while (tries > 0 && sofar < cnt) {
+			if (!sofar) {
+				/* Only set address at start of chunk */
+				int addrbyte = (addr + sofar) >> 8;
+
+				transval = csbit | EPB_MADDRH | addrbyte;
+				tries = epb_trans(dd, trans, transval,
+						  &transval);
+				if (tries <= 0)
+					break;
+				addrbyte = (addr + sofar) & 0xFF;
+				transval = csbit | EPB_MADDRL | addrbyte;
+				tries = epb_trans(dd, trans, transval,
+						  &transval);
+				if (tries <= 0)
+					break;
+			}
+
+			if (rd_notwr)
+				transval = csbit | EPB_ROMDATA | EPB_RD;
+			else
+				transval = csbit | EPB_ROMDATA | buf[sofar];
+			tries = epb_trans(dd, trans, transval, &transval);
+			if (tries <= 0)
+				break;
+			if (rd_notwr)
+				buf[sofar] = transval & EPB_DATA_MASK;
+			++sofar;
+		}
+		/* Finally, clear control-bit for Read or Write */
+		transval = csbit | EPB_UC_CTL;
+		tries = epb_trans(dd, trans, transval, &transval);
+	}
+
+	ret = sofar;
+	/* Release bus. Failure is an error */
+	if (epb_access(dd, sdnum, -1) < 0)
+		ret = -1;
+
+	spin_unlock_irqrestore(&dd->cspec->sdepb_lock, flags);
+	if (tries <= 0)
+		ret = -1;
+	return ret;
+}
+
+#define PROG_CHUNK 64
+
+/*
+ * Write @len bytes of @img into the SerDes uC RAM starting at @offset,
+ * in pieces of at most PROG_CHUNK bytes.
+ *
+ * Returns the number of bytes written (@len, or 0 when @len <= 0), or -1
+ * as soon as any chunk transfers fewer bytes than requested.
+ */
+int qib_sd7220_prog_ld(struct qib_devdata *dd, int sdnum,
+		       u8 *img, int len, int offset)
+{
+	int done;
+	int chunk;
+
+	for (done = 0; done < len; done += chunk) {
+		chunk = len - done;
+		if (chunk > PROG_CHUNK)
+			chunk = PROG_CHUNK;
+		if (qib_sd7220_ram_xfer(dd, sdnum, offset + done,
+					img + done, chunk, 0) < chunk)
+			return -1;
+	}
+	return done;
+}
+
+#define VFY_CHUNK 64
+#define SD_PRAM_ERROR_LIMIT 42
+
+/*
+ * Read back @len bytes of SerDes uC RAM starting at @offset, in pieces
+ * of at most VFY_CHUNK bytes, and compare them against @img.
+ *
+ * Returns the negated count of mismatching bytes if any were found;
+ * otherwise the number of bytes verified, or -1 if a read-back came up
+ * short.
+ */
+int qib_sd7220_prog_vfy(struct qib_devdata *dd, int sdnum,
+			const u8 *img, int len, int offset)
+{
+	unsigned char readback[VFY_CHUNK];
+	int mismatches = 0;
+	int pos = 0;
+
+	while (pos < len) {
+		const u8 *expect = img + pos;
+		int want = len - pos;
+		int got;
+		int i;
+
+		if (want > VFY_CHUNK)
+			want = VFY_CHUNK;
+		got = qib_sd7220_ram_xfer(dd, sdnum, pos + offset,
+					  readback, want, 1);
+		if (got < want) {
+			/* failed in read itself */
+			pos = -1;
+			break;
+		}
+		for (i = 0; i < got; ++i)
+			if (readback[i] != expect[i])
+				++mismatches;
+		pos += got;
+	}
+
+	if (mismatches)
+		return -mismatches;
+	return pos;
+}
+
+/*
+ * IRQ not set up at this point in init, so we poll.
+ */
+#define IB_SERDES_TRIM_DONE (1ULL << 11)
+#define TRIM_TMO (30)
+
+/*
+ * Poll kr_ibcstatus for IB_SERDES_TRIM_DONE, sleeping 10ms between
+ * reads, for at most TRIM_TMO reads.
+ *
+ * Returns 1 once the bit is seen.  Otherwise logs an error and returns
+ * 0, so that IBC will not start without IB_SERDES_TRIM_DONE.
+ */
+static int qib_sd_trimdone_poll(struct qib_devdata *dd)
+{
+	int attempt;
+
+	for (attempt = 0; attempt < TRIM_TMO; ++attempt) {
+		uint64_t status = qib_read_kreg64(dd, kr_ibcstatus);
+
+		if (status & IB_SERDES_TRIM_DONE)
+			return 1;
+		msleep(10);
+	}
+
+	qib_dev_err(dd, "No TRIMDONE in %d tries\n", attempt);
+	return 0;
+}
+
+#define TX_FAST_ELT (9)
+
+/*
+ * Set the "negotiation" values for SERDES. These are used by the IB1.2
+ * link negotiation. Macros below are attempt to keep the values a
+ * little more human-editable.
+ * First, values related to Drive De-emphasis Settings.
+ */
+
+#define NUM_DDS_REGS 6
+#define DDS_REG_MAP 0x76A910 /* LSB-first list of regs (in elt 9) to mod */
+
+/*
+ * DDS_VAL() builds the initializer for one struct dds_init. Its eight
+ * arguments are folded into NUM_DDS_REGS byte values, alternating between
+ * a byte built from the "_d" arguments and one built from the "_s"
+ * arguments (presumably DDR and SDR, per the column header in the table
+ * below - confirm against the hardware documentation).
+ */
+#define DDS_VAL(amp_d, main_d, ipst_d, ipre_d, amp_s, main_s, ipst_s, ipre_s) \
+ { { ((amp_d & 0x1F) << 1) | 1, ((amp_s & 0x1F) << 1) | 1, \
+ (main_d << 3) | 4 | (ipre_d >> 2), \
+ (main_s << 3) | 4 | (ipre_s >> 2), \
+ ((ipst_d & 0xF) << 1) | ((ipre_d & 3) << 6) | 0x21, \
+ ((ipst_s & 0xF) << 1) | ((ipre_s & 3) << 6) | 0x21 } }
+
+/*
+ * One row of the DDS table: reg_vals[i] is the byte for the i-th register
+ * nibble of DDS_REG_MAP (taken LSB first).
+ */
+static struct dds_init {
+ uint8_t reg_vals[NUM_DDS_REGS];
+} dds_init_vals[] = {
+ /* DDR(FDR) SDR(HDR) */
+ /* Vendor recommends below for 3m cable */
+#define DDS_3M 0 /* index of the row above-mentioned for 3m cable */
+ DDS_VAL(31, 19, 12, 0, 29, 22, 9, 0),
+ DDS_VAL(31, 12, 15, 4, 31, 15, 15, 1),
+ DDS_VAL(31, 13, 15, 3, 31, 16, 15, 0),
+ DDS_VAL(31, 14, 15, 2, 31, 17, 14, 0),
+ DDS_VAL(31, 15, 15, 1, 31, 18, 13, 0),
+ DDS_VAL(31, 16, 15, 0, 31, 19, 12, 0),
+ DDS_VAL(31, 17, 14, 0, 31, 20, 11, 0),
+ DDS_VAL(31, 18, 13, 0, 30, 21, 10, 0),
+ DDS_VAL(31, 20, 11, 0, 28, 23, 8, 0),
+ DDS_VAL(31, 21, 10, 0, 27, 24, 7, 0),
+ DDS_VAL(31, 22, 9, 0, 26, 25, 6, 0),
+ DDS_VAL(30, 23, 8, 0, 25, 26, 5, 0),
+ DDS_VAL(29, 24, 7, 0, 23, 27, 4, 0),
+ /* Vendor recommends below for 1m cable */
+#define DDS_1M 13 /* index of the first row following this marker */
+ DDS_VAL(28, 25, 6, 0, 21, 28, 3, 0),
+ DDS_VAL(27, 26, 5, 0, 19, 29, 2, 0),
+ DDS_VAL(25, 27, 4, 0, 17, 30, 1, 0)
+};
+
+/*
+ * Now the RXEQ section of the table.
+ */
+/* Hardware packs an element number and register address thus: */
+#define RXEQ_INIT_RDESC(elt, addr) (((elt) & 0xF) | ((addr) << 4))
+/* Initializer for one rxeq_init entry with four distinct data values */
+#define RXEQ_VAL(elt, adr, val0, val1, val2, val3) \
+ {RXEQ_INIT_RDESC((elt), (adr)), {(val0), (val1), (val2), (val3)} }
+
+/* Initializer for one rxeq_init entry with the same value in all four slots */
+#define RXEQ_VAL_ALL(elt, adr, val) \
+ {RXEQ_INIT_RDESC((elt), (adr)), {(val), (val), (val), (val)} }
+
+#define RXEQ_SDR_DFELTH 0
+#define RXEQ_SDR_TLTH 0
+#define RXEQ_SDR_G1CNT_Z1CNT 0x11
+#define RXEQ_SDR_ZCNT 23
+
+/*
+ * One RXEQ register: rdesc is the packed element/address descriptor
+ * (element in the low four bits, address above), rdata[] holds four
+ * alternative values for that register.
+ */
+static struct rxeq_init {
+ u16 rdesc; /* in form used in SerDesDDSRXEQ */
+ u8 rdata[4];
+} rxeq_init_vals[] = {
+ /* Set Rcv Eq. to Preset node */
+ RXEQ_VAL_ALL(7, 0x27, 0x10),
+ /* Set DFELTHFDR/HDR thresholds */
+ RXEQ_VAL(7, 8, 0, 0, 0, 0), /* FDR, was 0, 1, 2, 3 */
+ RXEQ_VAL(7, 0x21, 0, 0, 0, 0), /* HDR */
+ /* Set TLTHFDR/HDR threshold */
+ RXEQ_VAL(7, 9, 2, 2, 2, 2), /* FDR, was 0, 2, 4, 6 */
+ RXEQ_VAL(7, 0x23, 2, 2, 2, 2), /* HDR, was 0, 1, 2, 3 */
+ /* Set Preamp setting 2 (ZFR/ZCNT) */
+ RXEQ_VAL(7, 0x1B, 12, 12, 12, 12), /* FDR, was 12, 16, 20, 24 */
+ RXEQ_VAL(7, 0x1C, 12, 12, 12, 12), /* HDR, was 12, 16, 20, 24 */
+ /* Set Preamp DC gain and Setting 1 (GFR/GHR) */
+ RXEQ_VAL(7, 0x1E, 16, 16, 16, 16), /* FDR, was 16, 17, 18, 20 */
+ RXEQ_VAL(7, 0x1F, 16, 16, 16, 16), /* HDR, was 16, 17, 18, 20 */
+ /* Toggle RELOCK (in VCDL_CTRL0) to lock to data */
+ RXEQ_VAL_ALL(6, 6, 0x20), /* Set D5 High */
+ RXEQ_VAL_ALL(6, 6, 0), /* Set D5 Low */
+};
+
+/* There are 17 values from vendor, but IBC only accesses the first 16 */
+#define DDS_ROWS (16)
+#define RXEQ_ROWS ARRAY_SIZE(rxeq_init_vals)
+
+/*
+ * Load the DDS and RXEQ tables into the chip.
+ *
+ * Stores NUM_DDS_REGS and RXEQ_ROWS into two 5-bit fields (at bits 8 and
+ * 13) of kr_ibserdesctrl, then writes a register descriptor for every
+ * DDS and RXEQ register starting at kr_serdes_ddsrxeq0, and the matching
+ * data values starting at kr_serdes_maptable. Each MMIO write is followed
+ * by mmiowb() and a read of kr_scratch.
+ *
+ * Always returns 0.
+ */
+static int qib_sd_setvals(struct qib_devdata *dd)
+{
+ int idx, midx;
+ int min_idx; /* Minimum index for this portion of table */
+ uint32_t dds_reg_map;
+ u64 __iomem *taddr, *iaddr;
+ uint64_t data;
+ uint64_t sdctl;
+
+ /* taddr: base of the data table; iaddr: base of the descriptor regs */
+ taddr = dd->kregbase + kr_serdes_maptable;
+ iaddr = dd->kregbase + kr_serdes_ddsrxeq0;
+
+ /*
+ * Init the DDS section of the table.
+ * Each "row" of the table provokes NUM_DDS_REGS writes, to the
+ * registers indicated in DDS_REG_MAP.
+ */
+ sdctl = qib_read_kreg64(dd, kr_ibserdesctrl);
+ sdctl = (sdctl & ~(0x1f << 8)) | (NUM_DDS_REGS << 8);
+ sdctl = (sdctl & ~(0x1f << 13)) | (RXEQ_ROWS << 13);
+ qib_write_kreg(dd, kr_ibserdesctrl, sdctl);
+
+ /*
+ * Iterate down table within loop for each register to store.
+ */
+ dds_reg_map = DDS_REG_MAP;
+ for (idx = 0; idx < NUM_DDS_REGS; ++idx) {
+ /* Descriptor: next register nibble from the map, in element 9 */
+ data = ((dds_reg_map & 0xF) << 4) | TX_FAST_ELT;
+ writeq(data, iaddr + idx);
+ mmiowb();
+ qib_read_kreg32(dd, kr_scratch);
+ dds_reg_map >>= 4;
+ for (midx = 0; midx < DDS_ROWS; ++midx) {
+ /* Rows are 16 entries apart; idx selects the column */
+ u64 __iomem *daddr = taddr + ((midx << 4) + idx);
+ data = dds_init_vals[midx].reg_vals[idx];
+ writeq(data, daddr);
+ mmiowb();
+ qib_read_kreg32(dd, kr_scratch);
+ } /* End inner for (vals for this reg, each row) */
+ } /* end outer for (regs to be stored) */
+
+ /*
+ * Init the RXEQ section of the table.
+ * This runs in a different order, as the pattern of
+ * register references is more complex, but there are only
+ * four "data" values per register.
+ */
+ min_idx = idx; /* RXEQ indices pick up where DDS left off */
+ taddr += 0x100; /* RXEQ data is in second half of table */
+ /* Iterate through RXEQ register addresses */
+ for (idx = 0; idx < RXEQ_ROWS; ++idx) {
+ int didx; /* "destination" */
+ int vidx;
+
+ /* didx is offset by min_idx to address RXEQ range of regs */
+ didx = idx + min_idx;
+ /* Store the next RXEQ register address */
+ writeq(rxeq_init_vals[idx].rdesc, iaddr + didx);
+ mmiowb();
+ qib_read_kreg32(dd, kr_scratch);
+ /* Iterate through RXEQ values */
+ for (vidx = 0; vidx < 4; vidx++) {
+ data = rxeq_init_vals[idx].rdata[vidx];
+ /* Value sets are 64 entries apart; idx selects the register */
+ writeq(data, taddr + (vidx << 6) + idx);
+ mmiowb();
+ qib_read_kreg32(dd, kr_scratch);
+ }
+ } /* end outer for (Reg-writes for RXEQ) */
+ return 0;
+}
+
+#define CMUCTRL5 EPB_LOC(7, 0, 0x15)
+#define RXHSCTRL0(chan) EPB_LOC(chan, 6, 0)
+#define VCDL_DAC2(chan) EPB_LOC(chan, 6, 5)
+#define VCDL_CTRL0(chan) EPB_LOC(chan, 6, 6)
+#define VCDL_CTRL2(chan) EPB_LOC(chan, 6, 8)
+#define START_EQ2(chan) EPB_LOC(chan, 7, 0x28)
+
+/*
+ * Repeat a "store" across all channels of the IB SerDes.
+ * Although nominally it inherits the "read value" of the last
+ * channel it modified, the only really useful return is <0 for
+ * failure, >= 0 for success. The parameter 'loc' is assumed to
+ * be the location in some channel of the register to be modified
+ * The caller can specify use of the "gang write" option of EPB,
+ * in which case we use the specified channel data for any fields
+ * not explicitly written.
+ *
+ * Without EPB_GLOBAL_WR in 'loc', channels 0..3 are modified one at a
+ * time and the loop stops at the first failure. Every failure is logged
+ * with qib_dev_err().
+ */
+static int ibsd_mod_allchnls(struct qib_devdata *dd, int loc, int val,
+ int mask)
+{
+ int ret = -1;
+ int chnl;
+
+ if (loc & EPB_GLOBAL_WR) {
+ /*
+ * Our caller has assured us that we can set all four
+ * channels at once. Trust that. If mask is not 0xFF,
+ * we will read the _specified_ channel for our starting
+ * value.
+ */
+ loc |= (1U << EPB_IB_QUAD0_CS_SHF);
+ /* Remember the caller's channel for the pre-read and messages */
+ chnl = (loc >> (4 + EPB_ADDR_SHF)) & 7;
+ if (mask != 0xFF) {
+ /* Zero mask: read-only access to fetch the current value */
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+ loc & ~EPB_GLOBAL_WR, 0, 0);
+ if (ret < 0) {
+ int sloc = loc >> EPB_ADDR_SHF;
+
+ qib_dev_err(dd, "pre-read failed: elt %d,"
+ " addr 0x%X, chnl %d\n",
+ (sloc & 0xF),
+ (sloc >> 9) & 0x3f, chnl);
+ return ret;
+ }
+ val = (ret & ~mask) | (val & mask);
+ }
+ /* Clear the channel field, then write the full byte to all */
+ loc &= ~(7 << (4+EPB_ADDR_SHF));
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, loc, val, 0xFF);
+ if (ret < 0) {
+ int sloc = loc >> EPB_ADDR_SHF;
+
+ qib_dev_err(dd, "Global WR failed: elt %d,"
+ " addr 0x%X, val %02X\n",
+ (sloc & 0xF), (sloc >> 9) & 0x3f, val);
+ }
+ return ret;
+ }
+ /* Clear "channel" and set CS so we can simply iterate */
+ loc &= ~(7 << (4+EPB_ADDR_SHF));
+ loc |= (1U << EPB_IB_QUAD0_CS_SHF);
+ for (chnl = 0; chnl < 4; ++chnl) {
+ int cloc = loc | (chnl << (4+EPB_ADDR_SHF));
+
+ ret = qib_sd7220_reg_mod(dd, IB_7220_SERDES, cloc, val, mask);
+ if (ret < 0) {
+ int sloc = loc >> EPB_ADDR_SHF;
+
+ qib_dev_err(dd, "Write failed: elt %d,"
+ " addr 0x%X, chnl %d, val 0x%02X,"
+ " mask 0x%02X\n",
+ (sloc & 0xF), (sloc >> 9) & 0x3f, chnl,
+ val & 0xFF, mask & 0xFF);
+ break;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Set the Tx values normally modified by IBC in IB1.2 mode to default
+ * values, as gotten from first row of init table.
+ *
+ * Writes each of the NUM_DDS_REGS bytes of @ddi to the register named
+ * by the matching nibble of DDS_REG_MAP (LSB first), on all channels.
+ *
+ * Returns <0 as soon as a write fails, otherwise the (non-negative)
+ * result of the last write.
+ */
+static int set_dds_vals(struct qib_devdata *dd, struct dds_init *ddi)
+{
+	/* Initialized so the return value is defined even with no regs */
+	int ret = 0;
+	int idx, reg, data;
+	uint32_t regmap;
+
+	regmap = DDS_REG_MAP;
+	for (idx = 0; idx < NUM_DDS_REGS; ++idx) {
+		reg = (regmap & 0xF);
+		regmap >>= 4;
+		data = ddi->reg_vals[idx];
+		/* Vendor says RMW not needed for these regs, use 0xFF mask */
+		ret = ibsd_mod_allchnls(dd, EPB_LOC(0, 9, reg), data, 0xFF);
+		if (ret < 0)
+			break;
+	}
+	return ret;
+}
+
+/*
+ * Set the Rx values normally modified by IBC in IB1.2 mode to default
+ * values, as gotten from selected column of init table.
+ *
+ * @vsel selects which of the rdata[] values of each rxeq_init_vals
+ * entry is written; it must be a valid index into rdata[].
+ *
+ * Returns <0 if @vsel is out of range or as soon as a write fails,
+ * otherwise the (non-negative) result of the last write.
+ */
+static int set_rxeq_vals(struct qib_devdata *dd, int vsel)
+{
+	/* Initialized so the return value is defined even for an empty table */
+	int ret = 0;
+	int ridx;
+	int cnt = ARRAY_SIZE(rxeq_init_vals);
+
+	/* Refuse to index past the end of rdata[] */
+	if (vsel < 0 || vsel >= (int)ARRAY_SIZE(rxeq_init_vals[0].rdata))
+		return -1;
+
+	for (ridx = 0; ridx < cnt; ++ridx) {
+		int elt, reg, val, loc;
+
+		/* rdesc packs the element in the low nibble, address above */
+		elt = rxeq_init_vals[ridx].rdesc & 0xF;
+		reg = rxeq_init_vals[ridx].rdesc >> 4;
+		loc = EPB_LOC(0, elt, reg);
+		val = rxeq_init_vals[ridx].rdata[vsel];
+		/* mask of 0xFF, because hardware does full-byte store. */
+		ret = ibsd_mod_allchnls(dd, loc, val, 0xFF);
+		if (ret < 0)
+			break;
+	}
+	return ret;
+}
+
+/*
+ * Set the default values (row 0) for DDR Driver De-emphasis.
+ * we do this initially and whenever we turn off IB-1.2
+ *
+ * The "default" values for Rx equalization are also stored to
+ * SerDes registers. Formerly (and still default), we used set 2.
+ * For experimenting with cables and link-partners, we allow changing
+ * that via a module parameter.
+ */
+static unsigned qib_rxeq_set = 2;
+module_param_named(rxeq_default_set, qib_rxeq_set, uint,
+ S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(rxeq_default_set,
+ "Which set [0..3] of Rx Equalization values is default");
+
+/*
+ * Store the default DDS row (DDS_3M) and the default RXEQ value set
+ * (qib_rxeq_set, limited to 0..3) to the SerDes registers.
+ *
+ * Both steps are always attempted and each failure is logged.  Returns
+ * <0 if either step failed (the RXEQ error takes precedence when both
+ * fail), otherwise the non-negative result of the RXEQ step.
+ */
+static int qib_internal_presets(struct qib_devdata *dd)
+{
+	int dds_ret;
+	int rxeq_ret;
+
+	dds_ret = set_dds_vals(dd, dds_init_vals + DDS_3M);
+	if (dds_ret < 0)
+		qib_dev_err(dd, "Failed to set default DDS values\n");
+
+	rxeq_ret = set_rxeq_vals(dd, qib_rxeq_set & 3);
+	if (rxeq_ret < 0)
+		qib_dev_err(dd, "Failed to set default RXEQ values\n");
+
+	if (rxeq_ret < 0)
+		return rxeq_ret;
+	/* Do not let a successful RXEQ pass hide an earlier DDS failure */
+	if (dds_ret < 0)
+		return dds_ret;
+	return rxeq_ret;
+}
+
+/*
+ * Apply the internal SerDes presets if they have been flagged as needed.
+ *
+ * Returns 0 without touching the hardware when no presets are pending;
+ * otherwise clears the pending flag and returns the result of
+ * qib_internal_presets().
+ */
+int qib_sd7220_presets(struct qib_devdata *dd)
+{
+	if (!dd->cspec->presets_needed)
+		return 0;
+
+	dd->cspec->presets_needed = 0;
+
+	/* Assert uC reset, so we don't clash with it. */
+	qib_ibsd_reset(dd, 1);
+	udelay(2);
+	qib_sd_trimdone_monitor(dd, "link-down");
+
+	return qib_internal_presets(dd);
+}
+
+/*
+ * Write @val as a full byte to CMUCTRL5 of the IB SerDes (quad 0 chip
+ * select).  Returns the result of qib_sd7220_reg_mod().
+ */
+static int qib_sd_trimself(struct qib_devdata *dd, int val)
+{
+	return qib_sd7220_reg_mod(dd, IB_7220_SERDES,
+				  CMUCTRL5 | (1U << EPB_IB_QUAD0_CS_SHF),
+				  val, 0xFF);
+}
+
+/*
+ * Gang-write three early-init registers (RXHSCTRL0, START_EQ1,
+ * START_EQ2) on all IB SerDes channels, stopping at the first failure.
+ * Returns <0 on failure, otherwise the result of the last write.
+ */
+static int qib_sd_early(struct qib_devdata *dd)
+{
+	int rc;
+
+	rc = ibsd_mod_allchnls(dd, RXHSCTRL0(0) | EPB_GLOBAL_WR, 0xD4, 0xFF);
+	if (rc < 0)
+		return rc;
+
+	rc = ibsd_mod_allchnls(dd, START_EQ1(0) | EPB_GLOBAL_WR, 0x10, 0xFF);
+	if (rc < 0)
+		return rc;
+
+	return ibsd_mod_allchnls(dd, START_EQ2(0) | EPB_GLOBAL_WR, 0x30, 0xFF);
+}
+
+#define BACTRL(chnl) EPB_LOC(chnl, 6, 0x0E)
+#define LDOUTCTRL1(chnl) EPB_LOC(chnl, 7, 6)
+#define RXHSSTATUS(chnl) EPB_LOC(chnl, 6, 0xF)
+
+/*
+ * Run the DAC trim register sequence on all IB SerDes channels,
+ * stopping at the first failed write.
+ * Returns <0 on failure, otherwise the result of the last write.
+ */
+static int qib_sd_dactrim(struct qib_devdata *dd)
+{
+	int rc;
+
+	rc = ibsd_mod_allchnls(dd, VCDL_DAC2(0) | EPB_GLOBAL_WR, 0x2D, 0xFF);
+	if (rc < 0)
+		return rc;
+
+	/* more fine-tuning of what will be default */
+	rc = ibsd_mod_allchnls(dd, VCDL_CTRL2(0), 3, 0xF);
+	if (rc < 0)
+		return rc;
+
+	rc = ibsd_mod_allchnls(dd, BACTRL(0) | EPB_GLOBAL_WR, 0x40, 0xFF);
+	if (rc < 0)
+		return rc;
+
+	rc = ibsd_mod_allchnls(dd, LDOUTCTRL1(0) | EPB_GLOBAL_WR, 0x04, 0xFF);
+	if (rc < 0)
+		return rc;
+
+	rc = ibsd_mod_allchnls(dd, RXHSSTATUS(0) | EPB_GLOBAL_WR, 0x04, 0xFF);
+	if (rc < 0)
+		return rc;
+
+	/*
+	 * Delay for max possible number of steps, with slop.
+	 * Each step is about 4usec.
+	 */
+	udelay(415);
+
+	return ibsd_mod_allchnls(dd, LDOUTCTRL1(0) | EPB_GLOBAL_WR, 0x00, 0xFF);
+}
+
+#define RELOCK_FIRST_MS 3
+#define RXLSPPM(chan) EPB_LOC(chan, 0, 2)
+
+/*
+ * Toggle bit D7 (0x80) of RXLSPPM on all channels: clear it, then set
+ * it again 1us later.  The toggle is done twice, 1us apart, for good
+ * measure; a failed clear is logged and skips the matching set.
+ * Afterwards xgxs and IBC are reset to complete the recovery.
+ */
+void toggle_7220_rclkrls(struct qib_devdata *dd)
+{
+	int loc = RXLSPPM(0) | EPB_GLOBAL_WR;
+	int pass;
+
+	for (pass = 0; pass < 2; ++pass) {
+		/* Space the second toggle 1us after the first */
+		if (pass)
+			udelay(1);
+
+		if (ibsd_mod_allchnls(dd, loc, 0, 0x80) < 0) {
+			qib_dev_err(dd, "RCLKRLS failed to clear D7\n");
+			continue;
+		}
+		udelay(1);
+		ibsd_mod_allchnls(dd, loc, 0x80, 0x80);
+	}
+
+	/* Now reset xgxs and IBC to complete the recovery */
+	dd->f_xgxs_reset(dd->pport);
+}
+
+/*
+ * Stop the timer that polls for relock occasions, if it was ever
+ * started.  This is "hooked" from qib_7220_quiet_serdes(), which is
+ * called just before qib_shutdown_device() in qib_driver.c shuts down
+ * all the other timers.
+ */
+void shutdown_7220_relock_poll(struct qib_devdata *dd)
+{
+	struct qib_chip_specific *cs = dd->cspec;
+
+	if (!cs->relock_timer_active)
+		return;
+	del_timer_sync(&cs->relock_timer);
+}
+
+static unsigned qib_relock_by_timer = 1;
+module_param_named(relock_by_timer, qib_relock_by_timer, uint,
+ S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(relock_by_timer, "Allow relock attempt if link not up");
+
+/*
+ * Relock timer callback; @opaque is the struct qib_devdata pointer.
+ *
+ * Check link-training state for "stuck" state, when down.  If found,
+ * try relock and schedule another try at exponentially growing delay,
+ * maxed at one second.  If not stuck, just check again in one second.
+ */
+static void qib_run_relock(unsigned long opaque)
+{
+	struct qib_devdata *dd = (struct qib_devdata *)opaque;
+	struct qib_pportdata *ppd = dd->pport;
+	struct qib_chip_specific *cs = dd->cspec;
+	int timeoff = HZ;
+	int stuck;
+
+	/* "Stuck": initialized, but neither negotiating nor in any up state */
+	stuck = (dd->flags & QIB_INITTED) &&
+		!(ppd->lflags & (QIBL_IB_AUTONEG_INPROG | QIBL_LINKINIT |
+				 QIBL_LINKARMED | QIBL_LINKACTIVE));
+
+	if (stuck) {
+		if (qib_relock_by_timer &&
+		    !(ppd->lflags & QIBL_IB_LINK_DISABLED))
+			toggle_7220_rclkrls(dd);
+
+		/* re-set timer for next check: double interval, cap at HZ */
+		timeoff = cs->relock_interval << 1;
+		if (timeoff > HZ)
+			timeoff = HZ;
+		cs->relock_interval = timeoff;
+	}
+
+	mod_timer(&cs->relock_timer, jiffies + timeoff);
+}
+
+/*
+ * Adjust the relock poll timer on a link state change.
+ *
+ * With @ibup non-zero, an already running timer is relaxed to a one
+ * second interval (nothing happens if it was never started).  With
+ * @ibup zero, the timer is (re-)set to the short RELOCK_FIRST_MS
+ * interval, and created first if it has not been started yet.
+ */
+void set_7220_relock_poll(struct qib_devdata *dd, int ibup)
+{
+	struct qib_chip_specific *cs = dd->cspec;
+	unsigned int timeout;
+
+	if (ibup) {
+		/* We are now up, relax timer to 1 second interval */
+		if (cs->relock_timer_active) {
+			cs->relock_interval = HZ;
+			mod_timer(&cs->relock_timer, jiffies + HZ);
+		}
+		return;
+	}
+
+	/* Transition to down, (re-)set timer to short interval. */
+	timeout = msecs_to_jiffies(RELOCK_FIRST_MS);
+	if (timeout == 0)
+		timeout = 1;
+
+	if (cs->relock_timer_active) {
+		cs->relock_interval = timeout;
+		mod_timer(&cs->relock_timer, jiffies + timeout);
+		return;
+	}
+
+	/* Timer has not yet been started, so set it up and arm it. */
+	cs->relock_timer_active = 1;
+	init_timer(&cs->relock_timer);
+	cs->relock_timer.function = qib_run_relock;
+	cs->relock_timer.data = (unsigned long) dd;
+	cs->relock_interval = timeout;
+	cs->relock_timer.expires = jiffies + timeout;
+	add_timer(&cs->relock_timer);
+}
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox