* [PATCH for-3.11 1/7] IB/iser: Use proper debug level value for info prints
[not found] ` <1374153931-7313-1-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
@ 2013-07-18 13:25 ` Or Gerlitz
2013-07-18 13:25 ` [PATCH for-3.11 2/7] IB/iser: Restructure allocation/deallocation of connection resources Or Gerlitz
` (5 subsequent siblings)
6 siblings, 0 replies; 20+ messages in thread
From: Or Gerlitz @ 2013-07-18 13:25 UTC (permalink / raw)
To: roland-DgEjT+Ai2ygdnm+yROfE0A
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, sagig-VPRAkNaXOzVWk0Htik3J/w,
roid-VPRAkNaXOzVWk0Htik3J/w, oren-VPRAkNaXOzVWk0Htik3J/w,
Or Gerlitz
Commit 4f363882612 "IB/iser: Move informational messages from error to info level"
was setting info prints to require lower value for the debug level vs warning prints
which isn't the common convention, fix that. Also move the prints on unaligned SG
from warning to debug level.
Signed-off-by: Or Gerlitz <ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
drivers/infiniband/ulp/iser/iscsi_iser.h | 4 ++--
drivers/infiniband/ulp/iser/iser_memory.c | 13 ++++++-------
2 files changed, 8 insertions(+), 9 deletions(-)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 4f069c0..d694bcd 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -78,14 +78,14 @@
#define iser_warn(fmt, arg...) \
do { \
- if (iser_debug_level > 1) \
+ if (iser_debug_level > 0) \
pr_warn(PFX "%s:" fmt, \
__func__ , ## arg); \
} while (0)
#define iser_info(fmt, arg...) \
do { \
- if (iser_debug_level > 0) \
+ if (iser_debug_level > 1) \
pr_info(PFX "%s:" fmt, \
__func__ , ## arg); \
} while (0)
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 7827baf..797e49f 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -267,11 +267,8 @@ static void iser_data_buf_dump(struct iser_data_buf *data,
struct scatterlist *sg;
int i;
- if (iser_debug_level == 0)
- return;
-
for_each_sg(sgl, sg, data->dma_nents, i)
- iser_warn("sg[%d] dma_addr:0x%lX page:0x%p "
+ iser_dbg("sg[%d] dma_addr:0x%lX page:0x%p "
"off:0x%x sz:0x%x dma_len:0x%x\n",
i, (unsigned long)ib_sg_dma_address(ibdev, sg),
sg_page(sg), sg->offset,
@@ -373,9 +370,11 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
if (aligned_len != mem->dma_nents ||
(!ib_conn->fmr_pool && mem->dma_nents > 1)) {
iscsi_conn->fmr_unalign_cnt++;
- iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
- aligned_len, mem->size);
- iser_data_buf_dump(mem, ibdev);
+ iser_dbg("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
+ aligned_len, mem->size);
+
+ if (iser_debug_level > 0)
+ iser_data_buf_dump(mem, ibdev);
/* unmap the command data before accessing it */
iser_dma_unmap_task_data(iser_task);
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH for-3.11 2/7] IB/iser: Restructure allocation/deallocation of connection resources
[not found] ` <1374153931-7313-1-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2013-07-18 13:25 ` [PATCH for-3.11 1/7] IB/iser: Use proper debug level value for info prints Or Gerlitz
@ 2013-07-18 13:25 ` Or Gerlitz
2013-07-18 13:25 ` [PATCH for-3.11 3/7] IB/iser: Accept session->cmds_max from user space Or Gerlitz
` (4 subsequent siblings)
6 siblings, 0 replies; 20+ messages in thread
From: Or Gerlitz @ 2013-07-18 13:25 UTC (permalink / raw)
To: roland-DgEjT+Ai2ygdnm+yROfE0A
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, sagig-VPRAkNaXOzVWk0Htik3J/w,
roid-VPRAkNaXOzVWk0Htik3J/w, oren-VPRAkNaXOzVWk0Htik3J/w,
Shlomo Pongratz, Or Gerlitz
From: Shlomo Pongratz <shlomop-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
This is a preparation step to a patch that accepts the number of max SCSI
commands to be supported for the session from the user space iSCSI tools.
Move the allocation of the login buffer, FMR pool and its associated
page vector from iser_create_ib_conn_res() which is called prior to
a point in time where we actually know how many commands should be
supported to iser_alloc_rx_descriptors() which is called during the
iscsi connection bind step where this quantity is known.
Also do small refactoring around the deallocation to make that path
similar to the allocation one.
Signed-off-by: Shlomo Pongratz <shlomop-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Or Gerlitz <ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
drivers/infiniband/ulp/iser/iscsi_iser.h | 2 +
drivers/infiniband/ulp/iser/iser_initiator.c | 92 ++++++++++++++++++-
drivers/infiniband/ulp/iser/iser_verbs.c | 128 ++++++++++++--------------
3 files changed, 151 insertions(+), 71 deletions(-)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index d694bcd..fee8829 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -395,4 +395,6 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task);
int iser_initialize_task_headers(struct iscsi_task *task,
struct iser_tx_desc *tx_desc);
int iser_alloc_rx_descriptors(struct iser_conn *ib_conn);
+int iser_create_fmr_pool(struct iser_conn *ib_conn);
+void iser_free_fmr_pool(struct iser_conn *ib_conn);
#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index b6d81a8..626d950 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -170,6 +170,76 @@ static void iser_create_send_desc(struct iser_conn *ib_conn,
}
}
+static void iser_free_login_buf(struct iser_conn *ib_conn)
+{
+ if (!ib_conn->login_buf)
+ return;
+
+ if (ib_conn->login_req_dma)
+ ib_dma_unmap_single(ib_conn->device->ib_device,
+ ib_conn->login_req_dma,
+ ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+
+ if (ib_conn->login_resp_dma)
+ ib_dma_unmap_single(ib_conn->device->ib_device,
+ ib_conn->login_resp_dma,
+ ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+
+ kfree(ib_conn->login_buf);
+
+ /* make sure we never redo any unmapping */
+ ib_conn->login_req_dma = 0;
+ ib_conn->login_resp_dma = 0;
+ ib_conn->login_buf = NULL;
+}
+
+static int iser_alloc_login_buf(struct iser_conn *ib_conn)
+{
+ struct iser_device *device;
+ int req_err, resp_err;
+
+ BUG_ON(ib_conn->device == NULL);
+
+ device = ib_conn->device;
+
+ ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
+ ISER_RX_LOGIN_SIZE, GFP_KERNEL);
+ if (!ib_conn->login_buf)
+ goto out_err;
+
+ ib_conn->login_req_buf = ib_conn->login_buf;
+ ib_conn->login_resp_buf = ib_conn->login_buf +
+ ISCSI_DEF_MAX_RECV_SEG_LEN;
+
+ ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
+ (void *)ib_conn->login_req_buf,
+ ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
+
+ ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
+ (void *)ib_conn->login_resp_buf,
+ ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
+
+ req_err = ib_dma_mapping_error(device->ib_device,
+ ib_conn->login_req_dma);
+ resp_err = ib_dma_mapping_error(device->ib_device,
+ ib_conn->login_resp_dma);
+
+ if (req_err || resp_err) {
+ if (req_err)
+ ib_conn->login_req_dma = 0;
+ if (resp_err)
+ ib_conn->login_resp_dma = 0;
+ goto free_login_buf;
+ }
+ return 0;
+
+free_login_buf:
+ iser_free_login_buf(ib_conn);
+
+out_err:
+ iser_err("unable to alloc or map login buf\n");
+ return -ENOMEM;
+}
int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
{
@@ -179,6 +249,12 @@ int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
struct ib_sge *rx_sg;
struct iser_device *device = ib_conn->device;
+ if (iser_create_fmr_pool(ib_conn))
+ goto create_fmr_pool_failed;
+
+ if (iser_alloc_login_buf(ib_conn))
+ goto alloc_login_buf_fail;
+
ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
sizeof(struct iser_rx_desc), GFP_KERNEL);
if (!ib_conn->rx_descs)
@@ -207,10 +283,14 @@ rx_desc_dma_map_failed:
rx_desc = ib_conn->rx_descs;
for (j = 0; j < i; j++, rx_desc++)
ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
- ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+ ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
kfree(ib_conn->rx_descs);
ib_conn->rx_descs = NULL;
rx_desc_alloc_fail:
+ iser_free_login_buf(ib_conn);
+alloc_login_buf_fail:
+ iser_free_fmr_pool(ib_conn);
+create_fmr_pool_failed:
iser_err("failed allocating rx descriptors / data buffers\n");
return -ENOMEM;
}
@@ -222,13 +302,19 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn)
struct iser_device *device = ib_conn->device;
if (!ib_conn->rx_descs)
- return;
+ goto free_login_buf;
rx_desc = ib_conn->rx_descs;
for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)
ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
- ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
+ ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
kfree(ib_conn->rx_descs);
+ /* make sure we never redo any unmapping */
+ ib_conn->rx_descs = NULL;
+
+free_login_buf:
+ iser_free_login_buf(ib_conn);
+ iser_free_fmr_pool(ib_conn);
}
static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 2c4941d..b72e349 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -178,56 +178,23 @@ static void iser_free_device_ib_res(struct iser_device *device)
}
/**
- * iser_create_ib_conn_res - Creates FMR pool and Queue-Pair (QP)
+ * iser_create_fmr_pool - Creates FMR pool and page_vector
*
- * returns 0 on success, -1 on failure
+ * returns 0 on success, or errno code on failure
*/
-static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
+int iser_create_fmr_pool(struct iser_conn *ib_conn)
{
- struct iser_device *device;
- struct ib_qp_init_attr init_attr;
- int req_err, resp_err, ret = -ENOMEM;
+ struct iser_device *device = ib_conn->device;
struct ib_fmr_pool_param params;
- int index, min_index = 0;
-
- BUG_ON(ib_conn->device == NULL);
-
- device = ib_conn->device;
-
- ib_conn->login_buf = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
- ISER_RX_LOGIN_SIZE, GFP_KERNEL);
- if (!ib_conn->login_buf)
- goto out_err;
-
- ib_conn->login_req_buf = ib_conn->login_buf;
- ib_conn->login_resp_buf = ib_conn->login_buf + ISCSI_DEF_MAX_RECV_SEG_LEN;
-
- ib_conn->login_req_dma = ib_dma_map_single(ib_conn->device->ib_device,
- (void *)ib_conn->login_req_buf,
- ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
-
- ib_conn->login_resp_dma = ib_dma_map_single(ib_conn->device->ib_device,
- (void *)ib_conn->login_resp_buf,
- ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
-
- req_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_req_dma);
- resp_err = ib_dma_mapping_error(device->ib_device, ib_conn->login_resp_dma);
-
- if (req_err || resp_err) {
- if (req_err)
- ib_conn->login_req_dma = 0;
- if (resp_err)
- ib_conn->login_resp_dma = 0;
- goto out_err;
- }
+ int ret = -ENOMEM;
ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
- (sizeof(u64) * (ISCSI_ISER_SG_TABLESIZE +1)),
+ (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE+1)),
GFP_KERNEL);
if (!ib_conn->page_vec)
- goto out_err;
+ return ret;
- ib_conn->page_vec->pages = (u64 *) (ib_conn->page_vec + 1);
+ ib_conn->page_vec->pages = (u64 *)(ib_conn->page_vec + 1);
params.page_shift = SHIFT_4K;
/* when the first/last SG element are not start/end *
@@ -244,15 +211,56 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
IB_ACCESS_REMOTE_READ);
ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, ¶ms);
+ if (!IS_ERR(ib_conn->fmr_pool))
+ return 0;
+
+ /* no FMR => no need for page_vec */
+ kfree(ib_conn->page_vec);
+ ib_conn->page_vec = NULL;
+
ret = PTR_ERR(ib_conn->fmr_pool);
- if (IS_ERR(ib_conn->fmr_pool) && ret != -ENOSYS) {
- ib_conn->fmr_pool = NULL;
- goto out_err;
- } else if (ret == -ENOSYS) {
- ib_conn->fmr_pool = NULL;
+ ib_conn->fmr_pool = NULL;
+ if (ret != -ENOSYS) {
+ iser_err("FMR allocation failed, err %d\n", ret);
+ return ret;
+ } else {
iser_warn("FMRs are not supported, using unaligned mode\n");
- ret = 0;
+ return 0;
}
+}
+
+/**
+ * iser_free_fmr_pool - releases the FMR pool and page vec
+ */
+void iser_free_fmr_pool(struct iser_conn *ib_conn)
+{
+ iser_info("freeing conn %p fmr pool %p\n",
+ ib_conn, ib_conn->fmr_pool);
+
+ if (ib_conn->fmr_pool != NULL)
+ ib_destroy_fmr_pool(ib_conn->fmr_pool);
+
+ ib_conn->fmr_pool = NULL;
+
+ kfree(ib_conn->page_vec);
+ ib_conn->page_vec = NULL;
+}
+
+/**
+ * iser_create_ib_conn_res - Queue-Pair (QP)
+ *
+ * returns 0 on success, -1 on failure
+ */
+static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
+{
+ struct iser_device *device;
+ struct ib_qp_init_attr init_attr;
+ int ret = -ENOMEM;
+ int index, min_index = 0;
+
+ BUG_ON(ib_conn->device == NULL);
+
+ device = ib_conn->device;
memset(&init_attr, 0, sizeof init_attr);
@@ -282,9 +290,9 @@ static int iser_create_ib_conn_res(struct iser_conn *ib_conn)
goto out_err;
ib_conn->qp = ib_conn->cma_id->qp;
- iser_info("setting conn %p cma_id %p: fmr_pool %p qp %p\n",
+ iser_info("setting conn %p cma_id %p qp %p\n",
ib_conn, ib_conn->cma_id,
- ib_conn->fmr_pool, ib_conn->cma_id->qp);
+ ib_conn->cma_id->qp);
return ret;
out_err:
@@ -293,7 +301,7 @@ out_err:
}
/**
- * releases the FMR pool and QP objects, returns 0 on success,
+ * releases the QP objects, returns 0 on success,
* -1 on failure
*/
static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
@@ -301,13 +309,11 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
int cq_index;
BUG_ON(ib_conn == NULL);
- iser_info("freeing conn %p cma_id %p fmr pool %p qp %p\n",
+ iser_info("freeing conn %p cma_id %p qp %p\n",
ib_conn, ib_conn->cma_id,
- ib_conn->fmr_pool, ib_conn->qp);
+ ib_conn->qp);
/* qp is created only once both addr & route are resolved */
- if (ib_conn->fmr_pool != NULL)
- ib_destroy_fmr_pool(ib_conn->fmr_pool);
if (ib_conn->qp != NULL) {
cq_index = ((struct iser_cq_desc *)ib_conn->qp->recv_cq->cq_context)->cq_index;
@@ -316,21 +322,7 @@ static int iser_free_ib_conn_res(struct iser_conn *ib_conn)
rdma_destroy_qp(ib_conn->cma_id);
}
- ib_conn->fmr_pool = NULL;
ib_conn->qp = NULL;
- kfree(ib_conn->page_vec);
-
- if (ib_conn->login_buf) {
- if (ib_conn->login_req_dma)
- ib_dma_unmap_single(ib_conn->device->ib_device,
- ib_conn->login_req_dma,
- ISCSI_DEF_MAX_RECV_SEG_LEN, DMA_TO_DEVICE);
- if (ib_conn->login_resp_dma)
- ib_dma_unmap_single(ib_conn->device->ib_device,
- ib_conn->login_resp_dma,
- ISER_RX_LOGIN_SIZE, DMA_FROM_DEVICE);
- kfree(ib_conn->login_buf);
- }
return 0;
}
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH for-3.11 3/7] IB/iser: Accept session->cmds_max from user space
[not found] ` <1374153931-7313-1-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
2013-07-18 13:25 ` [PATCH for-3.11 1/7] IB/iser: Use proper debug level value for info prints Or Gerlitz
2013-07-18 13:25 ` [PATCH for-3.11 2/7] IB/iser: Restructure allocation/deallocation of connection resources Or Gerlitz
@ 2013-07-18 13:25 ` Or Gerlitz
2013-07-18 13:25 ` [PATCH for-3.11 4/7] IB/iser: Generalize rdma memory registration Or Gerlitz
` (3 subsequent siblings)
6 siblings, 0 replies; 20+ messages in thread
From: Or Gerlitz @ 2013-07-18 13:25 UTC (permalink / raw)
To: roland-DgEjT+Ai2ygdnm+yROfE0A
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, sagig-VPRAkNaXOzVWk0Htik3J/w,
roid-VPRAkNaXOzVWk0Htik3J/w, oren-VPRAkNaXOzVWk0Htik3J/w,
Shlomo Pongratz, Or Gerlitz
From: Shlomo Pongratz <shlomop-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Use cmds_max passed from user space to be the number of PDUs to be
supported for the session instead of hard-coded ISCSI_DEF_XMIT_CMDS_MAX.
Specifically, this allows to control the max number of SCSI commands
for the seesion. Also don't ignore the qdepth passed from user space.
Derive from session->cmds_max the actual number of RX buffers
and FMR pool size to allocate during the connection bind phase.
Since the iser transport connection is established before the iscsi
session/connection are created and bounded, we still use one hard coded
quantity ISER_DEF_XMIT_CMDS_MAX to compute the maximum number of
work-requests to be supported by the RC QP used for the connection.
The above quantity is made to be a power of two between ISCSI_TOTAL_CMDS_MIN
(16) and ISER_DEF_XMIT_CMDS_MAX (512) inclusive.
Signed-off-by: Shlomo Pongratz <shlomop-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Or Gerlitz <ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
drivers/infiniband/ulp/iser/iscsi_iser.c | 19 ++++++++++++-------
drivers/infiniband/ulp/iser/iscsi_iser.h | 21 +++++++++++++++------
drivers/infiniband/ulp/iser/iser_initiator.c | 25 +++++++++++++++----------
drivers/infiniband/ulp/iser/iser_verbs.c | 8 ++++----
4 files changed, 46 insertions(+), 27 deletions(-)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 2e84ef8..705de7b 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c
@@ -347,6 +347,7 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
{
struct iscsi_conn *conn = cls_conn->dd_data;
struct iscsi_iser_conn *iser_conn;
+ struct iscsi_session *session;
struct iser_conn *ib_conn;
struct iscsi_endpoint *ep;
int error;
@@ -365,7 +366,8 @@ iscsi_iser_conn_bind(struct iscsi_cls_session *cls_session,
}
ib_conn = ep->dd_data;
- if (iser_alloc_rx_descriptors(ib_conn))
+ session = conn->session;
+ if (iser_alloc_rx_descriptors(ib_conn, session))
return -ENOMEM;
/* binds the iSER connection retrieved from the previously
@@ -419,12 +421,13 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
struct iscsi_cls_session *cls_session;
struct iscsi_session *session;
struct Scsi_Host *shost;
- struct iser_conn *ib_conn;
+ struct iser_conn *ib_conn = NULL;
shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
if (!shost)
return NULL;
shost->transportt = iscsi_iser_scsi_transport;
+ shost->cmd_per_lun = qdepth;
shost->max_lun = iscsi_max_lun;
shost->max_id = 0;
shost->max_channel = 0;
@@ -441,12 +444,14 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep,
ep ? ib_conn->device->ib_device->dma_device : NULL))
goto free_host;
- /*
- * we do not support setting can_queue cmd_per_lun from userspace yet
- * because we preallocate so many resources
- */
+ if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) {
+ iser_info("cmds_max changed from %u to %u\n",
+ cmds_max, ISER_DEF_XMIT_CMDS_MAX);
+ cmds_max = ISER_DEF_XMIT_CMDS_MAX;
+ }
+
cls_session = iscsi_session_setup(&iscsi_iser_transport, shost,
- ISCSI_DEF_XMIT_CMDS_MAX, 0,
+ cmds_max, 0,
sizeof(struct iscsi_iser_task),
initial_cmdsn, 0);
if (!cls_session)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index fee8829..d2fc55a 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -102,7 +102,13 @@
/* support up to 512KB in one RDMA */
#define ISCSI_ISER_SG_TABLESIZE (0x80000 >> SHIFT_4K)
-#define ISER_DEF_CMD_PER_LUN ISCSI_DEF_XMIT_CMDS_MAX
+#define ISER_DEF_XMIT_CMDS_DEFAULT 512
+#if ISCSI_DEF_XMIT_CMDS_MAX > ISER_DEF_XMIT_CMDS_DEFAULT
+ #define ISER_DEF_XMIT_CMDS_MAX ISCSI_DEF_XMIT_CMDS_MAX
+#else
+ #define ISER_DEF_XMIT_CMDS_MAX ISER_DEF_XMIT_CMDS_DEFAULT
+#endif
+#define ISER_DEF_CMD_PER_LUN ISER_DEF_XMIT_CMDS_MAX
/* QP settings */
/* Maximal bounds on received asynchronous PDUs */
@@ -111,9 +117,9 @@
#define ISER_MAX_TX_MISC_PDUS 6 /* NOOP_OUT(2), TEXT(1), *
* SCSI_TMFUNC(2), LOGOUT(1) */
-#define ISER_QP_MAX_RECV_DTOS (ISCSI_DEF_XMIT_CMDS_MAX)
+#define ISER_QP_MAX_RECV_DTOS (ISER_DEF_XMIT_CMDS_MAX)
-#define ISER_MIN_POSTED_RX (ISCSI_DEF_XMIT_CMDS_MAX >> 2)
+#define ISER_MIN_POSTED_RX (ISER_DEF_XMIT_CMDS_MAX >> 2)
/* the max TX (send) WR supported by the iSER QP is defined by *
* max_send_wr = T * (1 + D) + C ; D is how many inflight dataouts we expect *
@@ -123,7 +129,7 @@
#define ISER_INFLIGHT_DATAOUTS 8
-#define ISER_QP_MAX_REQ_DTOS (ISCSI_DEF_XMIT_CMDS_MAX * \
+#define ISER_QP_MAX_REQ_DTOS (ISER_DEF_XMIT_CMDS_MAX * \
(1 + ISER_INFLIGHT_DATAOUTS) + \
ISER_MAX_TX_MISC_PDUS + \
ISER_MAX_RX_MISC_PDUS)
@@ -272,6 +278,9 @@ struct iser_conn {
struct ib_qp *qp; /* QP */
struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */
wait_queue_head_t wait; /* waitq for conn/disconn */
+ unsigned qp_max_recv_dtos; /* num of rx buffers */
+ unsigned qp_max_recv_dtos_mask; /* above minus 1 */
+ unsigned min_posted_rx; /* qp_max_recv_dtos >> 2 */
int post_recv_buf_count; /* posted rx count */
atomic_t post_send_buf_count; /* posted tx count */
char name[ISER_OBJECT_NAME_SIZE];
@@ -394,7 +403,7 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task,
void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task);
int iser_initialize_task_headers(struct iscsi_task *task,
struct iser_tx_desc *tx_desc);
-int iser_alloc_rx_descriptors(struct iser_conn *ib_conn);
-int iser_create_fmr_pool(struct iser_conn *ib_conn);
+int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session);
+int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
void iser_free_fmr_pool(struct iser_conn *ib_conn);
#endif
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 626d950..5c2b142 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -241,7 +241,7 @@ out_err:
return -ENOMEM;
}
-int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
+int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session)
{
int i, j;
u64 dma_addr;
@@ -249,20 +249,24 @@ int iser_alloc_rx_descriptors(struct iser_conn *ib_conn)
struct ib_sge *rx_sg;
struct iser_device *device = ib_conn->device;
- if (iser_create_fmr_pool(ib_conn))
+ ib_conn->qp_max_recv_dtos = session->cmds_max;
+ ib_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
+ ib_conn->min_posted_rx = ib_conn->qp_max_recv_dtos >> 2;
+
+ if (iser_create_fmr_pool(ib_conn, session->scsi_cmds_max))
goto create_fmr_pool_failed;
if (iser_alloc_login_buf(ib_conn))
goto alloc_login_buf_fail;
- ib_conn->rx_descs = kmalloc(ISER_QP_MAX_RECV_DTOS *
+ ib_conn->rx_descs = kmalloc(session->cmds_max *
sizeof(struct iser_rx_desc), GFP_KERNEL);
if (!ib_conn->rx_descs)
goto rx_desc_alloc_fail;
rx_desc = ib_conn->rx_descs;
- for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++) {
+ for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++) {
dma_addr = ib_dma_map_single(device->ib_device, (void *)rx_desc,
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
if (ib_dma_mapping_error(device->ib_device, dma_addr))
@@ -305,7 +309,7 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn)
goto free_login_buf;
rx_desc = ib_conn->rx_descs;
- for (i = 0; i < ISER_QP_MAX_RECV_DTOS; i++, rx_desc++)
+ for (i = 0; i < ib_conn->qp_max_recv_dtos; i++, rx_desc++)
ib_dma_unmap_single(device->ib_device, rx_desc->dma_addr,
ISER_RX_PAYLOAD_SIZE, DMA_FROM_DEVICE);
kfree(ib_conn->rx_descs);
@@ -334,9 +338,10 @@ static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
WARN_ON(iser_conn->ib_conn->post_recv_buf_count != 1);
WARN_ON(atomic_read(&iser_conn->ib_conn->post_send_buf_count) != 0);
- iser_dbg("Initially post: %d\n", ISER_MIN_POSTED_RX);
+ iser_dbg("Initially post: %d\n", iser_conn->ib_conn->min_posted_rx);
/* Initial post receive buffers */
- if (iser_post_recvm(iser_conn->ib_conn, ISER_MIN_POSTED_RX))
+ if (iser_post_recvm(iser_conn->ib_conn,
+ iser_conn->ib_conn->min_posted_rx))
return -ENOMEM;
return 0;
@@ -573,9 +578,9 @@ void iser_rcv_completion(struct iser_rx_desc *rx_desc,
return;
outstanding = ib_conn->post_recv_buf_count;
- if (outstanding + ISER_MIN_POSTED_RX <= ISER_QP_MAX_RECV_DTOS) {
- count = min(ISER_QP_MAX_RECV_DTOS - outstanding,
- ISER_MIN_POSTED_RX);
+ if (outstanding + ib_conn->min_posted_rx <= ib_conn->qp_max_recv_dtos) {
+ count = min(ib_conn->qp_max_recv_dtos - outstanding,
+ ib_conn->min_posted_rx);
err = iser_post_recvm(ib_conn, count);
if (err)
iser_err("posting %d rx bufs err %d\n", count, err);
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index b72e349..5e49a36 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -182,7 +182,7 @@ static void iser_free_device_ib_res(struct iser_device *device)
*
* returns 0 on success, or errno code on failure
*/
-int iser_create_fmr_pool(struct iser_conn *ib_conn)
+int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
{
struct iser_device *device = ib_conn->device;
struct ib_fmr_pool_param params;
@@ -202,8 +202,8 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn)
params.max_pages_per_fmr = ISCSI_ISER_SG_TABLESIZE + 1;
/* make the pool size twice the max number of SCSI commands *
* the ML is expected to queue, watermark for unmap at 50% */
- params.pool_size = ISCSI_DEF_XMIT_CMDS_MAX * 2;
- params.dirty_watermark = ISCSI_DEF_XMIT_CMDS_MAX;
+ params.pool_size = cmds_max * 2;
+ params.dirty_watermark = cmds_max;
params.cache = 0;
params.flush_function = NULL;
params.access = (IB_ACCESS_LOCAL_WRITE |
@@ -771,7 +771,7 @@ int iser_post_recvm(struct iser_conn *ib_conn, int count)
rx_wr->sg_list = &rx_desc->rx_sg;
rx_wr->num_sge = 1;
rx_wr->next = rx_wr + 1;
- my_rx_head = (my_rx_head + 1) & (ISER_QP_MAX_RECV_DTOS - 1);
+ my_rx_head = (my_rx_head + 1) & ib_conn->qp_max_recv_dtos_mask;
}
rx_wr--;
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH for-3.11 4/7] IB/iser: Generalize rdma memory registration
[not found] ` <1374153931-7313-1-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
` (2 preceding siblings ...)
2013-07-18 13:25 ` [PATCH for-3.11 3/7] IB/iser: Accept session->cmds_max from user space Or Gerlitz
@ 2013-07-18 13:25 ` Or Gerlitz
2013-07-18 13:25 ` [PATCH for-3.11 5/7] IB/iser: Handle unaligned SG in separate function Or Gerlitz
` (2 subsequent siblings)
6 siblings, 0 replies; 20+ messages in thread
From: Or Gerlitz @ 2013-07-18 13:25 UTC (permalink / raw)
To: roland-DgEjT+Ai2ygdnm+yROfE0A
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, sagig-VPRAkNaXOzVWk0Htik3J/w,
roid-VPRAkNaXOzVWk0Htik3J/w, oren-VPRAkNaXOzVWk0Htik3J/w,
Or Gerlitz
From: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Currently the driver uses FMRs as the only means to register the memory
pointed by SG provided by the SCSI mid-layer with the RDMA device.
As perperation step for adding more methods for fast path memory
registrarion, make the alloc/free and reg/unreg calls be function
pointers which are now just set to the existing FMR ones.
Signed-off-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Or Gerlitz <ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
drivers/infiniband/ulp/iser/iscsi_iser.h | 13 +++++++++++-
drivers/infiniband/ulp/iser/iser_initiator.c | 28 +++++++++++--------------
drivers/infiniband/ulp/iser/iser_verbs.c | 13 +++++++++++-
3 files changed, 36 insertions(+), 18 deletions(-)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index d2fc55a..78117be 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -252,6 +252,9 @@ struct iser_rx_desc {
#define ISER_MAX_CQ 4
+struct iser_conn;
+struct iscsi_iser_task;
+
struct iser_device {
struct ib_device *ib_device;
struct ib_pd *pd;
@@ -265,6 +268,13 @@ struct iser_device {
int cq_active_qps[ISER_MAX_CQ];
int cqs_used;
struct iser_cq_desc *cq_desc;
+ int (*iser_alloc_rdma_reg_res)(struct iser_conn *ib_conn,
+ unsigned cmds_max);
+ void (*iser_free_rdma_reg_res)(struct iser_conn *ib_conn);
+ int (*iser_reg_rdma_mem)(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
+ void (*iser_unreg_rdma_mem)(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
};
struct iser_conn {
@@ -389,7 +399,8 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
struct iser_page_vec *page_vec,
struct iser_mem_reg *mem_reg);
-void iser_unreg_mem(struct iser_mem_reg *mem_reg);
+void iser_unreg_mem(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
int iser_post_recvl(struct iser_conn *ib_conn);
int iser_post_recvm(struct iser_conn *ib_conn, int count);
diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 5c2b142..72ea86c 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c
@@ -49,6 +49,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,
{
struct iscsi_iser_task *iser_task = task->dd_data;
+ struct iser_device *device = iser_task->iser_conn->ib_conn->device;
struct iser_regd_buf *regd_buf;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -69,7 +70,7 @@ static int iser_prepare_read_cmd(struct iscsi_task *task,
return -EINVAL;
}
- err = iser_reg_rdma_mem(iser_task,ISER_DIR_IN);
+ err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_IN);
if (err) {
iser_err("Failed to set up Data-IN RDMA\n");
return err;
@@ -98,6 +99,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
unsigned int edtl)
{
struct iscsi_iser_task *iser_task = task->dd_data;
+ struct iser_device *device = iser_task->iser_conn->ib_conn->device;
struct iser_regd_buf *regd_buf;
int err;
struct iser_hdr *hdr = &iser_task->desc.iser_header;
@@ -119,7 +121,7 @@ iser_prepare_write_cmd(struct iscsi_task *task,
return -EINVAL;
}
- err = iser_reg_rdma_mem(iser_task,ISER_DIR_OUT);
+ err = device->iser_reg_rdma_mem(iser_task, ISER_DIR_OUT);
if (err != 0) {
iser_err("Failed to register write cmd RDMA mem\n");
return err;
@@ -253,7 +255,7 @@ int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *s
ib_conn->qp_max_recv_dtos_mask = session->cmds_max - 1; /* cmds_max is 2^N */
ib_conn->min_posted_rx = ib_conn->qp_max_recv_dtos >> 2;
- if (iser_create_fmr_pool(ib_conn, session->scsi_cmds_max))
+ if (device->iser_alloc_rdma_reg_res(ib_conn, session->scsi_cmds_max))
goto create_fmr_pool_failed;
if (iser_alloc_login_buf(ib_conn))
@@ -293,7 +295,7 @@ rx_desc_dma_map_failed:
rx_desc_alloc_fail:
iser_free_login_buf(ib_conn);
alloc_login_buf_fail:
- iser_free_fmr_pool(ib_conn);
+ device->iser_free_rdma_reg_res(ib_conn);
create_fmr_pool_failed:
iser_err("failed allocating rx descriptors / data buffers\n");
return -ENOMEM;
@@ -318,7 +320,7 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn)
free_login_buf:
iser_free_login_buf(ib_conn);
- iser_free_fmr_pool(ib_conn);
+ device->iser_free_rdma_reg_res(ib_conn);
}
static int iser_post_rx_bufs(struct iscsi_conn *conn, struct iscsi_hdr *req)
@@ -629,8 +631,8 @@ void iser_task_rdma_init(struct iscsi_iser_task *iser_task)
void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
{
+ struct iser_device *device = iser_task->iser_conn->ib_conn->device;
int is_rdma_aligned = 1;
- struct iser_regd_buf *regd;
/* if we were reading, copy back to unaligned sglist,
* anyway dma_unmap and free the copy
@@ -644,17 +646,11 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task)
iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT);
}
- if (iser_task->dir[ISER_DIR_IN]) {
- regd = &iser_task->rdma_regd[ISER_DIR_IN];
- if (regd->reg.is_fmr)
- iser_unreg_mem(®d->reg);
- }
+ if (iser_task->dir[ISER_DIR_IN])
+ device->iser_unreg_rdma_mem(iser_task, ISER_DIR_IN);
- if (iser_task->dir[ISER_DIR_OUT]) {
- regd = &iser_task->rdma_regd[ISER_DIR_OUT];
- if (regd->reg.is_fmr)
- iser_unreg_mem(®d->reg);
- }
+ if (iser_task->dir[ISER_DIR_OUT])
+ device->iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT);
/* if the data was unaligned, it was already unmapped and then copied */
if (is_rdma_aligned)
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 5e49a36..5b53428 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -74,6 +74,12 @@ static int iser_create_device_ib_res(struct iser_device *device)
int i, j;
struct iser_cq_desc *cq_desc;
+ /* Assign function handles */
+ device->iser_alloc_rdma_reg_res = iser_create_fmr_pool;
+ device->iser_free_rdma_reg_res = iser_free_fmr_pool;
+ device->iser_reg_rdma_mem = iser_reg_rdma_mem;
+ device->iser_unreg_rdma_mem = iser_unreg_mem;
+
device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
iser_info("using %d CQs, device %s supports %d vectors\n",
device->cqs_used, device->ib_device->name,
@@ -721,10 +727,15 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
/**
* Unregister (previosuly registered) memory.
*/
-void iser_unreg_mem(struct iser_mem_reg *reg)
+void iser_unreg_mem(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
{
+ struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
int ret;
+ if (!reg->is_fmr)
+ return;
+
iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
ret = ib_fmr_pool_unmap((struct ib_pool_fmr *)reg->mem_h);
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH for-3.11 5/7] IB/iser: Handle unaligned SG in separate function
[not found] ` <1374153931-7313-1-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
` (3 preceding siblings ...)
2013-07-18 13:25 ` [PATCH for-3.11 4/7] IB/iser: Generalize rdma memory registration Or Gerlitz
@ 2013-07-18 13:25 ` Or Gerlitz
2013-07-18 13:25 ` [PATCH for-3.11 6/7] IB/iser: Place the fmr pool into a union in iser's IB conn struct Or Gerlitz
2013-07-18 13:25 ` [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR) Or Gerlitz
6 siblings, 0 replies; 20+ messages in thread
From: Or Gerlitz @ 2013-07-18 13:25 UTC (permalink / raw)
To: roland-DgEjT+Ai2ygdnm+yROfE0A
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, sagig-VPRAkNaXOzVWk0Htik3J/w,
roid-VPRAkNaXOzVWk0Htik3J/w, oren-VPRAkNaXOzVWk0Htik3J/w,
Or Gerlitz
From: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
This routine will be shared with other rdma management schemes. The bounce
buffer solution for unaligned SG-lists and the sg_to_page_vec routine are
likely to be used for other registration schemes and not just FMR.
Move them out of the FMR specific code, and call them from there, later
they will be called also from other reg methods code.
Signed-off-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Or Gerlitz <ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
drivers/infiniband/ulp/iser/iser_memory.c | 66 +++++++++++++++++++----------
1 files changed, 43 insertions(+), 23 deletions(-)
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 797e49f..4dea1ba 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -170,8 +170,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task,
*/
static int iser_sg_to_page_vec(struct iser_data_buf *data,
- struct iser_page_vec *page_vec,
- struct ib_device *ibdev)
+ struct ib_device *ibdev, u64 *pages,
+ int *offset, int *data_size)
{
struct scatterlist *sg, *sgl = (struct scatterlist *)data->buf;
u64 start_addr, end_addr, page, chunk_start = 0;
@@ -180,7 +180,7 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
int i, new_chunk, cur_page, last_ent = data->dma_nents - 1;
/* compute the offset of first element */
- page_vec->offset = (u64) sgl[0].offset & ~MASK_4K;
+ *offset = (u64) sgl[0].offset & ~MASK_4K;
new_chunk = 1;
cur_page = 0;
@@ -204,13 +204,14 @@ static int iser_sg_to_page_vec(struct iser_data_buf *data,
which might be unaligned */
page = chunk_start & MASK_4K;
do {
- page_vec->pages[cur_page++] = page;
+ pages[cur_page++] = page;
page += SIZE_4K;
} while (page < end_addr);
}
- page_vec->data_size = total_sz;
- iser_dbg("page_vec->data_size:%d cur_page %d\n", page_vec->data_size,cur_page);
+ *data_size = total_sz;
+ iser_dbg("page_vec->data_size:%d cur_page %d\n",
+ *data_size, cur_page);
return cur_page;
}
@@ -295,8 +296,10 @@ static void iser_page_vec_build(struct iser_data_buf *data,
page_vec->offset = 0;
iser_dbg("Translating sg sz: %d\n", data->dma_nents);
- page_vec_len = iser_sg_to_page_vec(data, page_vec, ibdev);
- iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents,page_vec_len);
+ page_vec_len = iser_sg_to_page_vec(data, ibdev, page_vec->pages,
+ &page_vec->offset,
+ &page_vec->data_size);
+ iser_dbg("sg len %d page_vec_len %d\n", data->dma_nents, page_vec_len);
page_vec->length = page_vec_len;
@@ -344,6 +347,32 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task)
}
}
+static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
+ struct ib_device *ibdev,
+ enum iser_data_dir cmd_dir,
+ int aligned_len)
+{
+ struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
+ struct iser_data_buf *mem = &iser_task->data[cmd_dir];
+
+ iscsi_conn->fmr_unalign_cnt++;
+ iser_warn("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
+ aligned_len, mem->size);
+
+ if (iser_debug_level > 0)
+ iser_data_buf_dump(mem, ibdev);
+
+ /* unmap the command data before accessing it */
+ iser_dma_unmap_task_data(iser_task);
+
+ /* allocate copy buf, if we are writing, copy the */
+ /* unaligned scatterlist, dma map the copy */
+ if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0)
+ return -ENOMEM;
+
+ return 0;
+}
+
/**
* iser_reg_rdma_mem - Registers memory intended for RDMA,
* obtaining rkey and va
@@ -353,7 +382,6 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task)
int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir)
{
- struct iscsi_conn *iscsi_conn = iser_task->iser_conn->iscsi_conn;
struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
struct ib_device *ibdev = device->ib_device;
@@ -369,20 +397,12 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
aligned_len = iser_data_buf_aligned_len(mem, ibdev);
if (aligned_len != mem->dma_nents ||
(!ib_conn->fmr_pool && mem->dma_nents > 1)) {
- iscsi_conn->fmr_unalign_cnt++;
- iser_dbg("rdma alignment violation (%d/%d aligned) or FMR not supported\n",
- aligned_len, mem->size);
-
- if (iser_debug_level > 0)
- iser_data_buf_dump(mem, ibdev);
-
- /* unmap the command data before accessing it */
- iser_dma_unmap_task_data(iser_task);
-
- /* allocate copy buf, if we are writing, copy the */
- /* unaligned scatterlist, dma map the copy */
- if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0)
- return -ENOMEM;
+ err = fall_to_bounce_buf(iser_task, ibdev,
+ cmd_dir, aligned_len);
+ if (err) {
+ iser_err("failed to allocate bounce buffer\n");
+ return err;
+ }
mem = &iser_task->data_copy[cmd_dir];
}
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH for-3.11 6/7] IB/iser: Place the fmr pool into a union in iser's IB conn struct
[not found] ` <1374153931-7313-1-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
` (4 preceding siblings ...)
2013-07-18 13:25 ` [PATCH for-3.11 5/7] IB/iser: Handle unaligned SG in separate function Or Gerlitz
@ 2013-07-18 13:25 ` Or Gerlitz
2013-07-18 13:25 ` [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR) Or Gerlitz
6 siblings, 0 replies; 20+ messages in thread
From: Or Gerlitz @ 2013-07-18 13:25 UTC (permalink / raw)
To: roland-DgEjT+Ai2ygdnm+yROfE0A
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, sagig-VPRAkNaXOzVWk0Htik3J/w,
roid-VPRAkNaXOzVWk0Htik3J/w, oren-VPRAkNaXOzVWk0Htik3J/w,
Or Gerlitz
From: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
This is preparation step for other memory registration methods to be
added. In addition, change reg/unreg routines signature to indicate
they use FMRs.
Signed-off-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Or Gerlitz <ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
drivers/infiniband/ulp/iser/iscsi_iser.h | 18 +++++++----
drivers/infiniband/ulp/iser/iser_memory.c | 24 ++++++++-------
drivers/infiniband/ulp/iser/iser_verbs.c | 47 +++++++++++++++--------------
3 files changed, 48 insertions(+), 41 deletions(-)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 78117be..75c5352 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -286,7 +286,6 @@ struct iser_conn {
struct iser_device *device; /* device context */
struct rdma_cm_id *cma_id; /* CMA ID */
struct ib_qp *qp; /* QP */
- struct ib_fmr_pool *fmr_pool; /* pool of IB FMRs */
wait_queue_head_t wait; /* waitq for conn/disconn */
unsigned qp_max_recv_dtos; /* num of rx buffers */
unsigned qp_max_recv_dtos_mask; /* above minus 1 */
@@ -294,8 +293,6 @@ struct iser_conn {
int post_recv_buf_count; /* posted rx count */
atomic_t post_send_buf_count; /* posted tx count */
char name[ISER_OBJECT_NAME_SIZE];
- struct iser_page_vec *page_vec; /* represents SG to fmr maps*
- * maps serialized as tx is*/
struct list_head conn_list; /* entry in ig conn list */
char *login_buf;
@@ -304,6 +301,13 @@ struct iser_conn {
unsigned int rx_desc_head;
struct iser_rx_desc *rx_descs;
struct ib_recv_wr rx_wr[ISER_MIN_POSTED_RX];
+ union {
+ struct {
+ struct ib_fmr_pool *pool; /* pool of IB FMRs */
+ struct iser_page_vec *page_vec; /* represents SG to fmr maps*
+ * maps serialized as tx is*/
+ } fmr;
+ } fastreg;
};
struct iscsi_iser_conn {
@@ -387,8 +391,8 @@ void iser_free_rx_descriptors(struct iser_conn *ib_conn);
void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task,
enum iser_data_dir cmd_dir);
-int iser_reg_rdma_mem(struct iscsi_iser_task *task,
- enum iser_data_dir cmd_dir);
+int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
+ enum iser_data_dir cmd_dir);
int iser_connect(struct iser_conn *ib_conn,
struct sockaddr_in *src_addr,
@@ -399,8 +403,8 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
struct iser_page_vec *page_vec,
struct iser_mem_reg *mem_reg);
-void iser_unreg_mem(struct iscsi_iser_task *iser_task,
- enum iser_data_dir cmd_dir);
+void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
int iser_post_recvl(struct iser_conn *ib_conn);
int iser_post_recvm(struct iser_conn *ib_conn, int count);
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 4dea1ba..1985e90 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -374,13 +374,13 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task,
}
/**
- * iser_reg_rdma_mem - Registers memory intended for RDMA,
- * obtaining rkey and va
+ * iser_reg_rdma_mem_fmr - Registers memory intended for RDMA,
+ * using FMR (if possible) obtaining rkey and va
*
* returns 0 on success, errno code on failure
*/
-int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
- enum iser_data_dir cmd_dir)
+int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
{
struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
struct iser_device *device = ib_conn->device;
@@ -396,7 +396,7 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
aligned_len = iser_data_buf_aligned_len(mem, ibdev);
if (aligned_len != mem->dma_nents ||
- (!ib_conn->fmr_pool && mem->dma_nents > 1)) {
+ (!ib_conn->fastreg.fmr.pool && mem->dma_nents > 1)) {
err = fall_to_bounce_buf(iser_task, ibdev,
cmd_dir, aligned_len);
if (err) {
@@ -423,19 +423,21 @@ int iser_reg_rdma_mem(struct iscsi_iser_task *iser_task,
(unsigned long)regd_buf->reg.va,
(unsigned long)regd_buf->reg.len);
} else { /* use FMR for multiple dma entries */
- iser_page_vec_build(mem, ib_conn->page_vec, ibdev);
- err = iser_reg_page_vec(ib_conn, ib_conn->page_vec, ®d_buf->reg);
+ iser_page_vec_build(mem, ib_conn->fastreg.fmr.page_vec, ibdev);
+ err = iser_reg_page_vec(ib_conn, ib_conn->fastreg.fmr.page_vec,
+ ®d_buf->reg);
if (err && err != -EAGAIN) {
iser_data_buf_dump(mem, ibdev);
iser_err("mem->dma_nents = %d (dlength = 0x%x)\n",
mem->dma_nents,
ntoh24(iser_task->desc.iscsi_header.dlength));
iser_err("page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n",
- ib_conn->page_vec->data_size, ib_conn->page_vec->length,
- ib_conn->page_vec->offset);
- for (i=0 ; i<ib_conn->page_vec->length ; i++)
+ ib_conn->fastreg.fmr.page_vec->data_size,
+ ib_conn->fastreg.fmr.page_vec->length,
+ ib_conn->fastreg.fmr.page_vec->offset);
+ for (i = 0; i < ib_conn->fastreg.fmr.page_vec->length; i++)
iser_err("page_vec[%d] = 0x%llx\n", i,
- (unsigned long long) ib_conn->page_vec->pages[i]);
+ (unsigned long long) ib_conn->fastreg.fmr.page_vec->pages[i]);
}
if (err)
return err;
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 5b53428..d9a47b9 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -77,8 +77,8 @@ static int iser_create_device_ib_res(struct iser_device *device)
/* Assign function handles */
device->iser_alloc_rdma_reg_res = iser_create_fmr_pool;
device->iser_free_rdma_reg_res = iser_free_fmr_pool;
- device->iser_reg_rdma_mem = iser_reg_rdma_mem;
- device->iser_unreg_rdma_mem = iser_unreg_mem;
+ device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr;
+ device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;
device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
iser_info("using %d CQs, device %s supports %d vectors\n",
@@ -194,13 +194,13 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
struct ib_fmr_pool_param params;
int ret = -ENOMEM;
- ib_conn->page_vec = kmalloc(sizeof(struct iser_page_vec) +
- (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE+1)),
- GFP_KERNEL);
- if (!ib_conn->page_vec)
+ ib_conn->fastreg.fmr.page_vec = kmalloc(sizeof(struct iser_page_vec) +
+ (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)),
+ GFP_KERNEL);
+ if (!ib_conn->fastreg.fmr.page_vec)
return ret;
- ib_conn->page_vec->pages = (u64 *)(ib_conn->page_vec + 1);
+ ib_conn->fastreg.fmr.page_vec->pages = (u64 *)(ib_conn->fastreg.fmr.page_vec + 1);
params.page_shift = SHIFT_4K;
/* when the first/last SG element are not start/end *
@@ -216,16 +216,16 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
IB_ACCESS_REMOTE_WRITE |
IB_ACCESS_REMOTE_READ);
- ib_conn->fmr_pool = ib_create_fmr_pool(device->pd, ¶ms);
- if (!IS_ERR(ib_conn->fmr_pool))
+ ib_conn->fastreg.fmr.pool = ib_create_fmr_pool(device->pd, ¶ms);
+ if (!IS_ERR(ib_conn->fastreg.fmr.pool))
return 0;
/* no FMR => no need for page_vec */
- kfree(ib_conn->page_vec);
- ib_conn->page_vec = NULL;
+ kfree(ib_conn->fastreg.fmr.page_vec);
+ ib_conn->fastreg.fmr.page_vec = NULL;
- ret = PTR_ERR(ib_conn->fmr_pool);
- ib_conn->fmr_pool = NULL;
+ ret = PTR_ERR(ib_conn->fastreg.fmr.pool);
+ ib_conn->fastreg.fmr.pool = NULL;
if (ret != -ENOSYS) {
iser_err("FMR allocation failed, err %d\n", ret);
return ret;
@@ -241,15 +241,15 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
void iser_free_fmr_pool(struct iser_conn *ib_conn)
{
iser_info("freeing conn %p fmr pool %p\n",
- ib_conn, ib_conn->fmr_pool);
+ ib_conn, ib_conn->fastreg.fmr.pool);
- if (ib_conn->fmr_pool != NULL)
- ib_destroy_fmr_pool(ib_conn->fmr_pool);
+ if (ib_conn->fastreg.fmr.pool != NULL)
+ ib_destroy_fmr_pool(ib_conn->fastreg.fmr.pool);
- ib_conn->fmr_pool = NULL;
+ ib_conn->fastreg.fmr.pool = NULL;
- kfree(ib_conn->page_vec);
- ib_conn->page_vec = NULL;
+ kfree(ib_conn->fastreg.fmr.page_vec);
+ ib_conn->fastreg.fmr.page_vec = NULL;
}
/**
@@ -692,7 +692,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
page_list = page_vec->pages;
io_addr = page_list[0];
- mem = ib_fmr_pool_map_phys(ib_conn->fmr_pool,
+ mem = ib_fmr_pool_map_phys(ib_conn->fastreg.fmr.pool,
page_list,
page_vec->length,
io_addr);
@@ -725,10 +725,11 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
}
/**
- * Unregister (previosuly registered) memory.
+ * Unregister (previosuly registered using FMR) memory.
+ * If memory is non-FMR does nothing.
*/
-void iser_unreg_mem(struct iscsi_iser_task *iser_task,
- enum iser_data_dir cmd_dir)
+void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
{
struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
int ret;
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 20+ messages in thread* [PATCH for-3.11 7/7] IB/iser: Introduce fast memory registration model (FRWR)
[not found] ` <1374153931-7313-1-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
` (5 preceding siblings ...)
2013-07-18 13:25 ` [PATCH for-3.11 6/7] IB/iser: Place the fmr pool into a union in iser's IB conn struct Or Gerlitz
@ 2013-07-18 13:25 ` Or Gerlitz
[not found] ` <1374153931-7313-8-git-send-email-ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
6 siblings, 1 reply; 20+ messages in thread
From: Or Gerlitz @ 2013-07-18 13:25 UTC (permalink / raw)
To: roland-DgEjT+Ai2ygdnm+yROfE0A
Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, sagig-VPRAkNaXOzVWk0Htik3J/w,
roid-VPRAkNaXOzVWk0Htik3J/w, oren-VPRAkNaXOzVWk0Htik3J/w,
Or Gerlitz
From: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Newer HCAs and Virtual functions may not support FMRs but rather a fast
registration model, which we call FRWR - "Fast Registration Work Requests".
This model was introduced in 00f7ec36c "RDMA/core: Add memory management
extensions support" and works when the IB device supports the
IB_DEVICE_MEM_MGT_EXTENSIONS capability.
Upon creating the iser device iser will test wether the HCA supports FMRs.
if no support for FMRs, check if IB_DEVICE_MEM_MGT_EXTENSIONS is supported and
assign function handles that handle fast registration and allocation of
appropriate resources (fast_reg descriptors).
Registration is done using posting IB_WR_FAST_REG_MR to the QP and
invalidations using posting IB_WR_LOCAL_INV.
Signed-off-by: Sagi Grimberg <sagig-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
Signed-off-by: Or Gerlitz <ogerlitz-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
---
drivers/infiniband/ulp/iser/iscsi_iser.h | 21 ++++-
drivers/infiniband/ulp/iser/iser_memory.c | 140 ++++++++++++++++++++++++++++-
drivers/infiniband/ulp/iser/iser_verbs.c | 138 ++++++++++++++++++++++++++--
3 files changed, 287 insertions(+), 12 deletions(-)
diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index 75c5352..6791402 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h
@@ -211,7 +211,7 @@ struct iser_mem_reg {
u64 va;
u64 len;
void *mem_h;
- int is_fmr;
+ int is_mr;
};
struct iser_regd_buf {
@@ -277,6 +277,15 @@ struct iser_device {
enum iser_data_dir cmd_dir);
};
+struct fast_reg_descriptor {
+ struct list_head list;
+ /* For fast registration - FRWR */
+ struct ib_mr *data_mr;
+ struct ib_fast_reg_page_list *data_frpl;
+ /* Valid for fast registration flag */
+ bool valid;
+};
+
struct iser_conn {
struct iscsi_iser_conn *iser_conn; /* iser conn for upcalls */
struct iscsi_endpoint *ep;
@@ -307,6 +316,10 @@ struct iser_conn {
struct iser_page_vec *page_vec; /* represents SG to fmr maps*
* maps serialized as tx is*/
} fmr;
+ struct {
+ struct list_head pool;
+ int pool_size;
+ } frwr;
} fastreg;
};
@@ -393,6 +406,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task,
int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task,
enum iser_data_dir cmd_dir);
+int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task,
+ enum iser_data_dir cmd_dir);
int iser_connect(struct iser_conn *ib_conn,
struct sockaddr_in *src_addr,
@@ -405,6 +420,8 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
enum iser_data_dir cmd_dir);
+void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir);
int iser_post_recvl(struct iser_conn *ib_conn);
int iser_post_recvm(struct iser_conn *ib_conn, int count);
@@ -421,4 +438,6 @@ int iser_initialize_task_headers(struct iscsi_task *task,
int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session);
int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
void iser_free_fmr_pool(struct iser_conn *ib_conn);
+int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max);
+void iser_free_frwr_pool(struct iser_conn *ib_conn);
#endif
diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 1985e90..1ce0c97 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c
@@ -395,8 +395,7 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
regd_buf = &iser_task->rdma_regd[cmd_dir];
aligned_len = iser_data_buf_aligned_len(mem, ibdev);
- if (aligned_len != mem->dma_nents ||
- (!ib_conn->fastreg.fmr.pool && mem->dma_nents > 1)) {
+ if (aligned_len != mem->dma_nents) {
err = fall_to_bounce_buf(iser_task, ibdev,
cmd_dir, aligned_len);
if (err) {
@@ -414,7 +413,7 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
regd_buf->reg.rkey = device->mr->rkey;
regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
- regd_buf->reg.is_fmr = 0;
+ regd_buf->reg.is_mr = 0;
iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X "
"va: 0x%08lX sz: %ld]\n",
@@ -444,3 +443,138 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task,
}
return 0;
}
+
+static int iser_fast_reg_mr(struct fast_reg_descriptor *desc,
+ struct iser_conn *ib_conn,
+ struct iser_regd_buf *regd_buf,
+ u32 offset, unsigned int data_size,
+ unsigned int page_list_len)
+{
+ struct ib_send_wr fastreg_wr, inv_wr;
+ struct ib_send_wr *bad_wr, *wr = NULL;
+ u8 key;
+ int ret;
+
+ if (!desc->valid) {
+ memset(&inv_wr, 0, sizeof(inv_wr));
+ inv_wr.opcode = IB_WR_LOCAL_INV;
+ inv_wr.send_flags = IB_SEND_SIGNALED;
+ inv_wr.ex.invalidate_rkey = desc->data_mr->rkey;
+ wr = &inv_wr;
+ /* Bump the key */
+ key = (u8)(desc->data_mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(desc->data_mr, ++key);
+ }
+
+ /* Prepare FASTREG WR */
+ memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+ fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+ fastreg_wr.send_flags = IB_SEND_SIGNALED;
+ fastreg_wr.wr.fast_reg.iova_start = desc->data_frpl->page_list[0] + offset;
+ fastreg_wr.wr.fast_reg.page_list = desc->data_frpl;
+ fastreg_wr.wr.fast_reg.page_list_len = page_list_len;
+ fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K;
+ fastreg_wr.wr.fast_reg.length = data_size;
+ fastreg_wr.wr.fast_reg.rkey = desc->data_mr->rkey;
+ fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_WRITE |
+ IB_ACCESS_REMOTE_READ);
+
+ if (!wr) {
+ wr = &fastreg_wr;
+ atomic_inc(&ib_conn->post_send_buf_count);
+ } else {
+ wr->next = &fastreg_wr;
+ atomic_add(2, &ib_conn->post_send_buf_count);
+ }
+
+ ret = ib_post_send(ib_conn->qp, wr, &bad_wr);
+ if (ret) {
+ if (bad_wr->next)
+ atomic_sub(2, &ib_conn->post_send_buf_count);
+ else
+ atomic_dec(&ib_conn->post_send_buf_count);
+ iser_err("fast registration failed, ret:%d\n", ret);
+ return ret;
+ }
+ desc->valid = false;
+
+ regd_buf->reg.mem_h = desc;
+ regd_buf->reg.lkey = desc->data_mr->lkey;
+ regd_buf->reg.rkey = desc->data_mr->rkey;
+ regd_buf->reg.va = desc->data_frpl->page_list[0] + offset;
+ regd_buf->reg.len = data_size;
+ regd_buf->reg.is_mr = 1;
+
+ return ret;
+}
+
+/**
+ * iser_reg_rdma_mem_frwr - Registers memory intended for RDMA,
+ * using Fast Registration WR (if possible) obtaining rkey and va
+ *
+ * returns 0 on success, errno code on failure
+ */
+int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
+{
+ struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
+ struct iser_device *device = ib_conn->device;
+ struct ib_device *ibdev = device->ib_device;
+ struct iser_data_buf *mem = &iser_task->data[cmd_dir];
+ struct iser_regd_buf *regd_buf = &iser_task->rdma_regd[cmd_dir];
+ struct fast_reg_descriptor *desc;
+ unsigned int data_size, page_list_len;
+ int err, aligned_len;
+ unsigned long flags;
+ u32 offset;
+
+ aligned_len = iser_data_buf_aligned_len(mem, ibdev);
+ if (aligned_len != mem->dma_nents) {
+ err = fall_to_bounce_buf(iser_task, ibdev,
+ cmd_dir, aligned_len);
+ if (err) {
+ iser_err("failed to allocate bounce buffer\n");
+ return err;
+ }
+ mem = &iser_task->data_copy[cmd_dir];
+ }
+
+ /* if there a single dma entry, dma mr suffices */
+ if (mem->dma_nents == 1) {
+ struct scatterlist *sg = (struct scatterlist *)mem->buf;
+
+ regd_buf->reg.lkey = device->mr->lkey;
+ regd_buf->reg.rkey = device->mr->rkey;
+ regd_buf->reg.len = ib_sg_dma_len(ibdev, &sg[0]);
+ regd_buf->reg.va = ib_sg_dma_address(ibdev, &sg[0]);
+ regd_buf->reg.is_mr = 0;
+ } else {
+ spin_lock_irqsave(&ib_conn->lock, flags);
+ desc = list_first_entry(&ib_conn->fastreg.frwr.pool,
+ struct fast_reg_descriptor, list);
+ list_del(&desc->list);
+ spin_unlock_irqrestore(&ib_conn->lock, flags);
+ page_list_len = iser_sg_to_page_vec(mem, device->ib_device,
+ desc->data_frpl->page_list,
+ &offset, &data_size);
+
+ if (page_list_len * SIZE_4K < data_size) {
+ iser_err("fast reg page_list too short to hold this SG\n");
+ err = -EINVAL;
+ goto err_reg;
+ }
+
+ err = iser_fast_reg_mr(desc, ib_conn, regd_buf,
+ offset, data_size, page_list_len);
+ if (err)
+ goto err_reg;
+ }
+
+ return 0;
+err_reg:
+ spin_lock_irqsave(&ib_conn->lock, flags);
+ list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+ spin_unlock_irqrestore(&ib_conn->lock, flags);
+ return err;
+}
diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index d9a47b9..a0ea8de 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -73,12 +73,36 @@ static int iser_create_device_ib_res(struct iser_device *device)
{
int i, j;
struct iser_cq_desc *cq_desc;
+ struct ib_device_attr *dev_attr;
- /* Assign function handles */
- device->iser_alloc_rdma_reg_res = iser_create_fmr_pool;
- device->iser_free_rdma_reg_res = iser_free_fmr_pool;
- device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr;
- device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;
+ dev_attr = kmalloc(sizeof *dev_attr, GFP_KERNEL);
+ if (!dev_attr)
+ return -ENOMEM;
+
+ if (ib_query_device(device->ib_device, dev_attr)) {
+ pr_warn("Query device failed for %s\n", device->ib_device->name);
+ goto dev_attr_err;
+ }
+
+ /* Assign function handles - based on FMR support */
+ if (device->ib_device->alloc_fmr && device->ib_device->dealloc_fmr &&
+ device->ib_device->map_phys_fmr && device->ib_device->unmap_fmr) {
+ iser_info("FMR supported, using FMR for registration\n");
+ device->iser_alloc_rdma_reg_res = iser_create_fmr_pool;
+ device->iser_free_rdma_reg_res = iser_free_fmr_pool;
+ device->iser_reg_rdma_mem = iser_reg_rdma_mem_fmr;
+ device->iser_unreg_rdma_mem = iser_unreg_mem_fmr;
+ } else
+ if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+ iser_info("FRWR supported, using FRWR for registration\n");
+ device->iser_alloc_rdma_reg_res = iser_create_frwr_pool;
+ device->iser_free_rdma_reg_res = iser_free_frwr_pool;
+ device->iser_reg_rdma_mem = iser_reg_rdma_mem_frwr;
+ device->iser_unreg_rdma_mem = iser_unreg_mem_frwr;
+ } else {
+ iser_err("IB device does not support FMRs nor FRWRs, can't register memory\n");
+ goto dev_attr_err;
+ }
device->cqs_used = min(ISER_MAX_CQ, device->ib_device->num_comp_vectors);
iser_info("using %d CQs, device %s supports %d vectors\n",
@@ -134,6 +158,7 @@ static int iser_create_device_ib_res(struct iser_device *device)
if (ib_register_event_handler(&device->event_handler))
goto handler_err;
+ kfree(dev_attr);
return 0;
handler_err:
@@ -153,6 +178,8 @@ pd_err:
kfree(device->cq_desc);
cq_desc_err:
iser_err("failed to allocate an IB resource\n");
+dev_attr_err:
+ kfree(dev_attr);
return -1;
}
@@ -253,6 +280,80 @@ void iser_free_fmr_pool(struct iser_conn *ib_conn)
}
/**
+ * iser_create_frwr_pool - Creates pool of fast_reg descriptors
+ * for fast registration work requests.
+ * returns 0 on success, or errno code on failure
+ */
+int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max)
+{
+ struct iser_device *device = ib_conn->device;
+ struct fast_reg_descriptor *desc;
+ int i, ret;
+
+ INIT_LIST_HEAD(&ib_conn->fastreg.frwr.pool);
+ ib_conn->fastreg.frwr.pool_size = 0;
+ for (i = 0; i < cmds_max; i++) {
+ desc = kmalloc(sizeof(*desc), GFP_KERNEL);
+ if (!desc) {
+ iser_err("Failed to allocate a new fast_reg descriptor\n");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ desc->data_frpl = ib_alloc_fast_reg_page_list(device->ib_device,
+ ISCSI_ISER_SG_TABLESIZE + 1);
+ if (IS_ERR(desc->data_frpl)) {
+ iser_err("Failed to allocate ib_fast_reg_page_list err=%ld\n",
+ PTR_ERR(desc->data_frpl));
+ goto err;
+ }
+
+ desc->data_mr = ib_alloc_fast_reg_mr(device->pd,
+ ISCSI_ISER_SG_TABLESIZE + 1);
+ if (IS_ERR(desc->data_mr)) {
+ iser_err("Failed to allocate ib_fast_reg_mr err=%ld\n",
+ PTR_ERR(desc->data_mr));
+ ib_free_fast_reg_page_list(desc->data_frpl);
+ goto err;
+ }
+ desc->valid = true;
+ list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+ ib_conn->fastreg.frwr.pool_size++;
+ }
+
+ return 0;
+err:
+ iser_free_frwr_pool(ib_conn);
+ return ret;
+}
+
+/**
+ * iser_free_frwr_pool - releases the pool of fast_reg descriptors
+ */
+void iser_free_frwr_pool(struct iser_conn *ib_conn)
+{
+ struct fast_reg_descriptor *desc, *tmp;
+ int i = 0;
+
+ if (list_empty(&ib_conn->fastreg.frwr.pool))
+ return;
+
+ iser_err("freeing conn %p frwr pool\n", ib_conn);
+
+ list_for_each_entry_safe(desc, tmp, &ib_conn->fastreg.frwr.pool, list) {
+ list_del(&desc->list);
+ ib_free_fast_reg_page_list(desc->data_frpl);
+ ib_dereg_mr(desc->data_mr);
+ kfree(desc);
+ ++i;
+ }
+
+ if (i < ib_conn->fastreg.frwr.pool_size)
+ iser_warn("pool still has %d regions registered\n",
+ ib_conn->fastreg.frwr.pool_size - i);
+}
+
+/**
* iser_create_ib_conn_res - Queue-Pair (QP)
*
* returns 0 on success, -1 on failure
@@ -707,7 +808,7 @@ int iser_reg_page_vec(struct iser_conn *ib_conn,
mem_reg->rkey = mem->fmr->rkey;
mem_reg->len = page_vec->length * SIZE_4K;
mem_reg->va = io_addr;
- mem_reg->is_fmr = 1;
+ mem_reg->is_mr = 1;
mem_reg->mem_h = (void *)mem;
mem_reg->va += page_vec->offset;
@@ -734,7 +835,7 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
int ret;
- if (!reg->is_fmr)
+ if (!reg->is_mr)
return;
iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
@@ -746,6 +847,23 @@ void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task,
reg->mem_h = NULL;
}
+void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task,
+ enum iser_data_dir cmd_dir)
+{
+ struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
+ struct iser_conn *ib_conn = iser_task->iser_conn->ib_conn;
+ struct fast_reg_descriptor *desc = reg->mem_h;
+
+ if (!reg->is_mr)
+ return;
+
+ reg->mem_h = NULL;
+ reg->is_mr = 0;
+ spin_lock_bh(&ib_conn->lock);
+ list_add_tail(&desc->list, &ib_conn->fastreg.frwr.pool);
+ spin_unlock_bh(&ib_conn->lock);
+}
+
int iser_post_recvl(struct iser_conn *ib_conn)
{
struct ib_recv_wr rx_wr, *rx_wr_failed;
@@ -867,7 +985,11 @@ static int iser_drain_tx_cq(struct iser_device *device, int cq_index)
if (wc.status == IB_WC_SUCCESS) {
if (wc.opcode == IB_WC_SEND)
iser_snd_completion(tx_desc, ib_conn);
- else
+ else if (wc.opcode == IB_WC_LOCAL_INV ||
+ wc.opcode == IB_WC_FAST_REG_MR) {
+ atomic_dec(&ib_conn->post_send_buf_count);
+ continue;
+ } else
iser_err("expected opcode %d got %d\n",
IB_WC_SEND, wc.opcode);
} else {
--
1.7.1
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
^ permalink raw reply related [flat|nested] 20+ messages in thread