Linux RDMA and InfiniBand development
 help / color / mirror / Atom feed
* Re: [PATCH 2/3] iopmem : Add a block device driver for PCIe attached IO memory.
From: Christoph Hellwig @ 2016-10-28  6:45 UTC (permalink / raw)
  To: Stephen Bates
  Cc: linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	linux-block-u79uwXL29TY76Z2rM5mHXA,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg,
	dan.j.williams-ral2JQCrhuEAvxtiuMwx3w,
	ross.zwisler-VuQAYsv1563Yd54FQh9/CA, willy-VuQAYsv1563Yd54FQh9/CA,
	jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/,
	haggaie-VPRAkNaXOzVWk0Htik3J/w, hch-wEGCiKHe2LqWVfeAwA7xHQ,
	axboe-b10kYP2dOMg, corbet-T1hC0tSOHrs,
	jim.macdonald-FgSLVYC75IpWk0Htik3J/w,
	sbates-Rgftl6RXld5BDgjK7y7TUQ, logang-OTvnGxWRz7hWk0Htik3J/w
In-Reply-To: <1476826937-20665-3-git-send-email-sbates-pv7U853sEMVWk0Htik3J/w@public.gmane.org>

> Signed-off-by: Stephen Bates <sbates-pv7U853sEMVWk0Htik3J/w@public.gmane.org>

FYI, that address has bounced throught the whole thread for me,
replacing it with a known good one for now.


> + * This driver is heavily based on drivers/block/pmem.c.
> + * Copyright (c) 2014, Intel Corporation.
> + * Copyright (C) 2007 Nick Piggin
> + * Copyright (C) 2007 Novell Inc.

Is there anything left of it actually?  I didn't spot anything
obvious.  Nevermind that we don't have a file with that name anymore :)

> +  /*
> +   * We can only access the iopmem device with full 32-bit word
> +   * accesses which cannot be gaurantee'd by the regular memcpy
> +   */

Odd comment formatting. 

> +static void memcpy_from_iopmem(void *dst, const void *src, size_t sz)
> +{
> +	u64 *wdst = dst;
> +	const u64 *wsrc = src;
> +	u64 tmp;
> +
> +	while (sz >= sizeof(*wdst)) {
> +		*wdst++ = *wsrc++;
> +		sz -= sizeof(*wdst);
> +	}
> +
> +	if (!sz)
> +		return;
> +
> +	tmp = *wsrc;
> +	memcpy(wdst, &tmp, sz);
> +}

And then we dod a memcpy here anyway.  And no volatile whatsover, so
the compiler could do anything to it.  I defintively feel a bit uneasy
about having this in the driver as well.  Can we define the exact
semantics for this and define it by the system, possibly in an arch
specific way?

> +static void iopmem_do_bvec(struct iopmem_device *iopmem, struct page *page,
> +			   unsigned int len, unsigned int off, bool is_write,
> +			   sector_t sector)
> +{
> +	phys_addr_t iopmem_off = sector * 512;
> +	void *iopmem_addr = iopmem->virt_addr + iopmem_off;
> +
> +	if (!is_write) {
> +		read_iopmem(page, off, iopmem_addr, len);
> +		flush_dcache_page(page);
> +	} else {
> +		flush_dcache_page(page);
> +		write_iopmem(iopmem_addr, page, off, len);
> +	}

How about moving the  address and offset calculation as well as the
cache flushing into read_iopmem/write_iopmem and removing this function?

> +static blk_qc_t iopmem_make_request(struct request_queue *q, struct bio *bio)
> +{
> +	struct iopmem_device *iopmem = q->queuedata;
> +	struct bio_vec bvec;
> +	struct bvec_iter iter;
> +
> +	bio_for_each_segment(bvec, bio, iter) {
> +		iopmem_do_bvec(iopmem, bvec.bv_page, bvec.bv_len,
> +			    bvec.bv_offset, op_is_write(bio_op(bio)),
> +			    iter.bi_sector);

op_is_write just checks the data direction.  I'd feel much more
comfortable with a switch on the op, e.g.

	switch (bio_op(bio))) {
	case REQ_OP_READ:
		bio_for_each_segment(bvec, bio, iter)
			read_iopmem(iopmem, bvec, iter.bi_sector);
		break;
	case REQ_OP_READ:
		bio_for_each_segment(bvec, bio, iter)
			write_iopmem(iopmem, bvec, iter.bi_sector);
	defualt:
		WARN_ON_ONCE(1);
		bio->bi_error = -EIO;
		break;
	}
			

> +static long iopmem_direct_access(struct block_device *bdev, sector_t sector,
> +			       void **kaddr, pfn_t *pfn, long size)
> +{
> +	struct iopmem_device *iopmem = bdev->bd_queue->queuedata;
> +	resource_size_t offset = sector * 512;
> +
> +	if (!iopmem)
> +		return -ENODEV;

I don't think this can ever happen, can it?

> +static DEFINE_IDA(iopmem_instance_ida);
> +static DEFINE_SPINLOCK(ida_lock);
> +
> +static int iopmem_set_instance(struct iopmem_device *iopmem)
> +{
> +	int instance, error;
> +
> +	do {
> +		if (!ida_pre_get(&iopmem_instance_ida, GFP_KERNEL))
> +			return -ENODEV;
> +
> +		spin_lock(&ida_lock);
> +		error = ida_get_new(&iopmem_instance_ida, &instance);
> +		spin_unlock(&ida_lock);
> +
> +	} while (error == -EAGAIN);
> +
> +	if (error)
> +		return -ENODEV;
> +
> +	iopmem->instance = instance;
> +	return 0;
> +}
> +
> +static void iopmem_release_instance(struct iopmem_device *iopmem)
> +{
> +	spin_lock(&ida_lock);
> +	ida_remove(&iopmem_instance_ida, iopmem->instance);
> +	spin_unlock(&ida_lock);
> +}
> +

Just use ida_simple_get/ida_simple_remove instead to take care
of the locking and preloading, and get rid of these two functions.


> +static int iopmem_attach_disk(struct iopmem_device *iopmem)
> +{
> +	struct gendisk *disk;
> +	int nid = dev_to_node(iopmem->dev);
> +	struct request_queue *q = iopmem->queue;
> +
> +	blk_queue_write_cache(q, true, true);

You don't handle flush commands or the fua bit in make_request, so
this setting seems wrong.

> +	int err = 0;
> +	int nid = dev_to_node(&pdev->dev);
> +
> +	if (pci_enable_device_mem(pdev) < 0) {

propagate the actual error code, please.

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 3/3] iopmem : Add documentation for iopmem driver
From: Christoph Hellwig @ 2016-10-28  6:46 UTC (permalink / raw)
  To: Stephen Bates
  Cc: jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/,
	sbates-Rgftl6RXld5BDgjK7y7TUQ, haggaie-VPRAkNaXOzVWk0Htik3J/w,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	linux-nvdimm-y27Ovi1pjclAfugRpC6u6w, corbet-T1hC0tSOHrs,
	linux-kernel-u79uwXL29TY76Z2rM5mHXA,
	jim.macdonald-FgSLVYC75IpWk0Htik3J/w,
	linux-block-u79uwXL29TY76Z2rM5mHXA,
	linux-mm-Bw31MaZKKs3YtjvyW6yDsg, axboe-b10kYP2dOMg,
	hch-wEGCiKHe2LqWVfeAwA7xHQ
In-Reply-To: <1476826937-20665-4-git-send-email-sbates-pv7U853sEMVWk0Htik3J/w@public.gmane.org>

I'd say please fold this into the previous patch.

^ permalink raw reply

* Re: [PATCH rdma-core 3/4] verbs: Replace infiniband/sa-kern-abi.h with the kernel's uapi/rdma/ib_user_sa.h
From: Christoph Hellwig @ 2016-10-28  6:53 UTC (permalink / raw)
  To: Jason Gunthorpe; +Cc: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <1477609570-8087-4-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>

I can't see how this is supposed to work, there is no copy at all
of ib_user_sa.h in the tree.

Having to rely on system headers is a sure way to make the build
break most of the time.

What we need is a canonical copy of the kernel heades in the rdma-core
tree, with the option of just pointing to a kernel tree instead.

E.g. by default use headers from rdma-core/kernel/headers, but
optionally allow the build systems to use those from a kernel tree
explicitly specified.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC ABI V5 07/10] IB/core: Support getting IOCTL header/SGEs from kernel space
From: Christoph Hellwig @ 2016-10-28  6:59 UTC (permalink / raw)
  To: Matan Barak
  Cc: linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford, Jason Gunthorpe,
	Sean Hefty, Christoph Lameter, Liran Liss, Haggai Eran,
	Majd Dibbiny, Tal Alon, Leon Romanovsky
In-Reply-To: <1477579398-6875-8-git-send-email-matanb-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

> +	mm_segment_t currentfs = get_fs();
>  
>  	if (!ib_dev)
>  		return -EIO;
> @@ -240,8 +242,10 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
>  		goto out;
>  	}
>  
> +	set_fs(oldfs);
>  	err = uverbs_handle_action(buf, ctx->uattrs, hdr->num_attrs, ib_dev,
>  				   file, action, ctx->uverbs_attr_array);
> +	set_fs(currentfs);

Adding this magic in new code is not acceptable.  Any given API
must take either a kernel or a user pointer.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH rdma-core 1/7] libhns: Add initial main frame
From: oulijun @ 2016-10-28  7:28 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	linuxarm-hv44wF8Li93QT0dZR+AlfA
In-Reply-To: <20161027145139.GD6818-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>

在 2016/10/27 22:51, Jason Gunthorpe 写道:
> On Thu, Oct 27, 2016 at 11:41:35AM +0800, oulijun wrote:
> 
>> when startup by DT, the content of device/modalias is
>> of:NinfinibandT<NULL>Chisilicon,hns-roce-v1.  it is long and
>> complex.
> 
> If you want to match the hardware then properly parsing the mod alias
> is the right way to do it.
> 
>> the content of device/of_node/compatible is hisilicon,hns-roce-v1
> 
> No, it is more complex than that, future DTs may have a list for
> instance, just a string match is not good enough
> 
>> when startup by APCI, the content of device/modalias is
>> acpi:HISI00D1:
> 
> Also may be more complex..
> 
>>   if (ibv_read_sysfs-file(uverbs_sys_path, "device/vendor", value, sizeof(value)) > 0)
>> 	...
>>   if (ibv_read_sysfs-file(uverbs_sys_path, "device/vendor", value, sizeof(value)) > 0)
>>  	...
> 
> You can also match PCI with modalias.
> 
>>> But I wonder if this isn't generically better to be
>>>
>>>  last_dir(readlink("device/driver")) == "hns"
>>>
> 
>> I think it is not insteaded. because it will be find the hns in the
>> path(device/driver)
> 
> I said readlink, which will return something like '../../../../bus/platform/drivers/hhns'
> 
> So just detect the driver is calld hhns and let the kernel deal with
> figuring it out.
> 
> Jason
> 
> .
> 
Hi, Jason
  I have verified it according to your advice and learned the readlink. i think that it need to
exist the file, as follows:
   xx -> /xxxx/../../

but not have it.
rs/t@(none)$ cd /sys/class/infiniband/hns_0/device/driver/module/driver
root@(none)$ ls
platform:hns_roce

I try to read the result of readlink(), but it is fail:
readlink("/sys/class/infiniband/hns_0/device/driver/module/driver", buf, sizeof *buf)

in addition that, the last() is not exit in C library.

Lijun Ou


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH rdma-core 1/7] libhns: Add initial main frame
From: oulijun @ 2016-10-28  7:59 UTC (permalink / raw)
  To: Jason Gunthorpe
  Cc: dledford-H+wXaHxf7aLQT0dZR+AlfA,
	linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	linuxarm-hv44wF8Li93QT0dZR+AlfA
In-Reply-To: <20161027145139.GD6818-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>

在 2016/10/27 22:51, Jason Gunthorpe 写道:
> On Thu, Oct 27, 2016 at 11:41:35AM +0800, oulijun wrote:
> 
>> when startup by DT, the content of device/modalias is
>> of:NinfinibandT<NULL>Chisilicon,hns-roce-v1.  it is long and
>> complex.
> 
> If you want to match the hardware then properly parsing the mod alias
> is the right way to do it.
> 
>> the content of device/of_node/compatible is hisilicon,hns-roce-v1
> 
> No, it is more complex than that, future DTs may have a list for
> instance, just a string match is not good enough
> 
>> when startup by APCI, the content of device/modalias is
>> acpi:HISI00D1:
> 
> Also may be more complex..
> 
>>   if (ibv_read_sysfs-file(uverbs_sys_path, "device/vendor", value, sizeof(value)) > 0)
>> 	...
>>   if (ibv_read_sysfs-file(uverbs_sys_path, "device/vendor", value, sizeof(value)) > 0)
>>  	...
> 
> You can also match PCI with modalias.
> 
>>> But I wonder if this isn't generically better to be
>>>
>>>  last_dir(readlink("device/driver")) == "hns"
>>>
> 
>> I think it is not insteaded. because it will be find the hns in the
>> path(device/driver)
> 
> I said readlink, which will return something like '../../../../bus/platform/drivers/hhns'
> 
> So just detect the driver is calld hhns and let the kernel deal with
> figuring it out.
> 
> Jason
> 
> .
> 
Hi, Jason
  My understand is wrong with my reply in the previous email. it is really exit the link, as
follows:

root@(none)$ cd /sys/class/infiniband/hns_0/device/
root@(none)$ ls -l
total 0
lrwxrwxrwx    1 root     root             0 Oct 27 11:07 driver -> ../../../../bus/platform/drivers/hns_roce

but I think it is the standard approach. because my device(hip06) is only platform device and the other device(hip07/hip0x0 will
be pcie device, it will be distinguished separately.
Hence, we adpot the origin approach.

Lijun Ou

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* [PATCH] IBcore/CM: Issue DREQ when receiving REQ/REP for stale QP
From: Hans Westgaard Ry @ 2016-10-28 11:14 UTC (permalink / raw)
  To: Doug Ledford, Sean Hefty, Hal Rosenstock, Matan Barak,
	Erez Shitrit, Bart Van Assche, Ira Weiny, Or Gerlitz, Hakon Bugge,
	Yuval Shaia, linux-rdma, linux-kernel

from "InfiBand Architecture Specifications Volume 1":

  A QP is said to have a stale connection when only one side has
  connection information. A stale connection may result if the remote CM
  had dropped the connection and sent a DREQ but the DREQ was never
  received by the local CM. Alternatively the remote CM may have lost
  all record of past connections because its node crashed and rebooted,
  while the local CM did not become aware of the remote node's reboot
  and therefore did not clean up stale connections.

and:

   A local CM may receive a REQ/REP for a stale connection. It shall
   abort the connection issuing REJ to the REQ/REP. It shall then issue
   DREQ with "DREQ:remote QPN” set to the remote QPN from the REQ/REP.

This patch solves a problem with reuse of QPN. Current codebase, that
is IPoIB, relies on a REAP-mechanism to do cleanup of the structures
in CM. A problem with this is the timeconstants governing this
mechanism; they are up to 768 seconds and the interface may look
inresponsive in that period.  Issuing a DREQ (and receiving a DREP)
does the necessary cleanup and the interface comes up.

Signed-off-by: Hans Westgaard Ry <hans.westgaard.ry@oracle.com>
Reviewed-by: Håkon Bugge <haakon.bugge@oracle.com>
---
 drivers/infiniband/core/cm.c | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index c995255..c97e4d5 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -1519,6 +1519,7 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
 	struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
 	struct cm_timewait_info *timewait_info;
 	struct cm_req_msg *req_msg;
+	struct ib_cm_id *cm_id;
 
 	req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
 
@@ -1540,10 +1541,18 @@ static struct cm_id_private * cm_match_req(struct cm_work *work,
 	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
 	if (timewait_info) {
 		cm_cleanup_timewait(cm_id_priv->timewait_info);
+		cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+					   timewait_info->work.remote_id);
+
 		spin_unlock_irq(&cm.lock);
 		cm_issue_rej(work->port, work->mad_recv_wc,
 			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
 			     NULL, 0);
+		if (cur_cm_id_priv) {
+			cm_id = &cur_cm_id_priv->id;
+			ib_send_cm_dreq(cm_id, NULL, 0);
+			cm_deref_id(cur_cm_id_priv);
+		}
 		return NULL;
 	}
 
@@ -1919,6 +1928,9 @@ static int cm_rep_handler(struct cm_work *work)
 	struct cm_id_private *cm_id_priv;
 	struct cm_rep_msg *rep_msg;
 	int ret;
+	struct cm_id_private *cur_cm_id_priv;
+	struct ib_cm_id *cm_id;
+	struct cm_timewait_info *timewait_info;
 
 	rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
 	cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
@@ -1953,16 +1965,26 @@ static int cm_rep_handler(struct cm_work *work)
 		goto error;
 	}
 	/* Check for a stale connection. */
-	if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
+	timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
+	if (timewait_info) {
 		rb_erase(&cm_id_priv->timewait_info->remote_id_node,
 			 &cm.remote_id_table);
 		cm_id_priv->timewait_info->inserted_remote_id = 0;
+		cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
+					   timewait_info->work.remote_id);
+
 		spin_unlock(&cm.lock);
 		spin_unlock_irq(&cm_id_priv->lock);
 		cm_issue_rej(work->port, work->mad_recv_wc,
 			     IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
 			     NULL, 0);
 		ret = -EINVAL;
+		if (cur_cm_id_priv) {
+			cm_id = &cur_cm_id_priv->id;
+			ib_send_cm_dreq(cm_id, NULL, 0);
+			cm_deref_id(cur_cm_id_priv);
+		}
+
 		goto error;
 	}
 	spin_unlock(&cm.lock);
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH rdma-rc 01/12] IB/mlx5: Replace numerical constant with predefined MACRO
From: Or Gerlitz @ 2016-10-28 12:54 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Max Gurtovoy
In-Reply-To: <1477575407-20562-2-git-send-email-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

On Thu, Oct 27, 2016 at 4:36 PM, Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
> From: Max Gurtovoy <maxg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
>
> Replace the pre-defined macro signifying inline umr instead
> of the numerical constant.

Leon,

By all means (or no means, choose), this is a fix. If you are telling
vmware and Broadcom people here for months how to write their code,
let's stop for a minute and look in the mirror, drop this patch.

Or.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH rdma-rc 3/6] IB/core: Set routable RoCE gid type for ipv4/ipv6 networks
From: Or Gerlitz @ 2016-10-28 12:57 UTC (permalink / raw)
  To: Mark Bloch, Maor Gottlieb
  Cc: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <1477575391-20134-4-git-send-email-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

On Thu, Oct 27, 2016 at 4:36 PM, Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
> From: Mark Bloch <markb-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>

M/M, spelling...

> If the underlying netowrk type is ipv4 or ipv6 and the device supports

netowrk...

> routable RoCE, prefer it so the traffic could cross subnets.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH rdma-rc 5/6] IB/core: Save QP in ib_flow structure
From: Or Gerlitz @ 2016-10-28 13:00 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <1477575391-20134-6-git-send-email-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

On Thu, Oct 27, 2016 at 4:36 PM, Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:

> This bug wasn't seen in the wild because there are no kernel consumers
> currently in the kernel.

Indeed, it's nice to avoid future bug, but there's no point to put it
into rc fix as no new kernel consumers are to be added in 4.9, agree?
it's a -next thing
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH rdma-rc 12/12] IB/mlx5: Limit mkey page size to 2GB
From: Or Gerlitz @ 2016-10-28 13:02 UTC (permalink / raw)
  To: Majd Dibbiny
  Cc: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Maor Gottlieb
In-Reply-To: <1477575407-20562-13-git-send-email-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

On Thu, Oct 27, 2016 at 4:36 PM, Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
> From: Majd Dibbiny <majd-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
>
> The maximum page size in the mkey context is 2GB.
>
> Until today, we didn't enforce this requirement in the code,
> and therefore, if we got a page size larger than 2GB, we

got a page size larger than 2GB? who are those pages?

> have passed zeros in the log_page_shift instead of the actual value
> and the registration failed.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH rdma-rc 08/12] IB/mlx5: Wait for all async command completions to complete
From: Or Gerlitz @ 2016-10-28 13:04 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
In-Reply-To: <1477575407-20562-9-git-send-email-leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org>

On Thu, Oct 27, 2016 at 4:36 PM, Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:


> +static void wait_for_async_commands(struct mlx5_ib_dev *dev)
> +{
> +       struct mlx5_mr_cache *cache = &dev->cache;
> +       struct mlx5_cache_ent *ent;
> +       int total = 0;
> +       int i;
> +       int j;
> +
> +       for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
> +               ent = &cache->ent[i];
> +               for (j = 0 ; j < 1000; j++) {
> +                       if (!ent->pending)
> +                               break;
> +                       msleep(50);
> +               }

you had another patch on this series which change a hard coded
constant into a define, why this patch add two new hard coded
constants, so all to all, we're not making progress on that
no-hard-coded-constants front... better decide where you want to go
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH rdma-rc 01/12] IB/mlx5: Replace numerical constant with predefined MACRO
From: Leon Romanovsky @ 2016-10-28 14:55 UTC (permalink / raw)
  To: Or Gerlitz
  Cc: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA@public.gmane.org,
	Max Gurtovoy
In-Reply-To: <CAJ3xEMiNynmpbBt0M=k5qywrvnfAq+ogZLD2PUbHBvYVJ4pLyQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 1092 bytes --]

On Fri, Oct 28, 2016 at 03:54:25PM +0300, Or Gerlitz wrote:
> On Thu, Oct 27, 2016 at 4:36 PM, Leon Romanovsky <leon-DgEjT+Ai2ygdnm+yROfE0A@public.gmane.org> wrote:
> > From: Max Gurtovoy <maxg-VPRAkNaXOzVWk0Htik3J/w@public.gmane.org>
> >
> > Replace the pre-defined macro signifying inline umr instead
> > of the numerical constant.
>
> Leon,
>
> By all means (or no means, choose), this is a fix. If you are telling
> vmware and Broadcom people here for months how to write their code,
> let's stop for a minute and look in the mirror, drop this patch.

Thanks Or,
I appreciate your time invested in the review and providing so much
valuable feedback.

Since change of numeric value to the same value defined by macro is not
a fix but improvement and provided here to simplify maintainer's work (Doug),
I have no plans to change the patch or/and drop it.

>
> Or.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* RE: [PATCH rdma-core 2/4] Move rdma_netlink compat into CMake
From: Steve Wise @ 2016-10-28 14:59 UTC (permalink / raw)
  To: 'Jason Gunthorpe', 'Doug Ledford',
	linux-rdma-u79uwXL29TY76Z2rM5mHXA
  Cc: 'Tatyana Nikolova', 'Latif, Faisal'
In-Reply-To: <1477609570-8087-3-git-send-email-jgunthorpe-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>

+Tatyana
+Faisal

Intel, please have a look.

> 
> Steve,
> 
> Can you check if the changes to iwpmd/iwarp_pm_server.c make sense?

I was confused at first, but I see that the iwarp_clients array still has
clients 2 and 3, so it should work fine and still preserve compatibility with
older drivers that embedded the iwarp port mapping in iw_cxgb4 and iw_nes.  And
the RDMA_NL_* enum is consistent with the local header and the kernel uapi
header.  So it looks fine.  I should test this though to make sure...

>
> Should we do something to fix the kernel header?
>

What is wrong with the kernel header exactly?

Steve.


--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC ABI V5 07/10] IB/core: Support getting IOCTL header/SGEs from kernel space
From: Leon Romanovsky @ 2016-10-28 15:16 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Matan Barak, linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford,
	Jason Gunthorpe, Sean Hefty, Christoph Lameter, Liran Liss,
	Haggai Eran, Majd Dibbiny, Tal Alon
In-Reply-To: <20161028065943.GA10418-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 931 bytes --]

On Thu, Oct 27, 2016 at 11:59:43PM -0700, Christoph Hellwig wrote:
> > +	mm_segment_t currentfs = get_fs();
> >
> >  	if (!ib_dev)
> >  		return -EIO;
> > @@ -240,8 +242,10 @@ static long ib_uverbs_cmd_verbs(struct ib_device *ib_dev,
> >  		goto out;
> >  	}
> >
> > +	set_fs(oldfs);
> >  	err = uverbs_handle_action(buf, ctx->uattrs, hdr->num_attrs, ib_dev,
> >  				   file, action, ctx->uverbs_attr_array);
> > +	set_fs(currentfs);
>
> Adding this magic in new code is not acceptable.  Any given API
> must take either a kernel or a user pointer.

And it is indeed happen for new code. This magic is needed to allow legacy
write interface to be converted to new ioctl interface internally in
kernel.


> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [RFC ABI V5 07/10] IB/core: Support getting IOCTL header/SGEs from kernel space
From: Christoph Hellwig @ 2016-10-28 15:21 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Christoph Hellwig, Matan Barak, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	Doug Ledford, Jason Gunthorpe, Sean Hefty, Christoph Lameter,
	Liran Liss, Haggai Eran, Majd Dibbiny, Tal Alon
In-Reply-To: <20161028151606.GN3617-2ukJVAZIZ/Y@public.gmane.org>

On Fri, Oct 28, 2016 at 06:16:06PM +0300, Leon Romanovsky wrote:
> And it is indeed happen for new code. This magic is needed to allow legacy
> write interface to be converted to new ioctl interface internally in
> kernel.

You just added these statements which are by defintion new code.
Don't do that - create a clean kernel internal interface instead.

The canonical one would be to only pass kernel pointers in the internal
interface.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC ABI V5 07/10] IB/core: Support getting IOCTL header/SGEs from kernel space
From: Leon Romanovsky @ 2016-10-28 15:33 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Matan Barak, linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford,
	Jason Gunthorpe, Sean Hefty, Christoph Lameter, Liran Liss,
	Haggai Eran, Majd Dibbiny, Tal Alon
In-Reply-To: <20161028152138.GA16421-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 1147 bytes --]

On Fri, Oct 28, 2016 at 08:21:38AM -0700, Christoph Hellwig wrote:
> On Fri, Oct 28, 2016 at 06:16:06PM +0300, Leon Romanovsky wrote:
> > And it is indeed happen for new code. This magic is needed to allow legacy
> > write interface to be converted to new ioctl interface internally in
> > kernel.
>
> You just added these statements which are by defintion new code.
> Don't do that - create a clean kernel internal interface instead.
>
> The canonical one would be to only pass kernel pointers in the internal
> interface.

Just to summarize, to be sure that I understood you correctly.

---------    --------------------
| write | -> | conversion logic | ---
---------    --------------------   |      ----------------------
                                    -----> | internal interface |
---------                           |      ----------------------
| ioctl | ---------------------------
---------

Am I right?

> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* Re: [RFC ABI V5 07/10] IB/core: Support getting IOCTL header/SGEs from kernel space
From: Christoph Hellwig @ 2016-10-28 15:37 UTC (permalink / raw)
  To: Leon Romanovsky
  Cc: Christoph Hellwig, Matan Barak, linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	Doug Ledford, Jason Gunthorpe, Sean Hefty, Christoph Lameter,
	Liran Liss, Haggai Eran, Majd Dibbiny, Tal Alon
In-Reply-To: <20161028153306.GO3617-2ukJVAZIZ/Y@public.gmane.org>

On Fri, Oct 28, 2016 at 06:33:06PM +0300, Leon Romanovsky wrote:
> Just to summarize, to be sure that I understood you correctly.
> 
> ---------    --------------------
> | write | -> | conversion logic | ---
> ---------    --------------------   |      ----------------------
>                                     -----> | internal interface |
> ---------                           |      ----------------------
> | ioctl | ---------------------------
> ---------
> 
> Am I right?

Yes, as long as the write and ioctl boxes do the copy_{from,to}_user.
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH rdma-core 3/4] verbs: Replace infiniband/sa-kern-abi.h with the kernel's uapi/rdma/ib_user_sa.h
From: Jason Gunthorpe @ 2016-10-28 15:42 UTC (permalink / raw)
  To: Christoph Hellwig; +Cc: Doug Ledford, linux-rdma-u79uwXL29TY76Z2rM5mHXA
In-Reply-To: <20161028065344.GA28303-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>

On Thu, Oct 27, 2016 at 11:53:44PM -0700, Christoph Hellwig wrote:
> I can't see how this is supposed to work, there is no copy at all
> of ib_user_sa.h in the tree.

It uses the one that comes with the distro. Which is new enough on all
supported distros..

In this specific case because ib_user_sa.h is included by a public
header we really don't have much choice, it has to work with the
distro-version.

Fortunately ib_user_sa.h has not changed in a long time, so this is
not a problem and I didn't include an in-tree copy. Several of the
other headers are like that too.

rxe and netlink are counter examples, where we need really new headers
and they are not used in public headers so they have in-tree copies.

> E.g. by default use headers from rdma-core/kernel/headers, but
> optionally allow the build systems to use those from a kernel tree
> explicitly specified.

I've done both of these things, with the twist that the build tests
the distro-header first and uses it before the in-tree copy if it is
new enough.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH rdma-core 2/4] Move rdma_netlink compat into CMake
From: Jason Gunthorpe @ 2016-10-28 15:44 UTC (permalink / raw)
  To: Steve Wise
  Cc: 'Doug Ledford', linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	'Tatyana Nikolova', 'Latif, Faisal'
In-Reply-To: <002501d2312b$da377cf0$8ea676d0$@opengridcomputing.com>

On Fri, Oct 28, 2016 at 09:59:16AM -0500, Steve Wise wrote:
> +Tatyana
> +Faisal
> 
> Intel, please have a look.
> 
> > 
> > Steve,
> > 
> > Can you check if the changes to iwpmd/iwarp_pm_server.c make sense?
> 
> I was confused at first, but I see that the iwarp_clients array still has
> clients 2 and 3, so it should work fine and still preserve compatibility with
> older drivers that embedded the iwarp port mapping in iw_cxgb4 and iw_nes.  And
> the RDMA_NL_* enum is consistent with the local header and the kernel uapi
> header.  So it looks fine.  I should test this though to make sure...
> 
> >
> > Should we do something to fix the kernel header?
> 
> What is wrong with the kernel header exactly?

RDMA_NL_RSVD, should perhaps be called RDMA_NL_IWCM_COMPAT since user
space still need to use that value for compat with old kernels.

Jason
--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [RFC ABI V5 07/10] IB/core: Support getting IOCTL header/SGEs from kernel space
From: Leon Romanovsky @ 2016-10-28 15:46 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Matan Barak, linux-rdma-u79uwXL29TY76Z2rM5mHXA, Doug Ledford,
	Jason Gunthorpe, Sean Hefty, Christoph Lameter, Liran Liss,
	Haggai Eran, Majd Dibbiny, Tal Alon
In-Reply-To: <20161028153725.GA14166-wEGCiKHe2LqWVfeAwA7xHQ@public.gmane.org>

[-- Attachment #1: Type: text/plain, Size: 880 bytes --]

On Fri, Oct 28, 2016 at 08:37:25AM -0700, Christoph Hellwig wrote:
> On Fri, Oct 28, 2016 at 06:33:06PM +0300, Leon Romanovsky wrote:
> > Just to summarize, to be sure that I understood you correctly.
> >
> > ---------    --------------------
> > | write | -> | conversion logic | ---
> > ---------    --------------------   |      ----------------------
> >                                     -----> | internal interface |
> > ---------                           |      ----------------------
> > | ioctl | ---------------------------
> > ---------
> >
> > Am I right?
>
> Yes, as long as the write and ioctl boxes do the copy_{from,to}_user.

Thanks

> --
> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply

* RE: [PATCH rdma-core 2/4] Move rdma_netlink compat into CMake
From: Steve Wise @ 2016-10-28 15:46 UTC (permalink / raw)
  To: 'Jason Gunthorpe'
  Cc: 'Doug Ledford', linux-rdma-u79uwXL29TY76Z2rM5mHXA,
	'Tatyana Nikolova', 'Latif, Faisal'
In-Reply-To: <20161028154455.GB10441-ePGOBjL8dl3ta4EC/59zMFaTQe2KTcn/@public.gmane.org>

> > > Can you check if the changes to iwpmd/iwarp_pm_server.c make sense?
> >
> > I was confused at first, but I see that the iwarp_clients array still has
> > clients 2 and 3, so it should work fine and still preserve compatibility
with
> > older drivers that embedded the iwarp port mapping in iw_cxgb4 and iw_nes.
> And
> > the RDMA_NL_* enum is consistent with the local header and the kernel uapi
> > header.  So it looks fine.  I should test this though to make sure...
> >
> > >
> > > Should we do something to fix the kernel header?
> >
> > What is wrong with the kernel header exactly?
> 
> RDMA_NL_RSVD, should perhaps be called RDMA_NL_IWCM_COMPAT since user
> space still need to use that value for compat with old kernels.

Sounds reasonable.

For this patch:

Acked-by: Steve Wise <swise-7bPotxP6k4+P2YhJcF5u+vpXobYPEAuW@public.gmane.org>

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply

* Re: [PATCH 11/12] nvme: Use BLK_MQ_S_STOPPED instead of QUEUE_FLAG_STOPPED in blk-mq code
From: Keith Busch @ 2016-10-28 16:01 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: Jens Axboe, Christoph Hellwig, James Bottomley,
	Martin K. Petersen, Mike Snitzer, Doug Ledford, Ming Lei,
	Laurence Oberman, linux-block@vger.kernel.org,
	linux-scsi@vger.kernel.org, linux-rdma@vger.kernel.org,
	linux-nvme@lists.infradead.org
In-Reply-To: <805e1911-cd10-0563-c76b-256d76054b08@sandisk.com>

On Wed, Oct 26, 2016 at 03:56:04PM -0700, Bart Van Assche wrote:
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 7bb73ba..b662416 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -205,7 +205,7 @@ void nvme_requeue_req(struct request *req)
>  
>  	blk_mq_requeue_request(req, false);
>  	spin_lock_irqsave(req->q->queue_lock, flags);
> -	if (!blk_queue_stopped(req->q))
> +	if (!blk_mq_queue_stopped(req->q))
>  		blk_mq_kick_requeue_list(req->q);
>  	spin_unlock_irqrestore(req->q->queue_lock, flags);
>  }
> @@ -2079,10 +2079,6 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl)
>  
>  	mutex_lock(&ctrl->namespaces_mutex);
>  	list_for_each_entry(ns, &ctrl->namespaces, list) {
> -		spin_lock_irq(ns->queue->queue_lock);
> -		queue_flag_set(QUEUE_FLAG_STOPPED, ns->queue);
> -		spin_unlock_irq(ns->queue->queue_lock);
> -
>  		blk_mq_cancel_requeue_work(ns->queue);
>  		blk_mq_stop_hw_queues(ns->queue);

There's actually a reason the queue stoppage is using a different flag
than blk_mq_queue_stopped: kicking the queue starts stopped queues.
The driver has to ensure the requeue work can't be kicked prior to
cancelling the current requeue work. Once we know requeue work isn't
running and can't restart again, then we're safe to stop the hw queues.

It's a pretty obscure condition, requiring the controller post an
error completion at the same time the driver decides to reset the
controller. Here's the sequence with the wrong outcome:

 CPU A                   CPU B
 -----                   -----
nvme_stop_queues         nvme_requeue_req
 blk_mq_stop_hw_queues    if (blk_mq_queue_stopped) <- returns false
  blk_mq_stop_hw_queue     blk_mq_kick_requeue_list
                         blk_mq_requeue_work
                          blk_mq_start_hw_queues

^ permalink raw reply

* [PATCH] qedr: Fix missing unlock on error in qedr_post_send()
From: Wei Yongjun @ 2016-10-28 16:33 UTC (permalink / raw)
  To: Doug Ledford, Sean Hefty, Hal Rosenstock, Ram Amrani,
	Rajesh Borundia
  Cc: Wei Yongjun, linux-rdma-u79uwXL29TY76Z2rM5mHXA

From: Wei Yongjun <weiyongjun1-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

Add the missing unlock before return from function qedr_post_send()
in the error handling case.

Signed-off-by: Wei Yongjun <weiyongjun1-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
 drivers/infiniband/hw/qedr/verbs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index a615142..e7c7417 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -2983,7 +2983,8 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 	if (!wr) {
 		DP_ERR(dev, "Got an empty post send.\n");
-		return -EINVAL;
+		rc = -EINVAL;
+		goto out_unlock;
 	}
 
 	while (wr) {
@@ -3012,6 +3013,7 @@ int qedr_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 	/* Make sure write sticks */
 	mmiowb();
 
+out_unlock:
 	spin_unlock_irqrestore(&qp->q_lock, flags);
 
 	return rc;



--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related

* [PATCH] qedr: Fix possible memory leak in qedr_create_qp()
From: Wei Yongjun @ 2016-10-28 16:33 UTC (permalink / raw)
  To: Doug Ledford, Sean Hefty, Hal Rosenstock, Ram Amrani,
	Rajesh Borundia
  Cc: Wei Yongjun, linux-rdma-u79uwXL29TY76Z2rM5mHXA

From: Wei Yongjun <weiyongjun1-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>

'qp' is malloced in qedr_create_qp() and should be freed before leaving
from the error handling cases, otherwise it will cause memory leak.

Signed-off-by: Wei Yongjun <weiyongjun1-hv44wF8Li93QT0dZR+AlfA@public.gmane.org>
---
 drivers/infiniband/hw/qedr/verbs.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
index a615142..b60f145 100644
--- a/drivers/infiniband/hw/qedr/verbs.c
+++ b/drivers/infiniband/hw/qedr/verbs.c
@@ -1477,6 +1477,7 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
 	struct qedr_ucontext *ctx = NULL;
 	struct qedr_create_qp_ureq ureq;
 	struct qedr_qp *qp;
+	struct ib_qp *ibqp;
 	int rc = 0;
 
 	DP_DEBUG(dev, QEDR_MSG_QP, "create qp: called from %s, pd=%p\n",
@@ -1486,13 +1487,13 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
 	if (rc)
 		return ERR_PTR(rc);
 
+	if (attrs->srq)
+		return ERR_PTR(-EINVAL);
+
 	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
 	if (!qp)
 		return ERR_PTR(-ENOMEM);
 
-	if (attrs->srq)
-		return ERR_PTR(-EINVAL);
-
 	DP_DEBUG(dev, QEDR_MSG_QP,
 		 "create qp: sq_cq=%p, sq_icid=%d, rq_cq=%p, rq_icid=%d\n",
 		 get_qedr_cq(attrs->send_cq),
@@ -1508,7 +1509,10 @@ struct ib_qp *qedr_create_qp(struct ib_pd *ibpd,
 			       "create qp: unexpected udata when creating GSI QP\n");
 			goto err0;
 		}
-		return qedr_create_gsi_qp(dev, attrs, qp);
+		ibqp = qedr_create_gsi_qp(dev, attrs, qp);
+		if (IS_ERR(ibqp))
+			kfree(qp);
+		return ibqp;
 	}
 
 	memset(&in_params, 0, sizeof(in_params));

--
To unsubscribe from this list: send the line "unsubscribe linux-rdma" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox