From mboxrd@z Thu Jan 1 00:00:00 1970 From: Sagi Grimberg Subject: Re: RDMA Read: Local protection error Date: Thu, 26 May 2016 20:19:33 +0300 Message-ID: <57473025.5020801@grimberg.me> References: <1A4F4C32-CE5A-44D9-9BFE-0E1F8D5DF44D@oracle.com> <57238F8C.70505@sandisk.com> <57277B63.8030506@sandisk.com> <6BBFD126-877C-4638-BB91-ABF715E29326@oracle.com> <1AFD636B-09FC-4736-B1C5-D1D9FA0B97B0@oracle.com> <8a3276bf-f716-3dca-9d54-369fc3bdcc39@dev.mellanox.co.il> <574728EC.9040802@grimberg.me> Mime-Version: 1.0 Content-Type: text/plain; charset=windows-1252; format=flowed Content-Transfer-Encoding: 7bit Return-path: In-Reply-To: <574728EC.9040802-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org> Sender: linux-rdma-owner-u79uwXL29TY76Z2rM5mHXA@public.gmane.org To: Chuck Lever , Bart Van Assche , Yishai Hadas Cc: Yishai Hadas , linux-rdma , Or Gerlitz , Joonsoo Kim , Haggai Eran , Majd Dibbiny List-Id: linux-rdma@vger.kernel.org >>>>> When debugging is disabled, kzalloc returns page-aligned >>>>> addresses: >>>> >>>> Is it defined some where that regular kzalloc/kmalloc guaranties to >>>> return a page-aligned address as you see in your testing ? if so the >>>> debug mode should behave the same. Otherwise we can consider using any >>>> flag allocation that can force that if such exists. >>>> Let's get other people's input here. >>> >>> My understanding is that the fact that k[mz]alloc() returns a >>> page-aligned buffer if the allocation size is > PAGE_SIZE / 2 is a >>> side effect of the implementation and not something callers of that >>> function should rely on. I think the only assumption k[mz]alloc() >>> callers should rely on is that the allocated memory respects >>> ARCH_KMALLOC_MINALIGN. >> >> I agree. mlx4_alloc_priv_pages() is carefully designed to >> correct the alignment of the buffer, so it already assumes >> that it is not getting a page-aligned buffer. >> >> The alignment isn't the problem here, though. It's that >> the buffer contains a page-boundary. 
That is guaranteed >> to be the case for HCAs that support more than 512 >> sges, so that will have to be addressed (at least in >> mlx5). > > rrr... > > I think we should make the pages allocations dma coherent > in order to fix that... > > Nice catch Chuck. Does this untested patch help (if so, mlx5 will need an identical patch)? -- diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index ba328177eae9..78e9b3addfea 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -139,7 +139,6 @@ struct mlx4_ib_mr { u32 max_pages; struct mlx4_mr mmr; struct ib_umem *umem; - void *pages_alloc; }; struct mlx4_ib_mw { diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index b04f6238e7e2..becb4a65c755 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -278,30 +278,13 @@ mlx4_alloc_priv_pages(struct ib_device *device, int max_pages) { int size = max_pages * sizeof(u64); - int add_size; - int ret; - - add_size = max_t(int, MLX4_MR_PAGES_ALIGN - ARCH_KMALLOC_MINALIGN, 0); - mr->pages_alloc = kzalloc(size + add_size, GFP_KERNEL); - if (!mr->pages_alloc) + mr->pages = dma_alloc_coherent(device->dma_device, size, + &mr->page_map, GFP_KERNEL); + if (!mr->pages) return -ENOMEM; - mr->pages = PTR_ALIGN(mr->pages_alloc, MLX4_MR_PAGES_ALIGN); - - mr->page_map = dma_map_single(device->dma_device, mr->pages, - size, DMA_TO_DEVICE); - - if (dma_mapping_error(device->dma_device, mr->page_map)) { - ret = -ENOMEM; - goto err; - } - return 0; -err: - kfree(mr->pages_alloc); - - return ret; } static void @@ -311,9 +294,8 @@ mlx4_free_priv_pages(struct mlx4_ib_mr *mr) struct ib_device *device = mr->ibmr.device; int size = mr->max_pages * sizeof(u64); - dma_unmap_single(device->dma_device, mr->page_map, - size, DMA_TO_DEVICE); - kfree(mr->pages_alloc); + dma_free_coherent(device->dma_device, size, + mr->pages, mr->page_map); mr->pages = NULL; } } @@ -532,19 
+514,8 @@ int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int sg_offset) { struct mlx4_ib_mr *mr = to_mmr(ibmr); - int rc; mr->npages = 0; - ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map, - sizeof(u64) * mr->max_pages, - DMA_TO_DEVICE); - - rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page); - - ib_dma_sync_single_for_device(ibmr->device, mr->page_map, - sizeof(u64) * mr->max_pages, - DMA_TO_DEVICE); - - return rc; + return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page); } -- -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org More majordomo info at http://vger.kernel.org/majordomo-info.html