* [PATCH 00/16] Add InfiniBand userspace verbs (direct userspace access) @ 2005-06-28 23:03 Roland Dreier 2005-06-28 23:03 ` [PATCH 01/16] IB uverbs: core API extensions Roland Dreier 0 siblings, 1 reply; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Here is a series of patches that adds support for direct userspace access to InfiniBand hardware -- so-called "userspace verbs." I believe these patches are ready to merge, but a final review would be useful. These patches should incorporate all of the feedback from the discussion when I posted an earlier version back in April (see http://lkml.org/lkml/2005/4/4/267 for the start of the thread). In particular, memory pinned for use by userspace is accounted for in current->mm->locked_vm and requests to pin memory are checked against RLIMIT_MEMLOCK. Thanks, Roland ^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH 01/16] IB uverbs: core API extensions 2005-06-28 23:03 [PATCH 00/16] Add InfiniBand userspace verbs (direct userspace access) Roland Dreier @ 2005-06-28 23:03 ` Roland Dreier 2005-06-28 23:03 ` [PATCH 02/16] IB uverbs: update kernel midlayer for new API Roland Dreier 0 siblings, 1 reply; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Modify the ib_verbs.h header file with changes required for InfiniBand userspace verbs support. We add a few structures to keep track of userspace context, and extend the driver API so that low-level drivers know when they're creating resources that will be used from userspace. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/include/ib_verbs.h | 124 +++++++++++++++++++++++++++++----- 1 files changed, 106 insertions(+), 18 deletions(-) --- linux.orig/drivers/infiniband/include/ib_verbs.h 2005-06-28 15:19:55.956779699 -0700 +++ linux/drivers/infiniband/include/ib_verbs.h 2005-06-28 15:19:57.390470064 -0700 @@ -4,6 +4,7 @@ * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -41,7 +42,10 @@ #include <linux/types.h> #include <linux/device.h> + #include <asm/atomic.h> +#include <asm/scatterlist.h> +#include <asm/uaccess.h> union ib_gid { u8 raw[16]; @@ -544,7 +548,7 @@ struct ib_send_wr { int num_sge; enum ib_wr_opcode opcode; int send_flags; - u32 imm_data; + __be32 imm_data; union { struct { u64 remote_addr; @@ -618,29 +622,86 @@ struct ib_fmr_attr { u8 page_size; }; +struct ib_ucontext { + struct ib_device *device; + struct list_head pd_list; + struct list_head mr_list; + struct list_head mw_list; + struct list_head cq_list; + struct list_head qp_list; + struct list_head srq_list; + struct list_head ah_list; + spinlock_t lock; +}; + +struct ib_uobject { + u64 user_handle; /* handle given to us by userspace */ + struct ib_ucontext *context; /* associated user context */ + struct list_head list; /* link to context's list */ + u32 id; /* index into kernel idr */ +}; + +struct ib_umem { + unsigned long user_base; + unsigned long virt_base; + size_t length; + int offset; + int page_size; + int writable; + struct list_head chunk_list; +}; + +struct ib_umem_chunk { + struct list_head list; + int nents; + int nmap; + struct scatterlist page_list[0]; +}; + +struct ib_udata { + void __user *inbuf; + void __user *outbuf; + size_t inlen; + size_t outlen; +}; + +#define IB_UMEM_MAX_PAGE_CHUNK \ + ((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) / \ + ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] - \ + (void *) &((struct ib_umem_chunk *) 0)->page_list[0])) + +struct ib_umem_object { + struct ib_uobject uobject; + struct ib_umem umem; +}; + struct ib_pd { - struct ib_device *device; - atomic_t usecnt; /* count all resources */ + struct ib_device *device; + struct ib_uobject *uobject; + atomic_t usecnt; /* count all resources */ }; struct ib_ah { struct ib_device *device; struct ib_pd *pd; + struct ib_uobject *uobject; }; typedef void (*ib_comp_handler)(struct ib_cq *cq, void 
*cq_context); struct ib_cq { - struct ib_device *device; - ib_comp_handler comp_handler; - void (*event_handler)(struct ib_event *, void *); - void * cq_context; - int cqe; - atomic_t usecnt; /* count number of work queues */ + struct ib_device *device; + struct ib_uobject *uobject; + ib_comp_handler comp_handler; + void (*event_handler)(struct ib_event *, void *); + void * cq_context; + int cqe; + atomic_t usecnt; /* count number of work queues */ }; struct ib_srq { struct ib_device *device; + struct ib_uobject *uobject; struct ib_pd *pd; void *srq_context; atomic_t usecnt; @@ -652,6 +713,7 @@ struct ib_qp { struct ib_cq *send_cq; struct ib_cq *recv_cq; struct ib_srq *srq; + struct ib_uobject *uobject; void (*event_handler)(struct ib_event *, void *); void *qp_context; u32 qp_num; @@ -659,16 +721,18 @@ struct ib_qp { }; struct ib_mr { - struct ib_device *device; - struct ib_pd *pd; - u32 lkey; - u32 rkey; - atomic_t usecnt; /* count number of MWs */ + struct ib_device *device; + struct ib_pd *pd; + struct ib_uobject *uobject; + u32 lkey; + u32 rkey; + atomic_t usecnt; /* count number of MWs */ }; struct ib_mw { struct ib_device *device; struct ib_pd *pd; + struct ib_uobject *uobject; u32 rkey; }; @@ -737,7 +801,14 @@ struct ib_device { int (*modify_port)(struct ib_device *device, u8 port_num, int port_modify_mask, struct ib_port_modify *port_modify); - struct ib_pd * (*alloc_pd)(struct ib_device *device); + struct ib_ucontext * (*alloc_ucontext)(struct ib_device *device, + struct ib_udata *udata); + int (*dealloc_ucontext)(struct ib_ucontext *context); + int (*mmap)(struct ib_ucontext *context, + struct vm_area_struct *vma); + struct ib_pd * (*alloc_pd)(struct ib_device *device, + struct ib_ucontext *context, + struct ib_udata *udata); int (*dealloc_pd)(struct ib_pd *pd); struct ib_ah * (*create_ah)(struct ib_pd *pd, struct ib_ah_attr *ah_attr); @@ -747,7 +818,8 @@ struct ib_device { struct ib_ah_attr *ah_attr); int (*destroy_ah)(struct ib_ah *ah); struct ib_qp * 
(*create_qp)(struct ib_pd *pd, - struct ib_qp_init_attr *qp_init_attr); + struct ib_qp_init_attr *qp_init_attr, + struct ib_udata *udata); int (*modify_qp)(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask); @@ -762,8 +834,9 @@ struct ib_device { int (*post_recv)(struct ib_qp *qp, struct ib_recv_wr *recv_wr, struct ib_recv_wr **bad_recv_wr); - struct ib_cq * (*create_cq)(struct ib_device *device, - int cqe); + struct ib_cq * (*create_cq)(struct ib_device *device, int cqe, + struct ib_ucontext *context, + struct ib_udata *udata); int (*destroy_cq)(struct ib_cq *cq); int (*resize_cq)(struct ib_cq *cq, int *cqe); int (*poll_cq)(struct ib_cq *cq, int num_entries, @@ -780,6 +853,10 @@ struct ib_device { int num_phys_buf, int mr_access_flags, u64 *iova_start); + struct ib_mr * (*reg_user_mr)(struct ib_pd *pd, + struct ib_umem *region, + int mr_access_flags, + struct ib_udata *udata); int (*query_mr)(struct ib_mr *mr, struct ib_mr_attr *mr_attr); int (*dereg_mr)(struct ib_mr *mr); @@ -817,6 +894,7 @@ struct ib_device { struct ib_mad *in_mad, struct ib_mad *out_mad); + struct module *owner; struct class_device class_dev; struct kobject ports_parent; struct list_head port_list; @@ -852,6 +930,16 @@ void *ib_get_client_data(struct ib_devic void ib_set_client_data(struct ib_device *device, struct ib_client *client, void *data); +static inline int ib_copy_from_udata(void *dest, struct ib_udata *udata, size_t len) +{ + return copy_from_user(dest, udata->inbuf, len) ? -EFAULT : 0; +} + +static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len) +{ + return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0; +} + int ib_register_event_handler (struct ib_event_handler *event_handler); int ib_unregister_event_handler(struct ib_event_handler *event_handler); void ib_dispatch_event(struct ib_event *event); ^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH 02/16] IB uverbs: update kernel midlayer for new API 2005-06-28 23:03 ` [PATCH 01/16] IB uverbs: core API extensions Roland Dreier @ 2005-06-28 23:03 ` Roland Dreier 2005-06-28 23:03 ` [PATCH 03/16] IB uverbs: update mthca " Roland Dreier 0 siblings, 1 reply; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Update kernel InfiniBand midlayer to compile against the updated API for low-level drivers. This just amounts to passing NULL for all userspace-related parameters, and setting userspace-related structure members to NULL. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/core/verbs.c | 32 ++++++++++++++++++++------------ 1 files changed, 20 insertions(+), 12 deletions(-) --- linux.orig/drivers/infiniband/core/verbs.c 2005-06-28 15:19:55.267928471 -0700 +++ linux/drivers/infiniband/core/verbs.c 2005-06-28 15:19:59.462022670 -0700 @@ -4,6 +4,7 @@ * Copyright (c) 2004 Intel Corporation. All rights reserved. * Copyright (c) 2004 Topspin Corporation. All rights reserved. * Copyright (c) 2004 Voltaire Corporation. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -47,10 +48,11 @@ struct ib_pd *ib_alloc_pd(struct ib_devi { struct ib_pd *pd; - pd = device->alloc_pd(device); + pd = device->alloc_pd(device, NULL, NULL); if (!IS_ERR(pd)) { - pd->device = device; + pd->device = device; + pd->uobject = NULL; atomic_set(&pd->usecnt, 0); } @@ -76,8 +78,9 @@ struct ib_ah *ib_create_ah(struct ib_pd ah = pd->device->create_ah(pd, ah_attr); if (!IS_ERR(ah)) { - ah->device = pd->device; - ah->pd = pd; + ah->device = pd->device; + ah->pd = pd; + ah->uobject = NULL; atomic_inc(&pd->usecnt); } @@ -122,7 +125,7 @@ struct ib_qp *ib_create_qp(struct ib_pd { struct ib_qp *qp; - qp = pd->device->create_qp(pd, qp_init_attr); + qp = pd->device->create_qp(pd, qp_init_attr, NULL); if (!IS_ERR(qp)) { qp->device = pd->device; @@ -130,6 +133,7 @@ struct ib_qp *ib_create_qp(struct ib_pd qp->send_cq = qp_init_attr->send_cq; qp->recv_cq = qp_init_attr->recv_cq; qp->srq = qp_init_attr->srq; + qp->uobject = NULL; qp->event_handler = qp_init_attr->event_handler; qp->qp_context = qp_init_attr->qp_context; qp->qp_type = qp_init_attr->qp_type; @@ -197,10 +201,11 @@ struct ib_cq *ib_create_cq(struct ib_dev { struct ib_cq *cq; - cq = device->create_cq(device, cqe); + cq = device->create_cq(device, cqe, NULL, NULL); if (!IS_ERR(cq)) { cq->device = device; + cq->uobject = NULL; cq->comp_handler = comp_handler; cq->event_handler = event_handler; cq->cq_context = cq_context; @@ -245,8 +250,9 @@ struct ib_mr *ib_get_dma_mr(struct ib_pd mr = pd->device->get_dma_mr(pd, mr_access_flags); if (!IS_ERR(mr)) { - mr->device = pd->device; - mr->pd = pd; + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; atomic_inc(&pd->usecnt); atomic_set(&mr->usecnt, 0); } @@ -267,8 +273,9 @@ struct ib_mr *ib_reg_phys_mr(struct ib_p mr_access_flags, iova_start); if (!IS_ERR(mr)) { - mr->device = pd->device; - mr->pd = pd; + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; atomic_inc(&pd->usecnt); 
atomic_set(&mr->usecnt, 0); } @@ -344,8 +351,9 @@ struct ib_mw *ib_alloc_mw(struct ib_pd * mw = pd->device->alloc_mw(pd); if (!IS_ERR(mw)) { - mw->device = pd->device; - mw->pd = pd; + mw->device = pd->device; + mw->pd = pd; + mw->uobject = NULL; atomic_inc(&pd->usecnt); } ^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH 03/16] IB uverbs: update mthca for new API 2005-06-28 23:03 ` [PATCH 02/16] IB uverbs: update kernel midlayer for new API Roland Dreier @ 2005-06-28 23:03 ` Roland Dreier 2005-06-28 23:03 ` [PATCH 04/16] IB uverbs: add user verbs ABI header Roland Dreier 0 siblings, 1 reply; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Update mthca to compile against the updated API for low-level drivers. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/hw/mthca/mthca_provider.c | 13 ++++++++++--- 1 files changed, 10 insertions(+), 3 deletions(-) --- linux.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-06-28 15:19:55.005985043 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_provider.c 2005-06-28 15:20:00.882715841 -0700 @@ -284,7 +284,9 @@ static int mthca_query_gid(struct ib_dev return err; } -static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev) +static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, + struct ib_ucontext *context, + struct ib_udata *udata) { struct mthca_pd *pd; int err; @@ -338,7 +340,8 @@ static int mthca_ah_destroy(struct ib_ah } static struct ib_qp *mthca_create_qp(struct ib_pd *pd, - struct ib_qp_init_attr *init_attr) + struct ib_qp_init_attr *init_attr, + struct ib_udata *udata) { struct mthca_qp *qp; int err; @@ -409,7 +412,9 @@ static int mthca_destroy_qp(struct ib_qp return 0; } -static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries) +static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries, + struct ib_ucontext *context, + struct ib_udata *udata) { struct mthca_cq *cq; int nent; @@ -692,6 +697,8 @@ int mthca_register_device(struct mthca_d int i; strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX); + dev->ib_dev.owner = THIS_MODULE; + dev->ib_dev.node_type = IB_NODE_CA; dev->ib_dev.phys_port_cnt = dev->limits.num_ports; dev->ib_dev.dma_device = &dev->pdev->dev; ^ permalink raw 
reply [flat|nested] 37+ messages in thread
* [PATCH 04/16] IB uverbs: add user verbs ABI header 2005-06-28 23:03 ` [PATCH 03/16] IB uverbs: update mthca " Roland Dreier @ 2005-06-28 23:03 ` Roland Dreier 2005-06-28 23:03 ` [PATCH 05/16] IB uverbs: core implementation Roland Dreier 0 siblings, 1 reply; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Add the ib_user_verbs.h header file, which defines the ABI used by InfiniBand userspace verbs for kernel/user communication. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/include/ib_user_verbs.h | 389 +++++++++++++++++++++++++++++ 1 files changed, 389 insertions(+) --- /dev/null 2005-06-23 14:14:38.423479552 -0700 +++ linux/drivers/infiniband/include/ib_user_verbs.h 2005-06-28 15:20:02.710321131 -0700 @@ -0,0 +1,389 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: ib_user_verbs.h 2708 2005-06-24 17:27:21Z roland $ + */ + +#ifndef IB_USER_VERBS_H +#define IB_USER_VERBS_H + +#include <linux/types.h> + +/* + * Increment this value if any changes that break userspace ABI + * compatibility are made. + */ +#define IB_USER_VERBS_ABI_VERSION 1 + +enum { + IB_USER_VERBS_CMD_QUERY_PARAMS, + IB_USER_VERBS_CMD_GET_CONTEXT, + IB_USER_VERBS_CMD_QUERY_DEVICE, + IB_USER_VERBS_CMD_QUERY_PORT, + IB_USER_VERBS_CMD_QUERY_GID, + IB_USER_VERBS_CMD_QUERY_PKEY, + IB_USER_VERBS_CMD_ALLOC_PD, + IB_USER_VERBS_CMD_DEALLOC_PD, + IB_USER_VERBS_CMD_CREATE_AH, + IB_USER_VERBS_CMD_MODIFY_AH, + IB_USER_VERBS_CMD_QUERY_AH, + IB_USER_VERBS_CMD_DESTROY_AH, + IB_USER_VERBS_CMD_REG_MR, + IB_USER_VERBS_CMD_REG_SMR, + IB_USER_VERBS_CMD_REREG_MR, + IB_USER_VERBS_CMD_QUERY_MR, + IB_USER_VERBS_CMD_DEREG_MR, + IB_USER_VERBS_CMD_ALLOC_MW, + IB_USER_VERBS_CMD_BIND_MW, + IB_USER_VERBS_CMD_DEALLOC_MW, + IB_USER_VERBS_CMD_CREATE_CQ, + IB_USER_VERBS_CMD_RESIZE_CQ, + IB_USER_VERBS_CMD_DESTROY_CQ, + IB_USER_VERBS_CMD_POLL_CQ, + IB_USER_VERBS_CMD_PEEK_CQ, + IB_USER_VERBS_CMD_REQ_NOTIFY_CQ, + IB_USER_VERBS_CMD_CREATE_QP, + IB_USER_VERBS_CMD_QUERY_QP, + IB_USER_VERBS_CMD_MODIFY_QP, + IB_USER_VERBS_CMD_DESTROY_QP, + IB_USER_VERBS_CMD_POST_SEND, + IB_USER_VERBS_CMD_POST_RECV, + IB_USER_VERBS_CMD_ATTACH_MCAST, + IB_USER_VERBS_CMD_DETACH_MCAST +}; + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit 
architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct ib_uverbs_async_event_desc { + __u64 element; + __u32 event_type; /* enum ib_event_type */ + __u32 reserved; +}; + +struct ib_uverbs_comp_event_desc { + __u64 cq_handle; +}; + +/* + * All commands from userspace should start with a __u32 command field + * followed by __u16 in_words and out_words fields (which give the + * length of the command block and response buffer if any in 32-bit + * words). The kernel driver will read these fields first and read + * the rest of the command struct based on these values. + */ + +struct ib_uverbs_cmd_hdr { + __u32 command; + __u16 in_words; + __u16 out_words; +}; + +/* + * No driver_data for "query params" command, since this is intended + * to be a core function with no possible device dependence. + */ +struct ib_uverbs_query_params { + __u64 response; +}; + +struct ib_uverbs_query_params_resp { + __u32 num_cq_events; +}; + +struct ib_uverbs_get_context { + __u64 response; + __u64 cq_fd_tab; + __u64 driver_data[0]; +}; + +struct ib_uverbs_get_context_resp { + __u32 async_fd; + __u32 reserved; +}; + +struct ib_uverbs_query_device { + __u64 response; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_device_resp { + __u64 fw_ver; + __u64 node_guid; + __u64 sys_image_guid; + __u64 max_mr_size; + __u64 page_size_cap; + __u32 vendor_id; + __u32 vendor_part_id; + __u32 hw_ver; + __u32 max_qp; + __u32 max_qp_wr; + __u32 device_cap_flags; + __u32 max_sge; + __u32 max_sge_rd; + __u32 max_cq; + __u32 max_cqe; + __u32 max_mr; + __u32 max_pd; + __u32 max_qp_rd_atom; + __u32 max_ee_rd_atom; + __u32 max_res_rd_atom; + __u32 max_qp_init_rd_atom; + __u32 max_ee_init_rd_atom; + __u32 atomic_cap; + __u32 max_ee; + __u32 max_rdd; + __u32 max_mw; + __u32 max_raw_ipv6_qp; + __u32 max_raw_ethy_qp; + __u32 max_mcast_grp; + __u32 max_mcast_qp_attach; + __u32
max_total_mcast_qp_attach; + __u32 max_ah; + __u32 max_fmr; + __u32 max_map_per_fmr; + __u32 max_srq; + __u32 max_srq_wr; + __u32 max_srq_sge; + __u16 max_pkeys; + __u8 local_ca_ack_delay; + __u8 phys_port_cnt; + __u8 reserved[4]; +}; + +struct ib_uverbs_query_port { + __u64 response; + __u8 port_num; + __u8 reserved[7]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_port_resp { + __u32 port_cap_flags; + __u32 max_msg_sz; + __u32 bad_pkey_cntr; + __u32 qkey_viol_cntr; + __u32 gid_tbl_len; + __u16 pkey_tbl_len; + __u16 lid; + __u16 sm_lid; + __u8 state; + __u8 max_mtu; + __u8 active_mtu; + __u8 lmc; + __u8 max_vl_num; + __u8 sm_sl; + __u8 subnet_timeout; + __u8 init_type_reply; + __u8 active_width; + __u8 active_speed; + __u8 phys_state; + __u8 reserved[3]; +}; + +struct ib_uverbs_query_gid { + __u64 response; + __u8 port_num; + __u8 index; + __u8 reserved[6]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_gid_resp { + __u8 gid[16]; +}; + +struct ib_uverbs_query_pkey { + __u64 response; + __u8 port_num; + __u8 index; + __u8 reserved[6]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_query_pkey_resp { + __u16 pkey; + __u16 reserved; +}; + +struct ib_uverbs_alloc_pd { + __u64 response; + __u64 driver_data[0]; +}; + +struct ib_uverbs_alloc_pd_resp { + __u32 pd_handle; +}; + +struct ib_uverbs_dealloc_pd { + __u32 pd_handle; +}; + +struct ib_uverbs_reg_mr { + __u64 response; + __u64 start; + __u64 length; + __u64 hca_va; + __u32 pd_handle; + __u32 access_flags; + __u64 driver_data[0]; +}; + +struct ib_uverbs_reg_mr_resp { + __u32 mr_handle; + __u32 lkey; + __u32 rkey; +}; + +struct ib_uverbs_dereg_mr { + __u32 mr_handle; +}; + +struct ib_uverbs_create_cq { + __u64 response; + __u64 user_handle; + __u32 cqe; + __u32 event_handler; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_cq_resp { + __u32 cq_handle; + __u32 cqe; +}; + +struct ib_uverbs_destroy_cq { + __u32 cq_handle; +}; + +struct ib_uverbs_create_qp { + __u64 response; + __u64 user_handle; + 
__u32 pd_handle; + __u32 send_cq_handle; + __u32 recv_cq_handle; + __u32 srq_handle; + __u32 max_send_wr; + __u32 max_recv_wr; + __u32 max_send_sge; + __u32 max_recv_sge; + __u32 max_inline_data; + __u8 sq_sig_all; + __u8 qp_type; + __u8 is_srq; + __u8 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_create_qp_resp { + __u32 qp_handle; + __u32 qpn; +}; + +/* + * This struct needs to remain a multiple of 8 bytes to keep the + * alignment of the modify QP parameters. + */ +struct ib_uverbs_qp_dest { + __u8 dgid[16]; + __u32 flow_label; + __u16 dlid; + __u16 reserved; + __u8 sgid_index; + __u8 hop_limit; + __u8 traffic_class; + __u8 sl; + __u8 src_path_bits; + __u8 static_rate; + __u8 is_global; + __u8 port_num; +}; + +struct ib_uverbs_modify_qp { + struct ib_uverbs_qp_dest dest; + struct ib_uverbs_qp_dest alt_dest; + __u32 qp_handle; + __u32 attr_mask; + __u32 qkey; + __u32 rq_psn; + __u32 sq_psn; + __u32 dest_qp_num; + __u32 qp_access_flags; + __u16 pkey_index; + __u16 alt_pkey_index; + __u8 qp_state; + __u8 cur_qp_state; + __u8 path_mtu; + __u8 path_mig_state; + __u8 en_sqd_async_notify; + __u8 max_rd_atomic; + __u8 max_dest_rd_atomic; + __u8 min_rnr_timer; + __u8 port_num; + __u8 timeout; + __u8 retry_cnt; + __u8 rnr_retry; + __u8 alt_port_num; + __u8 alt_timeout; + __u8 reserved[2]; + __u64 driver_data[0]; +}; + +struct ib_uverbs_modify_qp_resp { +}; + +struct ib_uverbs_destroy_qp { + __u32 qp_handle; +}; + +struct ib_uverbs_attach_mcast { + __u8 gid[16]; + __u32 qp_handle; + __u16 mlid; + __u16 reserved; + __u64 driver_data[0]; +}; + +struct ib_uverbs_detach_mcast { + __u8 gid[16]; + __u32 qp_handle; + __u16 mlid; + __u16 reserved; + __u64 driver_data[0]; +}; + +#endif /* IB_USER_VERBS_H */ ^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH 05/16] IB uverbs: core implementation 2005-06-28 23:03 ` [PATCH 04/16] IB uverbs: add user verbs ABI header Roland Dreier @ 2005-06-28 23:03 ` Roland Dreier 2005-06-28 23:03 ` [PATCH 06/16] IB uverbs: memory pinning implementation Roland Dreier 2005-06-29 0:27 ` [PATCH 05/16] IB uverbs: core implementation Greg KH 0 siblings, 2 replies; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Add the core of the InfiniBand userspace verbs implementation, including creating character device nodes, dispatching requests from userspace, and passing event notifications back up to userspace. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/core/uverbs.h | 132 ++++ drivers/infiniband/core/uverbs_cmd.c | 1006 ++++++++++++++++++++++++++++++++++ drivers/infiniband/core/uverbs_main.c | 708 +++++++++++++++++++++++ 3 files changed, 1846 insertions(+) --- /dev/null 2005-06-23 14:14:38.423479552 -0700 +++ linux/drivers/infiniband/core/uverbs.h 2005-06-28 15:20:04.361964423 -0700 @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: uverbs.h 2559 2005-06-06 19:43:16Z roland $ + */ + +#ifndef UVERBS_H +#define UVERBS_H + +/* Include device.h and fs.h until cdev.h is self-sufficient */ +#include <linux/fs.h> +#include <linux/device.h> +#include <linux/cdev.h> +#include <linux/kref.h> +#include <linux/idr.h> + +#include <ib_verbs.h> +#include <ib_user_verbs.h> + +struct ib_uverbs_device { + int devnum; + struct cdev dev; + struct class_device class_dev; + struct ib_device *ib_dev; + int num_comp; +}; + +struct ib_uverbs_event_file { + struct kref ref; + struct ib_uverbs_file *uverbs_file; + spinlock_t lock; + int fd; + int is_async; + wait_queue_head_t poll_wait; + struct list_head event_list; +}; + +struct ib_uverbs_file { + struct kref ref; + struct ib_uverbs_device *device; + struct ib_ucontext *ucontext; + struct ib_event_handler event_handler; + struct ib_uverbs_event_file async_file; + struct ib_uverbs_event_file comp_file[1]; +}; + +struct ib_uverbs_async_event { + struct ib_uverbs_async_event_desc desc; + struct list_head list; +}; + +struct ib_uverbs_comp_event { + struct ib_uverbs_comp_event_desc desc; + struct list_head list; +}; + +struct ib_uobject_mr { + struct ib_uobject uobj; + struct page *page_list; + struct scatterlist *sg_list; +}; + +extern struct 
semaphore ib_uverbs_idr_mutex; +extern struct idr ib_uverbs_pd_idr; +extern struct idr ib_uverbs_mr_idr; +extern struct idr ib_uverbs_mw_idr; +extern struct idr ib_uverbs_ah_idr; +extern struct idr ib_uverbs_cq_idr; +extern struct idr ib_uverbs_qp_idr; + +void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context); +void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr); +void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr); + +int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, + void *addr, size_t size, int write); +void ib_umem_release(struct ib_device *dev, struct ib_umem *umem); +void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem); + +#define IB_UVERBS_DECLARE_CMD(name) \ + ssize_t ib_uverbs_##name(struct ib_uverbs_file *file, \ + const char __user *buf, int in_len, \ + int out_len) + +IB_UVERBS_DECLARE_CMD(query_params); +IB_UVERBS_DECLARE_CMD(get_context); +IB_UVERBS_DECLARE_CMD(query_device); +IB_UVERBS_DECLARE_CMD(query_port); +IB_UVERBS_DECLARE_CMD(query_gid); +IB_UVERBS_DECLARE_CMD(query_pkey); +IB_UVERBS_DECLARE_CMD(alloc_pd); +IB_UVERBS_DECLARE_CMD(dealloc_pd); +IB_UVERBS_DECLARE_CMD(reg_mr); +IB_UVERBS_DECLARE_CMD(dereg_mr); +IB_UVERBS_DECLARE_CMD(create_cq); +IB_UVERBS_DECLARE_CMD(destroy_cq); +IB_UVERBS_DECLARE_CMD(create_qp); +IB_UVERBS_DECLARE_CMD(modify_qp); +IB_UVERBS_DECLARE_CMD(destroy_qp); +IB_UVERBS_DECLARE_CMD(attach_mcast); +IB_UVERBS_DECLARE_CMD(detach_mcast); + +#endif /* UVERBS_H */ --- /dev/null 2005-06-23 14:14:38.423479552 -0700 +++ linux/drivers/infiniband/core/uverbs_cmd.c 2005-06-28 15:20:04.365963559 -0700 @@ -0,0 +1,1006 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. 
You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * $Id: uverbs_cmd.c 2708 2005-06-24 17:27:21Z roland $ + */ + +#include <asm/uaccess.h> + +#include "uverbs.h" + +#define INIT_UDATA(udata, ibuf, obuf, ilen, olen) \ + do { \ + (udata)->inbuf = (void __user *) (ibuf); \ + (udata)->outbuf = (void __user *) (obuf); \ + (udata)->inlen = (ilen); \ + (udata)->outlen = (olen); \ + } while (0) + +ssize_t ib_uverbs_query_params(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_params cmd; + struct ib_uverbs_query_params_resp resp; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + memset(&resp, 0, sizeof resp); + + resp.num_cq_events = file->device->num_comp; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_get_context(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_get_context cmd; + struct ib_uverbs_get_context_resp resp; + struct ib_udata udata; + struct ib_device *ibdev = file->device->ib_dev; + int i; + int ret = in_len; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + file->ucontext = ibdev->alloc_ucontext(ibdev, &udata); + if (IS_ERR(file->ucontext)) { + ret = PTR_ERR(file->ucontext); + file->ucontext = NULL; + return ret; + } + + file->ucontext->device = ibdev; + INIT_LIST_HEAD(&file->ucontext->pd_list); + INIT_LIST_HEAD(&file->ucontext->mr_list); + INIT_LIST_HEAD(&file->ucontext->mw_list); + INIT_LIST_HEAD(&file->ucontext->cq_list); + INIT_LIST_HEAD(&file->ucontext->qp_list); + INIT_LIST_HEAD(&file->ucontext->srq_list); + INIT_LIST_HEAD(&file->ucontext->ah_list); + spin_lock_init(&file->ucontext->lock); + + resp.async_fd = 
file->async_file.fd; + for (i = 0; i < file->device->num_comp; ++i) + if (copy_to_user((void __user *) (unsigned long) cmd.cq_fd_tab + + i * sizeof (__u32), + &file->comp_file[i].fd, sizeof (__u32))) + goto err; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + goto err; + + return in_len; + +err: + ibdev->dealloc_ucontext(file->ucontext); + file->ucontext = NULL; + + return -EFAULT; +} + +ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_device cmd; + struct ib_uverbs_query_device_resp resp; + struct ib_device_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + ret = ib_query_device(file->device->ib_dev, &attr); + if (ret) + return ret; + + memset(&resp, 0, sizeof resp); + + resp.fw_ver = attr.fw_ver; + resp.node_guid = attr.node_guid; + resp.sys_image_guid = attr.sys_image_guid; + resp.max_mr_size = attr.max_mr_size; + resp.page_size_cap = attr.page_size_cap; + resp.vendor_id = attr.vendor_id; + resp.vendor_part_id = attr.vendor_part_id; + resp.hw_ver = attr.hw_ver; + resp.max_qp = attr.max_qp; + resp.max_qp_wr = attr.max_qp_wr; + resp.device_cap_flags = attr.device_cap_flags; + resp.max_sge = attr.max_sge; + resp.max_sge_rd = attr.max_sge_rd; + resp.max_cq = attr.max_cq; + resp.max_cqe = attr.max_cqe; + resp.max_mr = attr.max_mr; + resp.max_pd = attr.max_pd; + resp.max_qp_rd_atom = attr.max_qp_rd_atom; + resp.max_ee_rd_atom = attr.max_ee_rd_atom; + resp.max_res_rd_atom = attr.max_res_rd_atom; + resp.max_qp_init_rd_atom = attr.max_qp_init_rd_atom; + resp.max_ee_init_rd_atom = attr.max_ee_init_rd_atom; + resp.atomic_cap = attr.atomic_cap; + resp.max_ee = attr.max_ee; + resp.max_rdd = attr.max_rdd; + resp.max_mw = attr.max_mw; + resp.max_raw_ipv6_qp = attr.max_raw_ipv6_qp; + resp.max_raw_ethy_qp = attr.max_raw_ethy_qp; + resp.max_mcast_grp = 
attr.max_mcast_grp; + resp.max_mcast_qp_attach = attr.max_mcast_qp_attach; + resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach; + resp.max_ah = attr.max_ah; + resp.max_fmr = attr.max_fmr; + resp.max_map_per_fmr = attr.max_map_per_fmr; + resp.max_srq = attr.max_srq; + resp.max_srq_wr = attr.max_srq_wr; + resp.max_srq_sge = attr.max_srq_sge; + resp.max_pkeys = attr.max_pkeys; + resp.local_ca_ack_delay = attr.local_ca_ack_delay; + resp.phys_port_cnt = file->device->ib_dev->phys_port_cnt; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_port cmd; + struct ib_uverbs_query_port_resp resp; + struct ib_port_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + ret = ib_query_port(file->device->ib_dev, cmd.port_num, &attr); + if (ret) + return ret; + + memset(&resp, 0, sizeof resp); + + resp.state = attr.state; + resp.max_mtu = attr.max_mtu; + resp.active_mtu = attr.active_mtu; + resp.gid_tbl_len = attr.gid_tbl_len; + resp.port_cap_flags = attr.port_cap_flags; + resp.max_msg_sz = attr.max_msg_sz; + resp.bad_pkey_cntr = attr.bad_pkey_cntr; + resp.qkey_viol_cntr = attr.qkey_viol_cntr; + resp.pkey_tbl_len = attr.pkey_tbl_len; + resp.lid = attr.lid; + resp.sm_lid = attr.sm_lid; + resp.lmc = attr.lmc; + resp.max_vl_num = attr.max_vl_num; + resp.sm_sl = attr.sm_sl; + resp.subnet_timeout = attr.subnet_timeout; + resp.init_type_reply = attr.init_type_reply; + resp.active_width = attr.active_width; + resp.active_speed = attr.active_speed; + resp.phys_state = attr.phys_state; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_query_gid(struct ib_uverbs_file *file, + 
const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_gid cmd; + struct ib_uverbs_query_gid_resp resp; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + memset(&resp, 0, sizeof resp); + + ret = ib_query_gid(file->device->ib_dev, cmd.port_num, cmd.index, + (union ib_gid *) resp.gid); + if (ret) + return ret; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_query_pkey(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_query_pkey cmd; + struct ib_uverbs_query_pkey_resp resp; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + memset(&resp, 0, sizeof resp); + + ret = ib_query_pkey(file->device->ib_dev, cmd.port_num, cmd.index, + &resp.pkey); + if (ret) + return ret; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) + return -EFAULT; + + return in_len; +} + +ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_alloc_pd cmd; + struct ib_uverbs_alloc_pd_resp resp; + struct ib_udata udata; + struct ib_uobject *uobj; + struct ib_pd *pd; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + uobj->context = file->ucontext; + + pd = file->device->ib_dev->alloc_pd(file->device->ib_dev, + file->ucontext, &udata); + if (IS_ERR(pd)) { + ret = PTR_ERR(pd); + goto err; + } + + pd->device = file->device->ib_dev; + pd->uobject = uobj; + 
atomic_set(&pd->usecnt, 0); + +retry: + if (!idr_pre_get(&ib_uverbs_pd_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_pd; + } + + down(&ib_uverbs_idr_mutex); + ret = idr_get_new(&ib_uverbs_pd_idr, pd, &uobj->id); + up(&ib_uverbs_idr_mutex); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_pd; + + spin_lock_irq(&file->ucontext->lock); + list_add_tail(&uobj->list, &file->ucontext->pd_list); + spin_unlock_irq(&file->ucontext->lock); + + memset(&resp, 0, sizeof resp); + resp.pd_handle = uobj->id; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_list; + } + + return in_len; + +err_list: + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + down(&ib_uverbs_idr_mutex); + idr_remove(&ib_uverbs_pd_idr, uobj->id); + up(&ib_uverbs_idr_mutex); + +err_pd: + ib_dealloc_pd(pd); + +err: + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_dealloc_pd(struct ib_uverbs_file *file, + const char __user *buf, + int in_len, int out_len) +{ + struct ib_uverbs_dealloc_pd cmd; + struct ib_pd *pd; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + if (!pd || pd->uobject->context != file->ucontext) + goto out; + + uobj = pd->uobject; + + ret = ib_dealloc_pd(pd); + if (ret) + goto out; + + idr_remove(&ib_uverbs_pd_idr, cmd.pd_handle); + + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? 
ret : in_len; +} + +ssize_t ib_uverbs_reg_mr(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_reg_mr cmd; + struct ib_uverbs_reg_mr_resp resp; + struct ib_udata udata; + struct ib_umem_object *obj; + struct ib_pd *pd; + struct ib_mr *mr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + if ((cmd.start & ~PAGE_MASK) != (cmd.hca_va & ~PAGE_MASK)) + return -EINVAL; + + obj = kmalloc(sizeof *obj, GFP_KERNEL); + if (!obj) + return -ENOMEM; + + obj->uobject.context = file->ucontext; + + /* + * We ask for writable memory if any access flags other than + * "remote read" are set. "Local write" and "remote write" + * obviously require write access. "Remote atomic" can do + * things like fetch and add, which will modify memory, and + * "MW bind" can change permissions by binding a window. 
+ */ + ret = ib_umem_get(file->device->ib_dev, &obj->umem, + (void *) (unsigned long) cmd.start, cmd.length, + !!(cmd.access_flags & ~IB_ACCESS_REMOTE_READ)); + if (ret) + goto err_free; + + obj->umem.virt_base = cmd.hca_va; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + if (!pd || pd->uobject->context != file->ucontext) { + ret = -EINVAL; + goto err_up; + } + + if (!pd->device->reg_user_mr) { + ret = -ENOSYS; + goto err_up; + } + + mr = pd->device->reg_user_mr(pd, &obj->umem, cmd.access_flags, &udata); + if (IS_ERR(mr)) { + ret = PTR_ERR(mr); + goto err_up; + } + + mr->device = pd->device; + mr->pd = pd; + mr->uobject = &obj->uobject; + atomic_inc(&pd->usecnt); + atomic_set(&mr->usecnt, 0); + + memset(&resp, 0, sizeof resp); + resp.lkey = mr->lkey; + resp.rkey = mr->rkey; + +retry: + if (!idr_pre_get(&ib_uverbs_mr_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_unreg; + } + + ret = idr_get_new(&ib_uverbs_mr_idr, mr, &obj->uobject.id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_unreg; + + resp.mr_handle = obj->uobject.id; + + spin_lock_irq(&file->ucontext->lock); + list_add_tail(&obj->uobject.list, &file->ucontext->mr_list); + spin_unlock_irq(&file->ucontext->lock); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_list; + } + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_list: + spin_lock_irq(&file->ucontext->lock); + list_del(&obj->uobject.list); + spin_unlock_irq(&file->ucontext->lock); + +err_unreg: + ib_dereg_mr(mr); + +err_up: + up(&ib_uverbs_idr_mutex); + + ib_umem_release(file->device->ib_dev, &obj->umem); + +err_free: + kfree(obj); + return ret; +} + +ssize_t ib_uverbs_dereg_mr(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_dereg_mr cmd; + struct ib_mr *mr; + struct ib_umem_object *memobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return 
-EFAULT; + + down(&ib_uverbs_idr_mutex); + + mr = idr_find(&ib_uverbs_mr_idr, cmd.mr_handle); + if (!mr || mr->uobject->context != file->ucontext) + goto out; + + memobj = container_of(mr->uobject, struct ib_umem_object, uobject); + + ret = ib_dereg_mr(mr); + if (ret) + goto out; + + idr_remove(&ib_uverbs_mr_idr, cmd.mr_handle); + + spin_lock_irq(&file->ucontext->lock); + list_del(&memobj->uobject.list); + spin_unlock_irq(&file->ucontext->lock); + + ib_umem_release(file->device->ib_dev, &memobj->umem); + kfree(memobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_create_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_cq cmd; + struct ib_uverbs_create_cq_resp resp; + struct ib_udata udata; + struct ib_uobject *uobj; + struct ib_cq *cq; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + if (cmd.event_handler >= file->device->num_comp) + return -EINVAL; + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + uobj->user_handle = cmd.user_handle; + uobj->context = file->ucontext; + + cq = file->device->ib_dev->create_cq(file->device->ib_dev, cmd.cqe, + file->ucontext, &udata); + if (IS_ERR(cq)) { + ret = PTR_ERR(cq); + goto err; + } + + cq->device = file->device->ib_dev; + cq->uobject = uobj; + cq->comp_handler = ib_uverbs_comp_handler; + cq->event_handler = ib_uverbs_cq_event_handler; + cq->cq_context = file; + atomic_set(&cq->usecnt, 0); + +retry: + if (!idr_pre_get(&ib_uverbs_cq_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_cq; + } + + down(&ib_uverbs_idr_mutex); + ret = idr_get_new(&ib_uverbs_cq_idr, cq, &uobj->id); + up(&ib_uverbs_idr_mutex); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_cq; + + 
spin_lock_irq(&file->ucontext->lock); + list_add_tail(&uobj->list, &file->ucontext->cq_list); + spin_unlock_irq(&file->ucontext->lock); + + memset(&resp, 0, sizeof resp); + resp.cq_handle = uobj->id; + resp.cqe = cq->cqe; + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_list; + } + + return in_len; + +err_list: + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + down(&ib_uverbs_idr_mutex); + idr_remove(&ib_uverbs_cq_idr, uobj->id); + up(&ib_uverbs_idr_mutex); + +err_cq: + ib_destroy_cq(cq); + +err: + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_destroy_cq(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_destroy_cq cmd; + struct ib_cq *cq; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + cq = idr_find(&ib_uverbs_cq_idr, cmd.cq_handle); + if (!cq || cq->uobject->context != file->ucontext) + goto out; + + uobj = cq->uobject; + + ret = ib_destroy_cq(cq); + if (ret) + goto out; + + idr_remove(&ib_uverbs_cq_idr, cmd.cq_handle); + + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? 
ret : in_len; +} + +ssize_t ib_uverbs_create_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_create_qp cmd; + struct ib_uverbs_create_qp_resp resp; + struct ib_udata udata; + struct ib_uobject *uobj; + struct ib_pd *pd; + struct ib_cq *scq, *rcq; + struct ib_qp *qp; + struct ib_qp_init_attr attr; + int ret; + + if (out_len < sizeof resp) + return -ENOSPC; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + INIT_UDATA(&udata, buf + sizeof cmd, + (unsigned long) cmd.response + sizeof resp, + in_len - sizeof cmd, out_len - sizeof resp); + + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); + if (!uobj) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + pd = idr_find(&ib_uverbs_pd_idr, cmd.pd_handle); + scq = idr_find(&ib_uverbs_cq_idr, cmd.send_cq_handle); + rcq = idr_find(&ib_uverbs_cq_idr, cmd.recv_cq_handle); + + if (!pd || pd->uobject->context != file->ucontext || + !scq || scq->uobject->context != file->ucontext || + !rcq || rcq->uobject->context != file->ucontext) { + ret = -EINVAL; + goto err_up; + } + + attr.event_handler = ib_uverbs_qp_event_handler; + attr.qp_context = file; + attr.send_cq = scq; + attr.recv_cq = rcq; + attr.srq = NULL; + attr.sq_sig_type = cmd.sq_sig_all ? 
IB_SIGNAL_ALL_WR : IB_SIGNAL_REQ_WR; + attr.qp_type = cmd.qp_type; + + attr.cap.max_send_wr = cmd.max_send_wr; + attr.cap.max_recv_wr = cmd.max_recv_wr; + attr.cap.max_send_sge = cmd.max_send_sge; + attr.cap.max_recv_sge = cmd.max_recv_sge; + attr.cap.max_inline_data = cmd.max_inline_data; + + uobj->user_handle = cmd.user_handle; + uobj->context = file->ucontext; + + qp = pd->device->create_qp(pd, &attr, &udata); + if (IS_ERR(qp)) { + ret = PTR_ERR(qp); + goto err_up; + } + + qp->device = pd->device; + qp->pd = pd; + qp->send_cq = attr.send_cq; + qp->recv_cq = attr.recv_cq; + qp->srq = attr.srq; + qp->uobject = uobj; + qp->event_handler = attr.event_handler; + qp->qp_context = attr.qp_context; + qp->qp_type = attr.qp_type; + atomic_inc(&pd->usecnt); + atomic_inc(&attr.send_cq->usecnt); + atomic_inc(&attr.recv_cq->usecnt); + if (attr.srq) + atomic_inc(&attr.srq->usecnt); + + memset(&resp, 0, sizeof resp); + resp.qpn = qp->qp_num; + +retry: + if (!idr_pre_get(&ib_uverbs_qp_idr, GFP_KERNEL)) { + ret = -ENOMEM; + goto err_destroy; + } + + ret = idr_get_new(&ib_uverbs_qp_idr, qp, &uobj->id); + + if (ret == -EAGAIN) + goto retry; + if (ret) + goto err_destroy; + + resp.qp_handle = uobj->id; + + spin_lock_irq(&file->ucontext->lock); + list_add_tail(&uobj->list, &file->ucontext->qp_list); + spin_unlock_irq(&file->ucontext->lock); + + if (copy_to_user((void __user *) (unsigned long) cmd.response, + &resp, sizeof resp)) { + ret = -EFAULT; + goto err_list; + } + + up(&ib_uverbs_idr_mutex); + + return in_len; + +err_list: + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + +err_destroy: + ib_destroy_qp(qp); + +err_up: + up(&ib_uverbs_idr_mutex); + + kfree(uobj); + return ret; +} + +ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_modify_qp cmd; + struct ib_qp *qp; + struct ib_qp_attr *attr; + int ret; + + if (copy_from_user(&cmd, buf, 
sizeof cmd)) + return -EFAULT; + + attr = kmalloc(sizeof *attr, GFP_KERNEL); + if (!attr) + return -ENOMEM; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) { + ret = -EINVAL; + goto out; + } + + attr->qp_state = cmd.qp_state; + attr->cur_qp_state = cmd.cur_qp_state; + attr->path_mtu = cmd.path_mtu; + attr->path_mig_state = cmd.path_mig_state; + attr->qkey = cmd.qkey; + attr->rq_psn = cmd.rq_psn; + attr->sq_psn = cmd.sq_psn; + attr->dest_qp_num = cmd.dest_qp_num; + attr->qp_access_flags = cmd.qp_access_flags; + attr->pkey_index = cmd.pkey_index; + attr->alt_pkey_index = cmd.pkey_index; + attr->en_sqd_async_notify = cmd.en_sqd_async_notify; + attr->max_rd_atomic = cmd.max_rd_atomic; + attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; + attr->min_rnr_timer = cmd.min_rnr_timer; + attr->port_num = cmd.port_num; + attr->timeout = cmd.timeout; + attr->retry_cnt = cmd.retry_cnt; + attr->rnr_retry = cmd.rnr_retry; + attr->alt_port_num = cmd.alt_port_num; + attr->alt_timeout = cmd.alt_timeout; + + memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); + attr->ah_attr.grh.flow_label = cmd.dest.flow_label; + attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; + attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; + attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; + attr->ah_attr.dlid = cmd.dest.dlid; + attr->ah_attr.sl = cmd.dest.sl; + attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; + attr->ah_attr.static_rate = cmd.dest.static_rate; + attr->ah_attr.ah_flags = cmd.dest.is_global ? 
IB_AH_GRH : 0; + attr->ah_attr.port_num = cmd.dest.port_num; + + memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); + attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; + attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; + attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; + attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; + attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; + attr->alt_ah_attr.sl = cmd.alt_dest.sl; + attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; + attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; + attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; + attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; + + ret = ib_modify_qp(qp, attr, cmd.attr_mask); + if (ret) + goto out; + + ret = in_len; + +out: + up(&ib_uverbs_idr_mutex); + kfree(attr); + + return ret; +} + +ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_destroy_qp cmd; + struct ib_qp *qp; + struct ib_uobject *uobj; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (!qp || qp->uobject->context != file->ucontext) + goto out; + + uobj = qp->uobject; + + ret = ib_destroy_qp(qp); + if (ret) + goto out; + + idr_remove(&ib_uverbs_qp_idr, cmd.qp_handle); + + spin_lock_irq(&file->ucontext->lock); + list_del(&uobj->list); + spin_unlock_irq(&file->ucontext->lock); + + kfree(uobj); + +out: + up(&ib_uverbs_idr_mutex); + + return ret ? 
ret : in_len; +} + +ssize_t ib_uverbs_attach_mcast(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_attach_mcast cmd; + struct ib_qp *qp; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (qp && qp->uobject->context == file->ucontext) + ret = ib_attach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); + + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} + +ssize_t ib_uverbs_detach_mcast(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_detach_mcast cmd; + struct ib_qp *qp; + int ret = -EINVAL; + + if (copy_from_user(&cmd, buf, sizeof cmd)) + return -EFAULT; + + down(&ib_uverbs_idr_mutex); + + qp = idr_find(&ib_uverbs_qp_idr, cmd.qp_handle); + if (qp && qp->uobject->context == file->ucontext) + ret = ib_detach_mcast(qp, (union ib_gid *) cmd.gid, cmd.mlid); + + up(&ib_uverbs_idr_mutex); + + return ret ? ret : in_len; +} --- /dev/null 2005-06-23 14:14:38.423479552 -0700 +++ linux/drivers/infiniband/core/uverbs_main.c 2005-06-28 15:20:04.363963991 -0700 @@ -0,0 +1,708 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: uverbs_main.c 2733 2005-06-28 19:14:34Z roland $ + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/fs.h> +#include <linux/poll.h> +#include <linux/file.h> +#include <linux/mount.h> + +#include <asm/uaccess.h> + +#include "uverbs.h" + +MODULE_AUTHOR("Roland Dreier"); +MODULE_DESCRIPTION("InfiniBand userspace verbs access"); +MODULE_LICENSE("Dual BSD/GPL"); + +#define INFINIBANDEVENTFS_MAGIC 0x49426576 /* "IBev" */ + +enum { + IB_UVERBS_MAJOR = 231, + IB_UVERBS_BASE_MINOR = 192, + IB_UVERBS_MAX_DEVICES = 32 +}; + +#define IB_UVERBS_BASE_DEV MKDEV(IB_UVERBS_MAJOR, IB_UVERBS_BASE_MINOR) + +DECLARE_MUTEX(ib_uverbs_idr_mutex); +DEFINE_IDR(ib_uverbs_pd_idr); +DEFINE_IDR(ib_uverbs_mr_idr); +DEFINE_IDR(ib_uverbs_mw_idr); +DEFINE_IDR(ib_uverbs_ah_idr); +DEFINE_IDR(ib_uverbs_cq_idr); +DEFINE_IDR(ib_uverbs_qp_idr); + +static spinlock_t map_lock; +static DECLARE_BITMAP(dev_map, IB_UVERBS_MAX_DEVICES); + +static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file, + const char __user *buf, int in_len, + int out_len) = { + [IB_USER_VERBS_CMD_QUERY_PARAMS] = ib_uverbs_query_params, + [IB_USER_VERBS_CMD_GET_CONTEXT] = ib_uverbs_get_context, + [IB_USER_VERBS_CMD_QUERY_DEVICE] = 
ib_uverbs_query_device, + [IB_USER_VERBS_CMD_QUERY_PORT] = ib_uverbs_query_port, + [IB_USER_VERBS_CMD_QUERY_GID] = ib_uverbs_query_gid, + [IB_USER_VERBS_CMD_QUERY_PKEY] = ib_uverbs_query_pkey, + [IB_USER_VERBS_CMD_ALLOC_PD] = ib_uverbs_alloc_pd, + [IB_USER_VERBS_CMD_DEALLOC_PD] = ib_uverbs_dealloc_pd, + [IB_USER_VERBS_CMD_REG_MR] = ib_uverbs_reg_mr, + [IB_USER_VERBS_CMD_DEREG_MR] = ib_uverbs_dereg_mr, + [IB_USER_VERBS_CMD_CREATE_CQ] = ib_uverbs_create_cq, + [IB_USER_VERBS_CMD_DESTROY_CQ] = ib_uverbs_destroy_cq, + [IB_USER_VERBS_CMD_CREATE_QP] = ib_uverbs_create_qp, + [IB_USER_VERBS_CMD_MODIFY_QP] = ib_uverbs_modify_qp, + [IB_USER_VERBS_CMD_DESTROY_QP] = ib_uverbs_destroy_qp, + [IB_USER_VERBS_CMD_ATTACH_MCAST] = ib_uverbs_attach_mcast, + [IB_USER_VERBS_CMD_DETACH_MCAST] = ib_uverbs_detach_mcast, +}; + +static struct vfsmount *uverbs_event_mnt; + +static void ib_uverbs_add_one(struct ib_device *device); +static void ib_uverbs_remove_one(struct ib_device *device); + +static int ib_dealloc_ucontext(struct ib_ucontext *context) +{ + struct ib_uobject *uobj, *tmp; + + if (!context) + return 0; + + down(&ib_uverbs_idr_mutex); + + /* XXX Free AHs */ + + list_for_each_entry_safe(uobj, tmp, &context->qp_list, list) { + struct ib_qp *qp = idr_find(&ib_uverbs_qp_idr, uobj->id); + idr_remove(&ib_uverbs_qp_idr, uobj->id); + ib_destroy_qp(qp); + list_del(&uobj->list); + kfree(uobj); + } + + list_for_each_entry_safe(uobj, tmp, &context->cq_list, list) { + struct ib_cq *cq = idr_find(&ib_uverbs_cq_idr, uobj->id); + idr_remove(&ib_uverbs_cq_idr, uobj->id); + ib_destroy_cq(cq); + list_del(&uobj->list); + kfree(uobj); + } + + /* XXX Free SRQs */ + /* XXX Free MWs */ + + list_for_each_entry_safe(uobj, tmp, &context->mr_list, list) { + struct ib_mr *mr = idr_find(&ib_uverbs_mr_idr, uobj->id); + struct ib_umem_object *memobj; + + idr_remove(&ib_uverbs_mr_idr, uobj->id); + ib_dereg_mr(mr); + + memobj = container_of(uobj, struct ib_umem_object, uobject); + 
ib_umem_release_on_close(mr->device, &memobj->umem); + + list_del(&uobj->list); + kfree(memobj); + } + + list_for_each_entry_safe(uobj, tmp, &context->pd_list, list) { + struct ib_pd *pd = idr_find(&ib_uverbs_pd_idr, uobj->id); + idr_remove(&ib_uverbs_pd_idr, uobj->id); + ib_dealloc_pd(pd); + list_del(&uobj->list); + kfree(uobj); + } + + up(&ib_uverbs_idr_mutex); + + return context->device->dealloc_ucontext(context); +} + +static void ib_uverbs_release_file(struct kref *ref) +{ + struct ib_uverbs_file *file = + container_of(ref, struct ib_uverbs_file, ref); + + module_put(file->device->ib_dev->owner); + kfree(file); +} + +static ssize_t ib_uverbs_event_read(struct file *filp, char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_uverbs_event_file *file = filp->private_data; + void *event; + int eventsz; + int ret = 0; + + spin_lock_irq(&file->lock); + + while (list_empty(&file->event_list) && file->fd >= 0) { + spin_unlock_irq(&file->lock); + + if (filp->f_flags & O_NONBLOCK) + return -EAGAIN; + + if (wait_event_interruptible(file->poll_wait, + !list_empty(&file->event_list) || + file->fd < 0)) + return -ERESTARTSYS; + + spin_lock_irq(&file->lock); + } + + if (file->fd < 0) { + spin_unlock_irq(&file->lock); + return -ENODEV; + } + + if (file->is_async) { + event = list_entry(file->event_list.next, + struct ib_uverbs_async_event, list); + eventsz = sizeof (struct ib_uverbs_async_event_desc); + } else { + event = list_entry(file->event_list.next, + struct ib_uverbs_comp_event, list); + eventsz = sizeof (struct ib_uverbs_comp_event_desc); + } + + if (eventsz > count) { + ret = -EINVAL; + event = NULL; + } else + list_del(file->event_list.next); + + spin_unlock_irq(&file->lock); + + if (event) { + if (copy_to_user(buf, event, eventsz)) + ret = -EFAULT; + else + ret = eventsz; + } + + kfree(event); + + return ret; +} + +static unsigned int ib_uverbs_event_poll(struct file *filp, + struct poll_table_struct *wait) +{ + unsigned int pollflags = 0; + struct 
ib_uverbs_event_file *file = filp->private_data; + + poll_wait(filp, &file->poll_wait, wait); + + spin_lock_irq(&file->lock); + if (file->fd < 0) + pollflags = POLLERR; + else if (!list_empty(&file->event_list)) + pollflags = POLLIN | POLLRDNORM; + spin_unlock_irq(&file->lock); + + return pollflags; +} + +static void ib_uverbs_event_release(struct ib_uverbs_event_file *file) +{ + struct list_head *entry, *tmp; + + spin_lock_irq(&file->lock); + if (file->fd != -1) { + file->fd = -1; + list_for_each_safe(entry, tmp, &file->event_list) + if (file->is_async) + kfree(list_entry(entry, struct ib_uverbs_async_event, list)); + else + kfree(list_entry(entry, struct ib_uverbs_comp_event, list)); + } + spin_unlock_irq(&file->lock); +} + +static int ib_uverbs_event_close(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_event_file *file = filp->private_data; + + ib_uverbs_event_release(file); + kref_put(&file->uverbs_file->ref, ib_uverbs_release_file); + + return 0; +} + +static struct file_operations uverbs_event_fops = { + /* + * No .owner field since we artificially create event files, + * so there is no increment to the module reference count in + * the open path. All event files come from a uverbs command + * file, which already takes a module reference, so this is OK. 
+ */ + .read = ib_uverbs_event_read, + .poll = ib_uverbs_event_poll, + .release = ib_uverbs_event_close +}; + +void ib_uverbs_comp_handler(struct ib_cq *cq, void *cq_context) +{ + struct ib_uverbs_file *file = cq_context; + struct ib_uverbs_comp_event *entry; + unsigned long flags; + + entry = kmalloc(sizeof *entry, GFP_ATOMIC); + if (!entry) + return; + + entry->desc.cq_handle = cq->uobject->user_handle; + + spin_lock_irqsave(&file->comp_file[0].lock, flags); + list_add_tail(&entry->list, &file->comp_file[0].event_list); + spin_unlock_irqrestore(&file->comp_file[0].lock, flags); + + wake_up_interruptible(&file->comp_file[0].poll_wait); +} + +static void ib_uverbs_async_handler(struct ib_uverbs_file *file, + __u64 element, __u64 event) +{ + struct ib_uverbs_async_event *entry; + unsigned long flags; + + entry = kmalloc(sizeof *entry, GFP_ATOMIC); + if (!entry) + return; + + entry->desc.element = element; + entry->desc.event_type = event; + + spin_lock_irqsave(&file->async_file.lock, flags); + list_add_tail(&entry->list, &file->async_file.event_list); + spin_unlock_irqrestore(&file->async_file.lock, flags); + + wake_up_interruptible(&file->async_file.poll_wait); +} + +void ib_uverbs_cq_event_handler(struct ib_event *event, void *context_ptr) +{ + ib_uverbs_async_handler(context_ptr, + event->element.cq->uobject->user_handle, + event->event); +} + +void ib_uverbs_qp_event_handler(struct ib_event *event, void *context_ptr) +{ + ib_uverbs_async_handler(context_ptr, + event->element.qp->uobject->user_handle, + event->event); +} + +static void ib_uverbs_event_handler(struct ib_event_handler *handler, + struct ib_event *event) +{ + struct ib_uverbs_file *file = + container_of(handler, struct ib_uverbs_file, event_handler); + + ib_uverbs_async_handler(file, event->element.port_num, event->event); +} + +static int ib_uverbs_event_init(struct ib_uverbs_event_file *file, + struct ib_uverbs_file *uverbs_file) +{ + struct file *filp; + + spin_lock_init(&file->lock); + 
INIT_LIST_HEAD(&file->event_list); + init_waitqueue_head(&file->poll_wait); + file->uverbs_file = uverbs_file; + + file->fd = get_unused_fd(); + if (file->fd < 0) + return file->fd; + + filp = get_empty_filp(); + if (!filp) { + put_unused_fd(file->fd); + return -ENFILE; + } + + filp->f_op = &uverbs_event_fops; + filp->f_vfsmnt = mntget(uverbs_event_mnt); + filp->f_dentry = dget(uverbs_event_mnt->mnt_root); + filp->f_mapping = filp->f_dentry->d_inode->i_mapping; + filp->f_flags = O_RDONLY; + filp->f_mode = FMODE_READ; + filp->private_data = file; + + fd_install(file->fd, filp); + + return 0; +} + +static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf, + size_t count, loff_t *pos) +{ + struct ib_uverbs_file *file = filp->private_data; + struct ib_uverbs_cmd_hdr hdr; + + if (count < sizeof hdr) + return -EINVAL; + + if (copy_from_user(&hdr, buf, sizeof hdr)) + return -EFAULT; + + if (hdr.in_words * 4 != count) + return -EINVAL; + + if (hdr.command < 0 || hdr.command >= ARRAY_SIZE(uverbs_cmd_table)) + return -EINVAL; + + if (!file->ucontext && + hdr.command != IB_USER_VERBS_CMD_QUERY_PARAMS && + hdr.command != IB_USER_VERBS_CMD_GET_CONTEXT) + return -EINVAL; + + return uverbs_cmd_table[hdr.command](file, buf + sizeof hdr, + hdr.in_words * 4, hdr.out_words * 4); +} + +static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct ib_uverbs_file *file = filp->private_data; + + if (!file->ucontext) + return -ENODEV; + else + return file->device->ib_dev->mmap(file->ucontext, vma); +} + +static int ib_uverbs_open(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_device *dev = + container_of(inode->i_cdev, struct ib_uverbs_device, dev); + struct ib_uverbs_file *file; + int i = 0; + int ret; + + if (!try_module_get(dev->ib_dev->owner)) + return -ENODEV; + + file = kmalloc(sizeof *file + + (dev->num_comp - 1) * sizeof (struct ib_uverbs_event_file), + GFP_KERNEL); + if (!file) + return -ENOMEM; + + file->device = dev; + 
kref_init(&file->ref); + + file->ucontext = NULL; + + ret = ib_uverbs_event_init(&file->async_file, file); + if (ret) + goto err; + + file->async_file.is_async = 1; + + kref_get(&file->ref); + + for (i = 0; i < dev->num_comp; ++i) { + ret = ib_uverbs_event_init(&file->comp_file[i], file); + if (ret) + goto err_async; + kref_get(&file->ref); + file->comp_file[i].is_async = 0; + } + + + filp->private_data = file; + + INIT_IB_EVENT_HANDLER(&file->event_handler, dev->ib_dev, + ib_uverbs_event_handler); + if (ib_register_event_handler(&file->event_handler)) + goto err_async; + + return 0; + +err_async: + while (i--) + ib_uverbs_event_release(&file->comp_file[i]); + + ib_uverbs_event_release(&file->async_file); + +err: + kref_put(&file->ref, ib_uverbs_release_file); + + return ret; +} + +static int ib_uverbs_close(struct inode *inode, struct file *filp) +{ + struct ib_uverbs_file *file = filp->private_data; + int i; + + ib_unregister_event_handler(&file->event_handler); + ib_uverbs_event_release(&file->async_file); + ib_dealloc_ucontext(file->ucontext); + + for (i = 0; i < file->device->num_comp; ++i) + ib_uverbs_event_release(&file->comp_file[i]); + + kref_put(&file->ref, ib_uverbs_release_file); + + return 0; +} + +static struct file_operations uverbs_fops = { + .owner = THIS_MODULE, + .write = ib_uverbs_write, + .open = ib_uverbs_open, + .release = ib_uverbs_close +}; + +static struct file_operations uverbs_mmap_fops = { + .owner = THIS_MODULE, + .write = ib_uverbs_write, + .mmap = ib_uverbs_mmap, + .open = ib_uverbs_open, + .release = ib_uverbs_close +}; + +static struct ib_client uverbs_client = { + .name = "uverbs", + .add = ib_uverbs_add_one, + .remove = ib_uverbs_remove_one +}; + +static ssize_t show_dev(struct class_device *class_dev, char *buf) +{ + struct ib_uverbs_device *dev = + container_of(class_dev, struct ib_uverbs_device, class_dev); + + return print_dev_t(buf, dev->dev.dev); +} +static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL); + +static ssize_t 
show_ibdev(struct class_device *class_dev, char *buf) +{ + struct ib_uverbs_device *dev = + container_of(class_dev, struct ib_uverbs_device, class_dev); + + return sprintf(buf, "%s\n", dev->ib_dev->name); +} +static CLASS_DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL); + +static void ib_uverbs_release_class_dev(struct class_device *class_dev) +{ + struct ib_uverbs_device *dev = + container_of(class_dev, struct ib_uverbs_device, class_dev); + + cdev_del(&dev->dev); + clear_bit(dev->devnum, dev_map); + kfree(dev); +} + +static struct class uverbs_class = { + .name = "infiniband_verbs", + .release = ib_uverbs_release_class_dev +}; + +static ssize_t show_abi_version(struct class *class, char *buf) +{ + return sprintf(buf, "%d\n", IB_USER_VERBS_ABI_VERSION); +} +static CLASS_ATTR(abi_version, S_IRUGO, show_abi_version, NULL); + +static void ib_uverbs_add_one(struct ib_device *device) +{ + struct ib_uverbs_device *uverbs_dev; + + if (!device->alloc_ucontext) + return; + + uverbs_dev = kmalloc(sizeof *uverbs_dev, GFP_KERNEL); + if (!uverbs_dev) + return; + + memset(uverbs_dev, 0, sizeof *uverbs_dev); + + spin_lock(&map_lock); + uverbs_dev->devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES); + if (uverbs_dev->devnum >= IB_UVERBS_MAX_DEVICES) { + spin_unlock(&map_lock); + goto err; + } + set_bit(uverbs_dev->devnum, dev_map); + spin_unlock(&map_lock); + + uverbs_dev->ib_dev = device; + uverbs_dev->num_comp = 1; + + if (device->mmap) + cdev_init(&uverbs_dev->dev, &uverbs_mmap_fops); + else + cdev_init(&uverbs_dev->dev, &uverbs_fops); + uverbs_dev->dev.owner = THIS_MODULE; + kobject_set_name(&uverbs_dev->dev.kobj, "uverbs%d", uverbs_dev->devnum); + if (cdev_add(&uverbs_dev->dev, IB_UVERBS_BASE_DEV + uverbs_dev->devnum, 1)) + goto err; + + uverbs_dev->class_dev.class = &uverbs_class; + uverbs_dev->class_dev.dev = device->dma_device; + snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum); + if 
(class_device_register(&uverbs_dev->class_dev)) + goto err_cdev; + + if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_dev)) + goto err_class; + if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_ibdev)) + goto err_class; + + ib_set_client_data(device, &uverbs_client, uverbs_dev); + + return; + +err_class: + class_device_unregister(&uverbs_dev->class_dev); + +err_cdev: + cdev_del(&uverbs_dev->dev); + clear_bit(uverbs_dev->devnum, dev_map); + +err: + kfree(uverbs_dev); + return; +} + +static void ib_uverbs_remove_one(struct ib_device *device) +{ + struct ib_uverbs_device *uverbs_dev = ib_get_client_data(device, &uverbs_client); + + if (!uverbs_dev) + return; + + class_device_unregister(&uverbs_dev->class_dev); +} + +static struct super_block *uverbs_event_get_sb(struct file_system_type *fs_type, int flags, + const char *dev_name, void *data) +{ + return get_sb_pseudo(fs_type, "infinibandevent:", NULL, + INFINIBANDEVENTFS_MAGIC); +} + +static struct file_system_type uverbs_event_fs = { + /* No owner field so module can be unloaded */ + .name = "infinibandeventfs", + .get_sb = uverbs_event_get_sb, + .kill_sb = kill_litter_super +}; + +static int __init ib_uverbs_init(void) +{ + int ret; + + spin_lock_init(&map_lock); + + ret = register_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES, + "infiniband_verbs"); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't register device number\n"); + goto out; + } + + ret = class_register(&uverbs_class); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't create class infiniband_verbs\n"); + goto out_chrdev; + } + + ret = class_create_file(&uverbs_class, &class_attr_abi_version); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't create abi_version attribute\n"); + goto out_class; + } + + ret = register_filesystem(&uverbs_event_fs); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't register infinibandeventfs\n"); + goto out_class; + } + + uverbs_event_mnt = 
kern_mount(&uverbs_event_fs); + if (IS_ERR(uverbs_event_mnt)) { + ret = PTR_ERR(uverbs_event_mnt); + printk(KERN_ERR "user_verbs: couldn't mount infinibandeventfs\n"); + goto out_fs; + } + + ret = ib_register_client(&uverbs_client); + if (ret) { + printk(KERN_ERR "user_verbs: couldn't register client\n"); + goto out_mnt; + } + + return 0; + +out_mnt: + mntput(uverbs_event_mnt); + +out_fs: + unregister_filesystem(&uverbs_event_fs); + +out_class: + class_unregister(&uverbs_class); + +out_chrdev: + unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); + +out: + return ret; +} + +static void __exit ib_uverbs_cleanup(void) +{ + ib_unregister_client(&uverbs_client); + mntput(uverbs_event_mnt); + unregister_filesystem(&uverbs_event_fs); + class_unregister(&uverbs_class); + unregister_chrdev_region(IB_UVERBS_BASE_DEV, IB_UVERBS_MAX_DEVICES); +} + +module_init(ib_uverbs_init); +module_exit(ib_uverbs_cleanup); ^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH 06/16] IB uverbs: memory pinning implementation 2005-06-28 23:03 ` [PATCH 05/16] IB uverbs: core implementation Roland Dreier @ 2005-06-28 23:03 ` Roland Dreier 2005-06-28 23:03 ` [PATCH 07/16] IB uverbs: hook up Kconfig/Makefile Roland Dreier 2005-06-29 0:02 ` [PATCH 06/16] IB uverbs: memory pinning implementation Andrew Morton 2005-06-29 0:27 ` [PATCH 05/16] IB uverbs: core implementation Greg KH 1 sibling, 2 replies; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Add support for pinning userspace memory regions and returning a list of pages in the region. This includes tracking pinned memory against vm_locked and preventing unprivileged users from exceeding RLIMIT_MEMLOCK. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/core/uverbs_mem.c | 221 +++++++++++++++++++++++++++++++++++ 1 files changed, 221 insertions(+) --- /dev/null 2005-06-23 14:14:38.423479552 -0700 +++ linux/drivers/infiniband/core/uverbs_mem.c 2005-06-28 15:20:06.718455487 -0700 @@ -0,0 +1,221 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. 
+ * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * $Id: uverbs_mem.c 2743 2005-06-28 22:27:59Z roland $ + */ + +#include <linux/mm.h> +#include <linux/dma-mapping.h> + +#include "uverbs.h" + +struct ib_umem_account_work { + struct work_struct work; + struct mm_struct *mm; + unsigned long diff; +}; + + +static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty) +{ + struct ib_umem_chunk *chunk, *tmp; + int i; + + list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) { + dma_unmap_sg(dev->dma_device, chunk->page_list, + chunk->nents, DMA_BIDIRECTIONAL); + for (i = 0; i < chunk->nents; ++i) { + if (umem->writable && dirty) + set_page_dirty_lock(chunk->page_list[i].page); + put_page(chunk->page_list[i].page); + } + + kfree(chunk); + } +} + +int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, + void *addr, size_t size, int write) +{ + struct page **page_list; + struct ib_umem_chunk *chunk; + unsigned long locked; + unsigned long lock_limit; + unsigned long cur_base; + unsigned long npages; + int ret = 0; + int off; + int i; + + if (!can_do_mlock()) + return -EPERM; + + page_list = (struct page **) __get_free_page(GFP_KERNEL); + if (!page_list) + return -ENOMEM; + + mem->user_base = (unsigned long) addr; + mem->length = size; + mem->offset = (unsigned long) addr 
& ~PAGE_MASK; + mem->page_size = PAGE_SIZE; + mem->writable = write; + + INIT_LIST_HEAD(&mem->chunk_list); + + npages = PAGE_ALIGN(size + mem->offset) >> PAGE_SHIFT; + + down_write(&current->mm->mmap_sem); + + locked = npages + current->mm->locked_vm; + lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + + if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { + ret = -ENOMEM; + goto out; + } + + cur_base = (unsigned long) addr & PAGE_MASK; + + while (npages) { + ret = get_user_pages(current, current->mm, cur_base, + min_t(int, npages, + PAGE_SIZE / sizeof (struct page *)), + 1, !write, page_list, NULL); + + if (ret < 0) + goto out; + + cur_base += ret * PAGE_SIZE; + npages -= ret; + + off = 0; + + while (ret) { + chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) * + min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK), + GFP_KERNEL); + if (!chunk) { + ret = -ENOMEM; + goto out; + } + + chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK); + for (i = 0; i < chunk->nents; ++i) { + chunk->page_list[i].page = page_list[i + off]; + chunk->page_list[i].offset = 0; + chunk->page_list[i].length = PAGE_SIZE; + } + + chunk->nmap = dma_map_sg(dev->dma_device, + &chunk->page_list[0], + chunk->nents, + DMA_BIDIRECTIONAL); + if (chunk->nmap <= 0) { + for (i = 0; i < chunk->nents; ++i) + put_page(chunk->page_list[i].page); + kfree(chunk); + + ret = -ENOMEM; + goto out; + } + + ret -= chunk->nents; + off += chunk->nents; + list_add_tail(&chunk->list, &mem->chunk_list); + } + + ret = 0; + } + +out: + if (ret < 0) + __ib_umem_release(dev, mem, 0); + else + current->mm->locked_vm = locked; + + up_write(&current->mm->mmap_sem); + free_page((unsigned long) page_list); + + return ret; +} + +void ib_umem_release(struct ib_device *dev, struct ib_umem *umem) +{ + __ib_umem_release(dev, umem, 1); + + down_write(&current->mm->mmap_sem); + current->mm->locked_vm -= + PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; + up_write(&current->mm->mmap_sem); +} + +static void 
ib_umem_account(void *work_ptr) +{ + struct ib_umem_account_work *work = work_ptr; + + down_write(&work->mm->mmap_sem); + work->mm->locked_vm -= work->diff; + up_write(&work->mm->mmap_sem); + mmput(work->mm); + kfree(work); +} + +void ib_umem_release_on_close(struct ib_device *dev, struct ib_umem *umem) +{ + struct ib_umem_account_work *work; + struct mm_struct *mm; + + __ib_umem_release(dev, umem, 1); + + mm = get_task_mm(current); + if (!mm) + return; + + /* + * We may be called with the mm's mmap_sem already held. This + * can happen when a userspace munmap() is the call that drops + * the last reference to our file and calls our release + * method. If there are memory regions to destroy, we'll end + * up here and not be able to take the mmap_sem. Therefore we + * defer the vm_locked accounting to the system workqueue. + */ + + work = kmalloc(sizeof *work, GFP_KERNEL); + if (!work) + return; + + INIT_WORK(&work->work, ib_umem_account, work); + work->mm = mm; + work->diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT; + + schedule_work(&work->work); +} ^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH 07/16] IB uverbs: hook up Kconfig/Makefile 2005-06-28 23:03 ` [PATCH 06/16] IB uverbs: memory pinning implementation Roland Dreier @ 2005-06-28 23:03 ` Roland Dreier 2005-06-28 23:03 ` [PATCH 08/16] IB uverbs: add mthca ABI header Roland Dreier 2005-06-29 0:02 ` [PATCH 06/16] IB uverbs: memory pinning implementation Andrew Morton 1 sibling, 1 reply; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Hook up InfiniBand userspace verbs to Kconfig and the make system. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/Kconfig | 10 ++++++++++ drivers/infiniband/core/Makefile | 5 ++++- 2 files changed, 14 insertions(+), 1 deletion(-) --- linux.orig/drivers/infiniband/Kconfig 2005-06-28 15:19:24.783512128 -0700 +++ linux/drivers/infiniband/Kconfig 2005-06-28 15:20:09.143931652 -0700 @@ -7,6 +7,16 @@ config INFINIBAND any protocols you wish to use as well as drivers for your InfiniBand hardware. +config INFINIBAND_USER_VERBS + tristate "InfiniBand userspace verbs support" + depends on INFINIBAND + ---help--- + Userspace InfiniBand verbs support. This is the kernel side + of userspace verbs, which allows userspace processes to + directly access InfiniBand hardware for fast-path + operations. You will also need libibverbs and a hardware + driver library from <http://www.openib.org>. 
+ source "drivers/infiniband/hw/mthca/Kconfig" source "drivers/infiniband/ulp/ipoib/Kconfig" --- linux.orig/drivers/infiniband/core/Makefile 2005-06-28 15:19:24.782512344 -0700 +++ linux/drivers/infiniband/core/Makefile 2005-06-28 15:20:09.143931652 -0700 @@ -1,6 +1,7 @@ EXTRA_CFLAGS += -Idrivers/infiniband/include -obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o ib_umad.o +obj-$(CONFIG_INFINIBAND) += ib_core.o ib_mad.o ib_sa.o ib_umad.o +obj-$(CONFIG_INFINIBAND_USER_VERBS) += ib_uverbs.o ib_core-y := packer.o ud_header.o verbs.o sysfs.o \ device.o fmr_pool.o cache.o @@ -10,3 +11,5 @@ ib_mad-y := mad.o smi.o agent.o ib_sa-y := sa_query.o ib_umad-y := user_mad.o + +ib_uverbs-y := uverbs_main.o uverbs_cmd.o uverbs_mem.o ^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH 08/16] IB uverbs: add mthca ABI header 2005-06-28 23:03 ` [PATCH 07/16] IB uverbs: hook up Kconfig/Makefile Roland Dreier @ 2005-06-28 23:03 ` Roland Dreier 2005-06-28 23:03 ` [PATCH 09/16] IB uverbs: add mthca user doorbell record support Roland Dreier 0 siblings, 1 reply; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Add the mthca_user.h header file, which defines the device-specific ABI used by the mthca low-level driver for kernel/user communication. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/hw/mthca/mthca_user.h | 81 +++++++++++++++++++++++++++++++ 1 files changed, 81 insertions(+) --- /dev/null 2005-06-23 14:14:38.423479552 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_user.h 2005-06-28 15:20:10.937544281 -0700 @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef MTHCA_USER_H +#define MTHCA_USER_H + +#include <linux/types.h> + +/* + * Make sure that all structs defined in this file remain laid out so + * that they pack the same way on 32-bit and 64-bit architectures (to + * avoid incompatibility between 32-bit userspace and 64-bit kernels). + * In particular do not use pointer types -- pass pointers in __u64 + * instead. + */ + +struct mthca_alloc_ucontext_resp { + __u32 qp_tab_size; + __u32 uarc_size; +}; + +struct mthca_alloc_pd_resp { + __u32 pdn; + __u32 reserved; +}; + +struct mthca_create_cq { + __u32 lkey; + __u32 pdn; + __u64 arm_db_page; + __u64 set_db_page; + __u32 arm_db_index; + __u32 set_db_index; +}; + +struct mthca_create_cq_resp { + __u32 cqn; + __u32 reserved; +}; + +struct mthca_create_qp { + __u32 lkey; + __u32 reserved; + __u64 sq_db_page; + __u64 rq_db_page; + __u32 sq_db_index; + __u32 rq_db_index; +}; + +#endif /* MTHCA_USER_H */ ^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH 09/16] IB uverbs: add mthca user doorbell record support 2005-06-28 23:03 ` [PATCH 08/16] IB uverbs: add mthca ABI header Roland Dreier @ 2005-06-28 23:03 ` Roland Dreier 2005-06-28 23:03 ` [PATCH 10/16] IB uverbs: add mthca user context support Roland Dreier 0 siblings, 1 reply; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Add support for userspace doorbell records to mthca. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/hw/mthca/mthca_memfree.c | 141 +++++++++++++++++++++++++++- drivers/infiniband/hw/mthca/mthca_memfree.h | 14 ++ 2 files changed, 149 insertions(+), 6 deletions(-) --- linux.orig/drivers/infiniband/hw/mthca/mthca_memfree.c 2005-06-28 15:19:22.793941807 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_memfree.c 2005-06-28 15:20:12.995098113 -0700 @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -47,6 +48,15 @@ enum { MTHCA_TABLE_CHUNK_SIZE = 1 << 18 }; +struct mthca_user_db_table { + struct semaphore mutex; + struct { + u64 uvirt; + struct scatterlist mem; + int refcount; + } page[0]; +}; + void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm) { struct mthca_icm_chunk *chunk, *tmp; @@ -344,13 +354,133 @@ void mthca_free_icm_table(struct mthca_d kfree(table); } -static u64 mthca_uarc_virt(struct mthca_dev *dev, int page) +static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page) { return dev->uar_table.uarc_base + - dev->driver_uar.index * dev->uar_table.uarc_size + + uar->index * dev->uar_table.uarc_size + page * 4096; } +int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index, u64 uaddr) +{ + int ret = 0; + u8 status; + int i; + + if (!mthca_is_memfree(dev)) + return 0; + + if (index < 0 || index > dev->uar_table.uarc_size / 8) + return -EINVAL; + + down(&db_tab->mutex); + + i = index / MTHCA_DB_REC_PER_PAGE; + + if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE) || + (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) || + (uaddr & 4095)) { + ret = -EINVAL; + goto out; + } + + if (db_tab->page[i].refcount) { + ++db_tab->page[i].refcount; + goto out; + } + + ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0, + &db_tab->page[i].mem.page, NULL); + if (ret < 0) + goto out; + + db_tab->page[i].mem.length = 4096; + db_tab->page[i].mem.offset = uaddr & ~PAGE_MASK; + + ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + if (ret < 0) { + put_page(db_tab->page[i].mem.page); + goto out; + } + + ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem), + mthca_uarc_virt(dev, uar, i), &status); + if (!ret && status) + ret = -EINVAL; + if (ret) { + pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + 
put_page(db_tab->page[i].mem.page); + goto out; + } + + db_tab->page[i].uvirt = uaddr; + db_tab->page[i].refcount = 1; + +out: + up(&db_tab->mutex); + return ret; +} + +void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index) +{ + if (!mthca_is_memfree(dev)) + return; + + /* + * To make our bookkeeping simpler, we don't unmap DB + * pages until we clean up the whole db table. + */ + + down(&db_tab->mutex); + + --db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount; + + up(&db_tab->mutex); +} + +struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev) +{ + struct mthca_user_db_table *db_tab; + int npages; + int i; + + if (!mthca_is_memfree(dev)) + return NULL; + + npages = dev->uar_table.uarc_size / 4096; + db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL); + if (!db_tab) + return ERR_PTR(-ENOMEM); + + init_MUTEX(&db_tab->mutex); + for (i = 0; i < npages; ++i) { + db_tab->page[i].refcount = 0; + db_tab->page[i].uvirt = 0; + } + + return db_tab; +} + +void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab) +{ + int i; + u8 status; + + if (!mthca_is_memfree(dev)) + return; + + for (i = 0; i < dev->uar_table.uarc_size / 4096; ++i) { + if (db_tab->page[i].uvirt) { + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status); + pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE); + put_page(db_tab->page[i].mem.page); + } + } +} + int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db) { int group; @@ -407,7 +537,8 @@ int mthca_alloc_db(struct mthca_dev *dev } memset(page->db_rec, 0, 4096); - ret = mthca_MAP_ICM_page(dev, page->mapping, mthca_uarc_virt(dev, i), &status); + ret = mthca_MAP_ICM_page(dev, page->mapping, + mthca_uarc_virt(dev, &dev->driver_uar, i), &status); if (!ret && status) ret = -EINVAL; if (ret) { @@ -461,7 +592,7 @@ void mthca_free_db(struct mthca_dev *dev 
if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) && i >= dev->db_tab->max_group1 - 1) { - mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status); + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); dma_free_coherent(&dev->pdev->dev, 4096, page->db_rec, page->mapping); @@ -530,7 +661,7 @@ void mthca_cleanup_db_tab(struct mthca_d if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE)) mthca_warn(dev, "Kernel UARC page %d not empty\n", i); - mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, i), 1, &status); + mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status); dma_free_coherent(&dev->pdev->dev, 4096, dev->db_tab->page[i].db_rec, --- linux.orig/drivers/infiniband/hw/mthca/mthca_memfree.h 2005-06-28 15:19:22.793941807 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_memfree.h 2005-06-28 15:20:12.995098113 -0700 @@ -1,5 +1,6 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -148,7 +149,7 @@ struct mthca_db_table { struct semaphore mutex; }; -enum { +enum mthca_db_type { MTHCA_DB_TYPE_INVALID = 0x0, MTHCA_DB_TYPE_CQ_SET_CI = 0x1, MTHCA_DB_TYPE_CQ_ARM = 0x2, @@ -158,6 +159,17 @@ enum { MTHCA_DB_TYPE_GROUP_SEP = 0x7 }; +struct mthca_user_db_table; +struct mthca_uar; + +int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index, u64 uaddr); +void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab, int index); +struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev); +void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar, + struct mthca_user_db_table *db_tab); + int mthca_init_db_tab(struct mthca_dev *dev); void mthca_cleanup_db_tab(struct mthca_dev *dev); int mthca_alloc_db(struct mthca_dev *dev, int type, u32 qn, u32 **db); ^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH 10/16] IB uverbs: add mthca user context support 2005-06-28 23:03 ` [PATCH 09/16] IB uverbs: add mthca user doorbell record support Roland Dreier @ 2005-06-28 23:03 ` Roland Dreier 2005-06-28 23:03 ` [PATCH 11/16] IB uverbs: add mthca mmap support Roland Dreier 0 siblings, 1 reply; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Add support for managing userspace contexts to mthca. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/hw/mthca/mthca_provider.c | 58 +++++++++++++++++++++++++++ drivers/infiniband/hw/mthca/mthca_provider.h | 14 ++++++ 2 files changed, 72 insertions(+) --- linux.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-06-28 15:20:00.882715841 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_provider.c 2005-06-28 15:20:16.611313860 -0700 @@ -1,6 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -37,6 +38,8 @@ #include "mthca_dev.h" #include "mthca_cmd.h" +#include "mthca_user.h" +#include "mthca_memfree.h" static int mthca_query_device(struct ib_device *ibdev, struct ib_device_attr *props) @@ -284,6 +287,59 @@ static int mthca_query_gid(struct ib_dev return err; } +static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev, + struct ib_udata *udata) +{ + struct mthca_alloc_ucontext_resp uresp; + struct mthca_ucontext *context; + int err; + + memset(&uresp, 0, sizeof uresp); + + uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps; + if (mthca_is_memfree(to_mdev(ibdev))) + uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size; + else + uresp.uarc_size = 0; + + context = kmalloc(sizeof *context, GFP_KERNEL); + if (!context) + return ERR_PTR(-ENOMEM); + + err = mthca_uar_alloc(to_mdev(ibdev), &context->uar); + if (err) { + kfree(context); + return ERR_PTR(err); + } + + context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev)); + if (IS_ERR(context->db_tab)) { + err = PTR_ERR(context->db_tab); + mthca_uar_free(to_mdev(ibdev), &context->uar); + kfree(context); + return ERR_PTR(err); + } + + if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) { + mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab); + mthca_uar_free(to_mdev(ibdev), &context->uar); + kfree(context); + return ERR_PTR(-EFAULT); + } + + return &context->ibucontext; +} + +static int mthca_dealloc_ucontext(struct ib_ucontext *context) +{ + mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab); + mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar); + kfree(to_mucontext(context)); + + return 0; +} + static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) @@ -708,6 +764,8 @@ int mthca_register_device(struct mthca_d dev->ib_dev.modify_port = mthca_modify_port; dev->ib_dev.query_pkey 
= mthca_query_pkey; dev->ib_dev.query_gid = mthca_query_gid; + dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext; + dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext; dev->ib_dev.alloc_pd = mthca_alloc_pd; dev->ib_dev.dealloc_pd = mthca_dealloc_pd; dev->ib_dev.create_ah = mthca_ah_create; --- linux.orig/drivers/infiniband/hw/mthca/mthca_provider.h 2005-06-28 15:19:22.365034436 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_provider.h 2005-06-28 15:20:16.612313643 -0700 @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -54,6 +55,14 @@ struct mthca_uar { int index; }; +struct mthca_user_db_table; + +struct mthca_ucontext { + struct ib_ucontext ibucontext; + struct mthca_uar uar; + struct mthca_user_db_table *db_tab; +}; + struct mthca_mtt; struct mthca_mr { @@ -236,6 +245,11 @@ struct mthca_sqp { dma_addr_t header_dma; }; +static inline struct mthca_ucontext *to_mucontext(struct ib_ucontext *ibucontext) +{ + return container_of(ibucontext, struct mthca_ucontext, ibucontext); +} + static inline struct mthca_fmr *to_mfmr(struct ib_fmr *ibmr) { return container_of(ibmr, struct mthca_fmr, ibmr); ^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH 11/16] IB uverbs: add mthca mmap support 2005-06-28 23:03 ` [PATCH 10/16] IB uverbs: add mthca user context support Roland Dreier @ 2005-06-28 23:03 ` Roland Dreier 2005-06-28 23:03 ` [PATCH 12/16] IB uverbs: add mthca user PD support Roland Dreier 2005-06-29 0:05 ` [PATCH 11/16] IB uverbs: add mthca mmap support Andrew Morton 0 siblings, 2 replies; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Add support for mmap() method to mthca, so that userspace can get access to doorbell registers. This allows userspace to get direct access to the HCA for data path operations. Each userspace context gets its own copy of the doorbell registers and is only allowed to use resources that the kernel has given it access to. In other words, this is safe. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/hw/mthca/mthca_provider.c | 18 ++++++++++++++++++ 1 files changed, 18 insertions(+) --- linux.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-06-28 15:20:16.611313860 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_provider.c 2005-06-28 15:20:18.380930082 -0700 @@ -340,6 +340,23 @@ static int mthca_dealloc_ucontext(struct return 0; } +static int mthca_mmap_uar(struct ib_ucontext *context, + struct vm_area_struct *vma) +{ + if (vma->vm_end - vma->vm_start != PAGE_SIZE) + return -EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_flags |= VM_DONTCOPY; + + if (remap_pfn_range(vma, vma->vm_start, + to_mucontext(context)->uar.pfn, + PAGE_SIZE, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev, struct ib_ucontext *context, struct ib_udata *udata) @@ -766,6 +783,7 @@ int mthca_register_device(struct mthca_d dev->ib_dev.query_gid = mthca_query_gid; dev->ib_dev.alloc_ucontext = mthca_alloc_ucontext; dev->ib_dev.dealloc_ucontext = mthca_dealloc_ucontext; + dev->ib_dev.mmap = 
mthca_mmap_uar; dev->ib_dev.alloc_pd = mthca_alloc_pd; dev->ib_dev.dealloc_pd = mthca_dealloc_pd; dev->ib_dev.create_ah = mthca_ah_create; ^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH 12/16] IB uverbs: add mthca user PD support 2005-06-28 23:03 ` [PATCH 11/16] IB uverbs: add mthca mmap support Roland Dreier @ 2005-06-28 23:03 ` Roland Dreier 2005-06-28 23:03 ` [PATCH 13/16] IB uverbs: add mthca user MR support Roland Dreier 2005-06-29 0:07 ` [PATCH 12/16] IB uverbs: add mthca user PD support Andrew Morton 2005-06-29 0:05 ` [PATCH 11/16] IB uverbs: add mthca mmap support Andrew Morton 1 sibling, 2 replies; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Add support for userspace protection domains (PDs) to mthca. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/hw/mthca/mthca_dev.h | 3 ++- drivers/infiniband/hw/mthca/mthca_main.c | 2 +- drivers/infiniband/hw/mthca/mthca_pd.c | 24 +++++++++++++++--------- drivers/infiniband/hw/mthca/mthca_provider.c | 10 +++++++++- drivers/infiniband/hw/mthca/mthca_provider.h | 1 + 5 files changed, 28 insertions(+), 12 deletions(-) --- linux.orig/drivers/infiniband/hw/mthca/mthca_dev.h 2005-06-28 15:19:20.685397179 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_dev.h 2005-06-28 15:20:20.514467380 -0700 @@ -1,6 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -378,7 +379,7 @@ void mthca_unregister_device(struct mthc int mthca_uar_alloc(struct mthca_dev *dev, struct mthca_uar *uar); void mthca_uar_free(struct mthca_dev *dev, struct mthca_uar *uar); -int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd); +int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd); void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd); struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size); --- linux.orig/drivers/infiniband/hw/mthca/mthca_main.c 2005-06-28 15:19:20.685397179 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_main.c 2005-06-28 15:20:20.515467163 -0700 @@ -665,7 +665,7 @@ static int __devinit mthca_setup_hca(str goto err_pd_table_free; } - err = mthca_pd_alloc(dev, &dev->driver_pd); + err = mthca_pd_alloc(dev, 1, &dev->driver_pd); if (err) { mthca_err(dev, "Failed to create driver PD, " "aborting.\n"); --- linux.orig/drivers/infiniband/hw/mthca/mthca_pd.c 2005-06-28 15:19:20.684397395 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_pd.c 2005-06-28 15:20:20.513467597 -0700 @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -37,23 +38,27 @@ #include "mthca_dev.h" -int mthca_pd_alloc(struct mthca_dev *dev, struct mthca_pd *pd) +int mthca_pd_alloc(struct mthca_dev *dev, int privileged, struct mthca_pd *pd) { - int err; + int err = 0; might_sleep(); + pd->privileged = privileged; + atomic_set(&pd->sqp_count, 0); pd->pd_num = mthca_alloc(&dev->pd_table.alloc); if (pd->pd_num == -1) return -ENOMEM; - err = mthca_mr_alloc_notrans(dev, pd->pd_num, - MTHCA_MPT_FLAG_LOCAL_READ | - MTHCA_MPT_FLAG_LOCAL_WRITE, - &pd->ntmr); - if (err) - mthca_free(&dev->pd_table.alloc, pd->pd_num); + if (privileged) { + err = mthca_mr_alloc_notrans(dev, pd->pd_num, + MTHCA_MPT_FLAG_LOCAL_READ | + MTHCA_MPT_FLAG_LOCAL_WRITE, + &pd->ntmr); + if (err) + mthca_free(&dev->pd_table.alloc, pd->pd_num); + } return err; } @@ -61,7 +66,8 @@ int mthca_pd_alloc(struct mthca_dev *dev void mthca_pd_free(struct mthca_dev *dev, struct mthca_pd *pd) { might_sleep(); - mthca_free_mr(dev, &pd->ntmr); + if (pd->privileged) + mthca_free_mr(dev, &pd->ntmr); mthca_free(&dev->pd_table.alloc, pd->pd_num); } --- linux.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-06-28 15:20:18.380930082 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_provider.c 2005-06-28 15:20:20.513467597 -0700 @@ -368,12 +368,20 @@ static struct ib_pd *mthca_alloc_pd(stru if (!pd) return ERR_PTR(-ENOMEM); - err = mthca_pd_alloc(to_mdev(ibdev), pd); + err = mthca_pd_alloc(to_mdev(ibdev), !context, pd); if (err) { kfree(pd); return ERR_PTR(err); } + if (context) { + if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) { + mthca_pd_free(to_mdev(ibdev), pd); + kfree(pd); + return ERR_PTR(-EFAULT); + } + } + return &pd->ibpd; } --- linux.orig/drivers/infiniband/hw/mthca/mthca_provider.h 2005-06-28 15:20:16.612313643 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_provider.h 2005-06-28 15:20:20.514467380 -0700 @@ -92,6 +92,7 @@ struct mthca_pd { u32 pd_num; atomic_t sqp_count; struct mthca_mr 
ntmr; + int privileged; }; struct mthca_eq { ^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH 13/16] IB uverbs: add mthca user MR support 2005-06-28 23:03 ` [PATCH 12/16] IB uverbs: add mthca user PD support Roland Dreier @ 2005-06-28 23:03 ` Roland Dreier 2005-06-28 23:03 ` [PATCH 14/16] IB uverbs: add mthca user CQ support Roland Dreier 2005-06-29 0:07 ` [PATCH 12/16] IB uverbs: add mthca user PD support Andrew Morton 1 sibling, 1 reply; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Add support for userspace memory regions (MRs) to mthca. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/hw/mthca/mthca_provider.c | 82 +++++++++++++++++++++++++++ 1 files changed, 82 insertions(+) --- linux.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-06-28 15:20:20.513467597 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_provider.c 2005-06-28 15:20:23.354851384 -0700 @@ -654,6 +654,87 @@ static struct ib_mr *mthca_reg_phys_mr(s return &mr->ibmr; } +static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region, + int acc, struct ib_udata *udata) +{ + struct mthca_dev *dev = to_mdev(pd->device); + struct ib_umem_chunk *chunk; + struct mthca_mr *mr; + u64 *pages; + int shift, n, len; + int i, j, k; + int err = 0; + + shift = ffs(region->page_size) - 1; + + mr = kmalloc(sizeof *mr, GFP_KERNEL); + if (!mr) + return ERR_PTR(-ENOMEM); + + n = 0; + list_for_each_entry(chunk, &region->chunk_list, list) + n += chunk->nents; + + mr->mtt = mthca_alloc_mtt(dev, n); + if (IS_ERR(mr->mtt)) { + err = PTR_ERR(mr->mtt); + goto err; + } + + pages = (u64 *) __get_free_page(GFP_KERNEL); + if (!pages) { + err = -ENOMEM; + goto err_mtt; + } + + i = n = 0; + + list_for_each_entry(chunk, &region->chunk_list, list) + for (j = 0; j < chunk->nmap; ++j) { + len = sg_dma_len(&chunk->page_list[j]) >> shift; + for (k = 0; k < len; ++k) { + pages[i++] = sg_dma_address(&chunk->page_list[j]) + + region->page_size * k; + /* + * Be friendly to WRITE_MTT command + * and
leave two empty slots for the + * index and reserved fields of the + * mailbox. + */ + if (i == PAGE_SIZE / sizeof (u64) - 2) { + err = mthca_write_mtt(dev, mr->mtt, + n, pages, i); + if (err) + goto mtt_done; + n += i; + i = 0; + } + } + } + + if (i) + err = mthca_write_mtt(dev, mr->mtt, n, pages, i); +mtt_done: + free_page((unsigned long) pages); + if (err) + goto err_mtt; + + err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base, + region->length, convert_access(acc), mr); + + if (err) + goto err_mtt; + + return &mr->ibmr; + +err_mtt: + mthca_free_mtt(dev, mr->mtt); + +err: + kfree(mr); + return ERR_PTR(err); +} + static int mthca_dereg_mr(struct ib_mr *mr) { struct mthca_mr *mmr = to_mmr(mr); @@ -804,6 +885,7 @@ int mthca_register_device(struct mthca_d dev->ib_dev.poll_cq = mthca_poll_cq; dev->ib_dev.get_dma_mr = mthca_get_dma_mr; dev->ib_dev.reg_phys_mr = mthca_reg_phys_mr; + dev->ib_dev.reg_user_mr = mthca_reg_user_mr; dev->ib_dev.dereg_mr = mthca_dereg_mr; if (dev->mthca_flags & MTHCA_FLAG_FMR) { ^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH 14/16] IB uverbs: add mthca user CQ support 2005-06-28 23:03 ` [PATCH 13/16] IB uverbs: add mthca user MR support Roland Dreier @ 2005-06-28 23:03 ` Roland Dreier 2005-06-28 23:03 ` [PATCH 15/16] IB uverbs: add mthca user QP support Roland Dreier 2005-06-29 0:10 ` [PATCH 14/16] IB uverbs: add mthca user CQ support Andrew Morton 0 siblings, 2 replies; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Add support for userspace completion queues (CQs) to mthca. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/hw/mthca/mthca_cq.c | 78 +++++++++++++++------------ drivers/infiniband/hw/mthca/mthca_dev.h | 1 drivers/infiniband/hw/mthca/mthca_provider.c | 69 +++++++++++++++++++++-- drivers/infiniband/hw/mthca/mthca_provider.h | 1 4 files changed, 111 insertions(+), 38 deletions(-) --- linux.orig/drivers/infiniband/hw/mthca/mthca_cq.c 2005-06-28 15:19:19.183721488 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_cq.c 2005-06-28 15:20:25.680347052 -0700 @@ -1,6 +1,7 @@ /* * Copyright (c) 2004, 2005 Topspin Communications. All rights reserved. * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved. + * Copyright (c) 2005 Cisco Systems, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -742,6 +743,7 @@ err_out: } int mthca_init_cq(struct mthca_dev *dev, int nent, + struct mthca_ucontext *ctx, u32 pdn, struct mthca_cq *cq) { int size = nent * MTHCA_CQ_ENTRY_SIZE; @@ -753,30 +755,33 @@ int mthca_init_cq(struct mthca_dev *dev, might_sleep(); - cq->ibcq.cqe = nent - 1; + cq->ibcq.cqe = nent - 1; + cq->is_kernel = !ctx; cq->cqn = mthca_alloc(&dev->cq_table.alloc); if (cq->cqn == -1) return -ENOMEM; if (mthca_is_memfree(dev)) { - cq->arm_sn = 1; - err = mthca_table_get(dev, dev->cq_table.table, cq->cqn); if (err) goto err_out; - err = -ENOMEM; + if (cq->is_kernel) { + cq->arm_sn = 1; + + err = -ENOMEM; - cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, - cq->cqn, &cq->set_ci_db); - if (cq->set_ci_db_index < 0) - goto err_out_icm; - - cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM, - cq->cqn, &cq->arm_db); - if (cq->arm_db_index < 0) - goto err_out_ci; + cq->set_ci_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, + cq->cqn, &cq->set_ci_db); + if (cq->set_ci_db_index < 0) + goto err_out_icm; + + cq->arm_db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_CQ_ARM, + cq->cqn, &cq->arm_db); + if (cq->arm_db_index < 0) + goto err_out_ci; + } } mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL); @@ -785,12 +790,14 @@ int mthca_init_cq(struct mthca_dev *dev, cq_context = mailbox->buf; - err = mthca_alloc_cq_buf(dev, size, cq); - if (err) - goto err_out_mailbox; + if (cq->is_kernel) { + err = mthca_alloc_cq_buf(dev, size, cq); + if (err) + goto err_out_mailbox; - for (i = 0; i < nent; ++i) - set_cqe_hw(get_cqe(cq, i)); + for (i = 0; i < nent; ++i) + set_cqe_hw(get_cqe(cq, i)); + } spin_lock_init(&cq->lock); atomic_set(&cq->refcount, 1); @@ -801,11 +808,14 @@ int mthca_init_cq(struct mthca_dev *dev, MTHCA_CQ_STATE_DISARMED | MTHCA_CQ_FLAG_TR); cq_context->start = cpu_to_be64(0); - cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24 | - dev->driver_uar.index); + 
cq_context->logsize_usrpage = cpu_to_be32((ffs(nent) - 1) << 24); + if (ctx) + cq_context->logsize_usrpage |= cpu_to_be32(ctx->uar.index); + else + cq_context->logsize_usrpage |= cpu_to_be32(dev->driver_uar.index); cq_context->error_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_ASYNC].eqn); cq_context->comp_eqn = cpu_to_be32(dev->eq_table.eq[MTHCA_EQ_COMP].eqn); - cq_context->pd = cpu_to_be32(dev->driver_pd.pd_num); + cq_context->pd = cpu_to_be32(pdn); cq_context->lkey = cpu_to_be32(cq->mr.ibmr.lkey); cq_context->cqn = cpu_to_be32(cq->cqn); @@ -843,18 +853,20 @@ int mthca_init_cq(struct mthca_dev *dev, return 0; err_out_free_mr: - mthca_free_mr(dev, &cq->mr); - mthca_free_cq_buf(dev, cq); + if (cq->is_kernel) { + mthca_free_mr(dev, &cq->mr); + mthca_free_cq_buf(dev, cq); + } err_out_mailbox: mthca_free_mailbox(dev, mailbox); err_out_arm: - if (mthca_is_memfree(dev)) + if (cq->is_kernel && mthca_is_memfree(dev)) mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); err_out_ci: - if (mthca_is_memfree(dev)) + if (cq->is_kernel && mthca_is_memfree(dev)) mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); err_out_icm: @@ -892,7 +904,8 @@ void mthca_free_cq(struct mthca_dev *dev int j; printk(KERN_ERR "context for CQN %x (cons index %x, next sw %d)\n", - cq->cqn, cq->cons_index, !!next_cqe_sw(cq)); + cq->cqn, cq->cons_index, + cq->is_kernel ? 
!!next_cqe_sw(cq) : 0); for (j = 0; j < 16; ++j) printk(KERN_ERR "[%2x] %08x\n", j * 4, be32_to_cpu(ctx[j])); } @@ -910,12 +923,13 @@ void mthca_free_cq(struct mthca_dev *dev atomic_dec(&cq->refcount); wait_event(cq->wait, !atomic_read(&cq->refcount)); - mthca_free_mr(dev, &cq->mr); - mthca_free_cq_buf(dev, cq); - - if (mthca_is_memfree(dev)) { - mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); - mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); + if (cq->is_kernel) { + mthca_free_mr(dev, &cq->mr); + mthca_free_cq_buf(dev, cq); + if (mthca_is_memfree(dev)) { + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_ARM, cq->arm_db_index); + mthca_free_db(dev, MTHCA_DB_TYPE_CQ_SET_CI, cq->set_ci_db_index); + } } mthca_table_put(dev, dev->cq_table.table, cq->cqn); --- linux.orig/drivers/infiniband/hw/mthca/mthca_dev.h 2005-06-28 15:20:20.514467380 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_dev.h 2005-06-28 15:20:25.681346835 -0700 @@ -414,6 +414,7 @@ int mthca_poll_cq(struct ib_cq *ibcq, in int mthca_tavor_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); int mthca_arbel_arm_cq(struct ib_cq *cq, enum ib_cq_notify notify); int mthca_init_cq(struct mthca_dev *dev, int nent, + struct mthca_ucontext *ctx, u32 pdn, struct mthca_cq *cq); void mthca_free_cq(struct mthca_dev *dev, struct mthca_cq *cq); --- linux.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-06-28 15:20:23.354851384 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_provider.c 2005-06-28 15:20:25.681346835 -0700 @@ -497,28 +497,85 @@ static struct ib_cq *mthca_create_cq(str struct ib_ucontext *context, struct ib_udata *udata) { + struct mthca_create_cq ucmd; struct mthca_cq *cq; int nent; int err; + if (context) { + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + return ERR_PTR(-EFAULT); + + err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, + ucmd.set_db_index, ucmd.set_db_page); + if (err) + return ERR_PTR(err); + + err = 
mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, + ucmd.arm_db_index, ucmd.arm_db_page); + if (err) + goto err_unmap_set; + } + cq = kmalloc(sizeof *cq, GFP_KERNEL); - if (!cq) - return ERR_PTR(-ENOMEM); + if (!cq) { + err = -ENOMEM; + goto err_unmap_arm; + } + + if (context) { + cq->mr.ibmr.lkey = ucmd.lkey; + cq->set_ci_db_index = ucmd.set_db_index; + cq->arm_db_index = ucmd.arm_db_index; + } for (nent = 1; nent <= entries; nent <<= 1) ; /* nothing */ - err = mthca_init_cq(to_mdev(ibdev), nent, cq); - if (err) { - kfree(cq); - cq = ERR_PTR(err); + err = mthca_init_cq(to_mdev(ibdev), nent, + context ? to_mucontext(context) : NULL, + context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num, + cq); + if (err) + goto err_free; + + if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) { + mthca_free_cq(to_mdev(ibdev), cq); + goto err_free; } return &cq->ibcq; + +err_free: + kfree(cq); + +err_unmap_arm: + if (context) + mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, ucmd.arm_db_index); + +err_unmap_set: + if (context) + mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar, + to_mucontext(context)->db_tab, ucmd.set_db_index); + + return ERR_PTR(err); } static int mthca_destroy_cq(struct ib_cq *cq) { + if (cq->uobject) { + mthca_unmap_user_db(to_mdev(cq->device), + &to_mucontext(cq->uobject->context)->uar, + to_mucontext(cq->uobject->context)->db_tab, + to_mcq(cq)->arm_db_index); + mthca_unmap_user_db(to_mdev(cq->device), + &to_mucontext(cq->uobject->context)->uar, + to_mucontext(cq->uobject->context)->db_tab, + to_mcq(cq)->set_ci_db_index); + } mthca_free_cq(to_mdev(cq->device), to_mcq(cq)); kfree(cq); --- linux.orig/drivers/infiniband/hw/mthca/mthca_provider.h 2005-06-28 15:20:20.514467380 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_provider.h 2005-06-28 15:20:25.682346619 -0700 @@ -177,6 +177,7 @@ struct mthca_cq { int cqn; u32 cons_index; int 
is_direct; + int is_kernel; /* Next fields are Arbel only */ int set_ci_db_index; ^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH 15/16] IB uverbs: add mthca user QP support 2005-06-28 23:03 ` [PATCH 14/16] IB uverbs: add mthca user CQ support Roland Dreier @ 2005-06-28 23:03 ` Roland Dreier 2005-06-28 23:03 ` [PATCH 16/16] IB uverbs: add documentation file Roland Dreier 2005-06-29 0:10 ` [PATCH 14/16] IB uverbs: add mthca user CQ support Andrew Morton 1 sibling, 1 reply; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Add support for userspace queue pairs (QPs) to mthca. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/hw/mthca/mthca_dev.h | 2 drivers/infiniband/hw/mthca/mthca_provider.c | 80 ++++++++-- drivers/infiniband/hw/mthca/mthca_qp.c | 215 +++++++++++++++++---------- 3 files changed, 212 insertions(+), 85 deletions(-) --- linux.orig/drivers/infiniband/hw/mthca/mthca_dev.h 2005-06-28 15:20:25.681346835 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_dev.h 2005-06-28 15:20:28.954636956 -0700 @@ -440,12 +440,14 @@ int mthca_alloc_qp(struct mthca_dev *dev struct mthca_cq *recv_cq, enum ib_qp_type type, enum ib_sig_type send_policy, + struct ib_qp_cap *cap, struct mthca_qp *qp); int mthca_alloc_sqp(struct mthca_dev *dev, struct mthca_pd *pd, struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_sig_type send_policy, + struct ib_qp_cap *cap, int qpn, int port, struct mthca_sqp *sqp); --- linux.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-06-28 15:20:25.681346835 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_provider.c 2005-06-28 15:20:28.953637173 -0700 @@ -424,6 +424,7 @@ static struct ib_qp *mthca_create_qp(str struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { + struct mthca_create_qp ucmd; struct mthca_qp *qp; int err; @@ -432,41 +433,82 @@ static struct ib_qp *mthca_create_qp(str case IB_QPT_UC: case IB_QPT_UD: { + struct mthca_ucontext *context; + qp = kmalloc(sizeof *qp, GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); - qp->sq.max = 
init_attr->cap.max_send_wr; - qp->rq.max = init_attr->cap.max_recv_wr; - qp->sq.max_gs = init_attr->cap.max_send_sge; - qp->rq.max_gs = init_attr->cap.max_recv_sge; + if (pd->uobject) { + context = to_mucontext(pd->uobject->context); + + if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd)) + return ERR_PTR(-EFAULT); + + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, + ucmd.sq_db_index, ucmd.sq_db_page); + if (err) { + kfree(qp); + return ERR_PTR(err); + } + + err = mthca_map_user_db(to_mdev(pd->device), &context->uar, + context->db_tab, + ucmd.rq_db_index, ucmd.rq_db_page); + if (err) { + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, + context->db_tab, + ucmd.sq_db_index); + kfree(qp); + return ERR_PTR(err); + } + + qp->mr.ibmr.lkey = ucmd.lkey; + qp->sq.db_index = ucmd.sq_db_index; + qp->rq.db_index = ucmd.rq_db_index; + } err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd), to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq), init_attr->qp_type, init_attr->sq_sig_type, - qp); + &init_attr->cap, qp); + + if (err && pd->uobject) { + context = to_mucontext(pd->uobject->context); + + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, + context->db_tab, + ucmd.sq_db_index); + mthca_unmap_user_db(to_mdev(pd->device), + &context->uar, + context->db_tab, + ucmd.rq_db_index); + } + qp->ibqp.qp_num = qp->qpn; break; } case IB_QPT_SMI: case IB_QPT_GSI: { + /* Don't allow userspace to create special QPs */ + if (pd->uobject) + return ERR_PTR(-EINVAL); + qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); - qp->sq.max = init_attr->cap.max_send_wr; - qp->rq.max = init_attr->cap.max_recv_wr; - qp->sq.max_gs = init_attr->cap.max_send_sge; - qp->rq.max_gs = init_attr->cap.max_recv_sge; - qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 
0 : 1; err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd), to_mcq(init_attr->send_cq), to_mcq(init_attr->recv_cq), - init_attr->sq_sig_type, + init_attr->sq_sig_type, &init_attr->cap, qp->ibqp.qp_num, init_attr->port_num, to_msqp(qp)); break; @@ -481,13 +523,27 @@ static struct ib_qp *mthca_create_qp(str return ERR_PTR(err); } - init_attr->cap.max_inline_data = 0; + init_attr->cap.max_inline_data = 0; + init_attr->cap.max_send_wr = qp->sq.max; + init_attr->cap.max_recv_wr = qp->rq.max; + init_attr->cap.max_send_sge = qp->sq.max_gs; + init_attr->cap.max_recv_sge = qp->rq.max_gs; return &qp->ibqp; } static int mthca_destroy_qp(struct ib_qp *qp) { + if (qp->uobject) { + mthca_unmap_user_db(to_mdev(qp->device), + &to_mucontext(qp->uobject->context)->uar, + to_mucontext(qp->uobject->context)->db_tab, + to_mqp(qp)->sq.db_index); + mthca_unmap_user_db(to_mdev(qp->device), + &to_mucontext(qp->uobject->context)->uar, + to_mucontext(qp->uobject->context)->db_tab, + to_mqp(qp)->rq.db_index); + } mthca_free_qp(to_mdev(qp->device), to_mqp(qp)); kfree(qp); return 0; --- linux.orig/drivers/infiniband/hw/mthca/mthca_qp.c 2005-06-28 15:19:18.256921644 -0700 +++ linux/drivers/infiniband/hw/mthca/mthca_qp.c 2005-06-28 15:20:28.952637389 -0700 @@ -1,5 +1,6 @@ /* * Copyright (c) 2004 Topspin Communications. All rights reserved. + * Copyright (c) 2005 Cisco Systems. All rights reserved. * * This software is available to you under a choice of one of two * licenses. 
You may choose to be licensed under the terms of the GNU @@ -46,7 +47,9 @@ enum { MTHCA_MAX_DIRECT_QP_SIZE = 4 * PAGE_SIZE, MTHCA_ACK_REQ_FREQ = 10, MTHCA_FLIGHT_LIMIT = 9, - MTHCA_UD_HEADER_SIZE = 72 /* largest UD header possible */ + MTHCA_UD_HEADER_SIZE = 72, /* largest UD header possible */ + MTHCA_INLINE_HEADER_SIZE = 4, /* data segment overhead for inline */ + MTHCA_INLINE_CHUNK_SIZE = 16 /* inline data segment chunk */ }; enum { @@ -689,7 +692,11 @@ int mthca_modify_qp(struct ib_qp *ibqp, /* leave arbel_sched_queue as 0 */ - qp_context->usr_page = cpu_to_be32(dev->driver_uar.index); + if (qp->ibqp.uobject) + qp_context->usr_page = + cpu_to_be32(to_mucontext(qp->ibqp.uobject->context)->uar.index); + else + qp_context->usr_page = cpu_to_be32(dev->driver_uar.index); qp_context->local_qpn = cpu_to_be32(qp->qpn); if (attr_mask & IB_QP_DEST_QPN) { qp_context->remote_qpn = cpu_to_be32(attr->dest_qp_num); @@ -954,6 +961,15 @@ static int mthca_alloc_wqe_buf(struct mt qp->send_wqe_offset = ALIGN(qp->rq.max << qp->rq.wqe_shift, 1 << qp->sq.wqe_shift); + + /* + * If this is a userspace QP, we don't actually have to + * allocate anything. All we need is to calculate the WQE + * sizes and the send_wqe_offset, so we're done now. 
+ */ + if (pd->ibpd.uobject) + return 0; + size = PAGE_ALIGN(qp->send_wqe_offset + (qp->sq.max << qp->sq.wqe_shift)); @@ -1053,10 +1069,32 @@ static int mthca_alloc_wqe_buf(struct mt return err; } -static int mthca_alloc_memfree(struct mthca_dev *dev, +static void mthca_free_wqe_buf(struct mthca_dev *dev, struct mthca_qp *qp) { - int ret = 0; + int i; + int size = PAGE_ALIGN(qp->send_wqe_offset + + (qp->sq.max << qp->sq.wqe_shift)); + + if (qp->is_direct) { + dma_free_coherent(&dev->pdev->dev, size, qp->queue.direct.buf, + pci_unmap_addr(&qp->queue.direct, mapping)); + } else { + for (i = 0; i < size / PAGE_SIZE; ++i) { + dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + qp->queue.page_list[i].buf, + pci_unmap_addr(&qp->queue.page_list[i], + mapping)); + } + } + + kfree(qp->wrid); +} + +static int mthca_map_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + int ret; if (mthca_is_memfree(dev)) { ret = mthca_table_get(dev, dev->qp_table.qp_table, qp->qpn); @@ -1067,35 +1105,15 @@ static int mthca_alloc_memfree(struct mt if (ret) goto err_qpc; - ret = mthca_table_get(dev, dev->qp_table.rdb_table, - qp->qpn << dev->qp_table.rdb_shift); - if (ret) - goto err_eqpc; - - qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ, - qp->qpn, &qp->rq.db); - if (qp->rq.db_index < 0) { - ret = -ENOMEM; - goto err_rdb; - } + ret = mthca_table_get(dev, dev->qp_table.rdb_table, + qp->qpn << dev->qp_table.rdb_shift); + if (ret) + goto err_eqpc; - qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ, - qp->qpn, &qp->sq.db); - if (qp->sq.db_index < 0) { - ret = -ENOMEM; - goto err_rq_db; - } } return 0; -err_rq_db: - mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); - -err_rdb: - mthca_table_put(dev, dev->qp_table.rdb_table, - qp->qpn << dev->qp_table.rdb_shift); - err_eqpc: mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); @@ -1105,6 +1123,35 @@ err_qpc: return ret; } +static void mthca_unmap_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + 
mthca_table_put(dev, dev->qp_table.rdb_table, + qp->qpn << dev->qp_table.rdb_shift); + mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); + mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn); +} + +static int mthca_alloc_memfree(struct mthca_dev *dev, + struct mthca_qp *qp) +{ + int ret = 0; + + if (mthca_is_memfree(dev)) { + qp->rq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_RQ, + qp->qpn, &qp->rq.db); + if (qp->rq.db_index < 0) + return ret; + + qp->sq.db_index = mthca_alloc_db(dev, MTHCA_DB_TYPE_SQ, + qp->qpn, &qp->sq.db); + if (qp->sq.db_index < 0) + mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); + } + + return ret; +} + static void mthca_free_memfree(struct mthca_dev *dev, struct mthca_qp *qp) { @@ -1112,11 +1159,6 @@ static void mthca_free_memfree(struct mt mthca_free_db(dev, MTHCA_DB_TYPE_SQ, qp->sq.db_index); mthca_free_db(dev, MTHCA_DB_TYPE_RQ, qp->rq.db_index); } - - mthca_table_put(dev, dev->qp_table.rdb_table, - qp->qpn << dev->qp_table.rdb_shift); - mthca_table_put(dev, dev->qp_table.eqp_table, qp->qpn); - mthca_table_put(dev, dev->qp_table.qp_table, qp->qpn); } static void mthca_wq_init(struct mthca_wq* wq) @@ -1147,13 +1189,28 @@ static int mthca_alloc_qp_common(struct mthca_wq_init(&qp->sq); mthca_wq_init(&qp->rq); - ret = mthca_alloc_memfree(dev, qp); + ret = mthca_map_memfree(dev, qp); if (ret) return ret; ret = mthca_alloc_wqe_buf(dev, pd, qp); if (ret) { - mthca_free_memfree(dev, qp); + mthca_unmap_memfree(dev, qp); + return ret; + } + + /* + * If this is a userspace QP, we're done now. The doorbells + * will be allocated and buffers will be initialized in + * userspace. 
+ */ + if (pd->ibpd.uobject) + return 0; + + ret = mthca_alloc_memfree(dev, qp); + if (ret) { + mthca_free_wqe_buf(dev, qp); + mthca_unmap_memfree(dev, qp); return ret; } @@ -1186,22 +1243,39 @@ static int mthca_alloc_qp_common(struct return 0; } -static void mthca_align_qp_size(struct mthca_dev *dev, struct mthca_qp *qp) +static int mthca_set_qp_size(struct mthca_dev *dev, struct ib_qp_cap *cap, + struct mthca_qp *qp) { - int i; - - if (!mthca_is_memfree(dev)) - return; + /* Sanity check QP size before proceeding */ + if (cap->max_send_wr > 65536 || cap->max_recv_wr > 65536 || + cap->max_send_sge > 64 || cap->max_recv_sge > 64) + return -EINVAL; - for (i = 0; 1 << i < qp->rq.max; ++i) - ; /* nothing */ + if (mthca_is_memfree(dev)) { + qp->rq.max = cap->max_recv_wr ? + roundup_pow_of_two(cap->max_recv_wr) : 0; + qp->sq.max = cap->max_send_wr ? + roundup_pow_of_two(cap->max_send_wr) : 0; + } else { + qp->rq.max = cap->max_recv_wr; + qp->sq.max = cap->max_send_wr; + } - qp->rq.max = 1 << i; + qp->rq.max_gs = cap->max_recv_sge; + qp->sq.max_gs = max_t(int, cap->max_send_sge, + ALIGN(cap->max_inline_data + MTHCA_INLINE_HEADER_SIZE, + MTHCA_INLINE_CHUNK_SIZE) / + sizeof (struct mthca_data_seg)); - for (i = 0; 1 << i < qp->sq.max; ++i) - ; /* nothing */ + /* + * For MLX transport we need 2 extra S/G entries: + * one for the header and one for the checksum at the end + */ + if ((qp->transport == MLX && qp->sq.max_gs + 2 > dev->limits.max_sg) || + qp->sq.max_gs > dev->limits.max_sg || qp->rq.max_gs > dev->limits.max_sg) + return -EINVAL; - qp->sq.max = 1 << i; + return 0; } int mthca_alloc_qp(struct mthca_dev *dev, @@ -1210,11 +1284,14 @@ int mthca_alloc_qp(struct mthca_dev *dev struct mthca_cq *recv_cq, enum ib_qp_type type, enum ib_sig_type send_policy, + struct ib_qp_cap *cap, struct mthca_qp *qp) { int err; - mthca_align_qp_size(dev, qp); + err = mthca_set_qp_size(dev, cap, qp); + if (err) + return err; switch (type) { case IB_QPT_RC: qp->transport = RC; break; @@ 
-1247,14 +1324,17 @@ int mthca_alloc_sqp(struct mthca_dev *de struct mthca_cq *send_cq, struct mthca_cq *recv_cq, enum ib_sig_type send_policy, + struct ib_qp_cap *cap, int qpn, int port, struct mthca_sqp *sqp) { - int err = 0; u32 mqpn = qpn * 2 + dev->qp_table.sqp_start + port - 1; + int err; - mthca_align_qp_size(dev, &sqp->qp); + err = mthca_set_qp_size(dev, cap, &sqp->qp); + if (err) + return err; sqp->header_buf_size = sqp->qp.sq.max * MTHCA_UD_HEADER_SIZE; sqp->header_buf = dma_alloc_coherent(&dev->pdev->dev, sqp->header_buf_size, @@ -1313,8 +1393,6 @@ void mthca_free_qp(struct mthca_dev *dev struct mthca_qp *qp) { u8 status; - int size; - int i; struct mthca_cq *send_cq; struct mthca_cq *recv_cq; @@ -1344,31 +1422,22 @@ void mthca_free_qp(struct mthca_dev *dev if (qp->state != IB_QPS_RESET) mthca_MODIFY_QP(dev, MTHCA_TRANS_ANY2RST, qp->qpn, 0, NULL, 0, &status); - mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn); - if (qp->ibqp.send_cq != qp->ibqp.recv_cq) - mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn); - - mthca_free_mr(dev, &qp->mr); - - size = PAGE_ALIGN(qp->send_wqe_offset + - (qp->sq.max << qp->sq.wqe_shift)); + /* + * If this is a userspace QP, the buffers, MR, CQs and so on + * will be cleaned up in userspace, so all we have to do is + * unref the mem-free tables and free the QPN in our table. 
+ */ + if (!qp->ibqp.uobject) { + mthca_cq_clean(dev, to_mcq(qp->ibqp.send_cq)->cqn, qp->qpn); + if (qp->ibqp.send_cq != qp->ibqp.recv_cq) + mthca_cq_clean(dev, to_mcq(qp->ibqp.recv_cq)->cqn, qp->qpn); - if (qp->is_direct) { - pci_free_consistent(dev->pdev, size, - qp->queue.direct.buf, - pci_unmap_addr(&qp->queue.direct, mapping)); - } else { - for (i = 0; i < size / PAGE_SIZE; ++i) { - pci_free_consistent(dev->pdev, PAGE_SIZE, - qp->queue.page_list[i].buf, - pci_unmap_addr(&qp->queue.page_list[i], - mapping)); - } + mthca_free_mr(dev, &qp->mr); + mthca_free_memfree(dev, qp); + mthca_free_wqe_buf(dev, qp); } - kfree(qp->wrid); - - mthca_free_memfree(dev, qp); + mthca_unmap_memfree(dev, qp); if (is_sqp(dev, qp)) { atomic_dec(&(to_mpd(qp->ibqp.pd)->sqp_count)); ^ permalink raw reply [flat|nested] 37+ messages in thread
* [PATCH 16/16] IB uverbs: add documentation file 2005-06-28 23:03 ` [PATCH 15/16] IB uverbs: add mthca user QP support Roland Dreier @ 2005-06-28 23:03 ` Roland Dreier 0 siblings, 0 replies; 37+ messages in thread From: Roland Dreier @ 2005-06-28 23:03 UTC (permalink / raw) To: akpm; +Cc: linux-kernel, openib-general Add documentation for InfiniBand userspace verbs. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- Documentation/infiniband/user_verbs.txt | 69 ++++++++++++++++++++++++++++++++ 1 files changed, 69 insertions(+) --- /dev/null 2005-06-23 14:14:38.423479552 -0700 +++ linux/Documentation/infiniband/user_verbs.txt 2005-06-28 15:20:31.245140214 -0700 @@ -0,0 +1,69 @@ +USERSPACE VERBS ACCESS + + The ib_uverbs module, built by enabling CONFIG_INFINIBAND_USER_VERBS, + enables direct userspace access to IB hardware via "verbs," as + described in chapter 11 of the InfiniBand Architecture Specification. + + To use the verbs, the libibverbs library, available from + <http://openib.org/>, is required. libibverbs contains a + device-independent API for using the ib_uverbs interface. + libibverbs also requires appropriate device-dependent kernel and + userspace drivers for your InfiniBand hardware. For example, to use + a Mellanox HCA, you will need the ib_mthca kernel module and the + libmthca userspace driver to be installed. + +User-kernel communication + + Userspace communicates with the kernel for slow path, resource + management operations via the /dev/infiniband/uverbsN character + devices. Fast path operations are typically performed by writing + directly to hardware registers mmap()ed into userspace, with no + system call or context switch into the kernel. + + Commands are sent to the kernel via write()s on these device files. + The ABI is defined in drivers/infiniband/include/ib_user_verbs.h. + The structs for commands that require a response from the kernel + contain a 64-bit field used to pass a pointer to an output buffer.
+ Status is returned to userspace as the return value of the write() + system call. + +Resource management + + Since creation and destruction of all IB resources is done by + commands passed through a file descriptor, the kernel can keep track + of which resources are attached to a given userspace context. The + ib_uverbs module maintains idr tables that are used to translate + between kernel pointers and opaque userspace handles, so that kernel + pointers are never exposed to userspace and userspace cannot trick + the kernel into following a bogus pointer. + + This also allows the kernel to clean up when a process exits and + prevent one process from touching another process's resources. + +Memory pinning + + Direct userspace I/O requires that memory regions that are potential + I/O targets be kept resident at the same physical address. The + ib_uverbs module manages pinning and unpinning memory regions via + get_user_pages() and put_page() calls. It also accounts for the + amount of memory pinned in the process's locked_vm, and checks that + unprivileged processes do not exceed their RLIMIT_MEMLOCK limit. + + Pages that are pinned multiple times are counted each time they are + pinned, so the value of locked_vm may be an overestimate of the + number of pages pinned by a process. + +/dev files + + To create the appropriate character device files automatically with + udev, a rule like + + KERNEL="uverbs*", NAME="infiniband/%k" + + can be used. This will create device nodes named + + /dev/infiniband/uverbs0 + + and so on. Since the InfiniBand userspace verbs should be safe for + use by non-privileged processes, it may be useful to add an + appropriate MODE or GROUP to the udev rule. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH 14/16] IB uverbs: add mthca user CQ support 2005-06-28 23:03 ` [PATCH 14/16] IB uverbs: add mthca user CQ support Roland Dreier 2005-06-28 23:03 ` [PATCH 15/16] IB uverbs: add mthca user QP support Roland Dreier @ 2005-06-29 0:10 ` Andrew Morton 2005-06-29 16:06 ` Roland Dreier 1 sibling, 1 reply; 37+ messages in thread From: Andrew Morton @ 2005-06-29 0:10 UTC (permalink / raw) To: Roland Dreier; +Cc: linux-kernel, openib-general Roland Dreier <rolandd@cisco.com> wrote: > > Add support for userspace completion queues (CQs) to mthca. > There are more interesting things happening here ;) > @@ -177,6 +177,7 @@ struct mthca_cq { > int cqn; > u32 cons_index; > int is_direct; > + int is_kernel; > > /* Next fields are Arbel only */ > int set_ci_db_index; I assume we have one body of code which is capable of handling data structures in either kernel memory or user memory? (guess). If so, that's a fairly sensitive thing to be doing. Tell us more, please. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH 14/16] IB uverbs: add mthca user CQ support 2005-06-29 0:10 ` [PATCH 14/16] IB uverbs: add mthca user CQ support Andrew Morton @ 2005-06-29 16:06 ` Roland Dreier 0 siblings, 0 replies; 37+ messages in thread From: Roland Dreier @ 2005-06-29 16:06 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel, openib-general >> + int is_kernel; Andrew> I assume we have one body of code which is capable of Andrew> handling data structures in either kernel memory or user Andrew> memory? (guess). Andrew> If so, that's a fairly sensitive thing to be doing. Tell Andrew> us more, please. It's actually not that bad. A completion queue (CQ) is basically a chunk of memory where completion information is written when a work request completes. The hardware can handle many CQs (64K is not an unreasonable number), and we always do things like allocation of CQ numbers, programming HW for CQ context, etc. in the kernel. Both the kernel and userspace can do data path operations like looking for a new CQ entry. This means that for userspace CQs, the actual memory where entries are written should be in userspace. However the struct mthca_cq will always be in the kernel. If you look at how the is_kernel flag is used, you can see that all it does is control whether we allocate/free the actual buffer and a few other things in the kernel, or just use the stuff that userspace has already allocated. - R. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH 12/16] IB uverbs: add mthca user PD support 2005-06-28 23:03 ` [PATCH 12/16] IB uverbs: add mthca user PD support Roland Dreier 2005-06-28 23:03 ` [PATCH 13/16] IB uverbs: add mthca user MR support Roland Dreier @ 2005-06-29 0:07 ` Andrew Morton 2005-06-29 16:06 ` Roland Dreier 1 sibling, 1 reply; 37+ messages in thread From: Andrew Morton @ 2005-06-29 0:07 UTC (permalink / raw) To: Roland Dreier; +Cc: linux-kernel, openib-general Roland Dreier <rolandd@cisco.com> wrote: > > Add support for userspace protection domains (PDs) to mthca. What is a userspace protection domain? ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH 12/16] IB uverbs: add mthca user PD support 2005-06-29 0:07 ` [PATCH 12/16] IB uverbs: add mthca user PD support Andrew Morton @ 2005-06-29 16:06 ` Roland Dreier 0 siblings, 0 replies; 37+ messages in thread From: Roland Dreier @ 2005-06-29 16:06 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel, openib-general Andrew> What is a userspace protection domain? A protection domain is an abstraction enforced by IB hardware -- loosely put, every resource (work queue, memory region, etc) is put in a PD when it is created, and different resources can only see each other if they belong to the same PD. As an example, PDs are needed because IB allows unprivileged processes to directly post requests to work queues. Work requests refer to memory regions by memory keys (32 bit cookies). Without PDs, a process could get access to another process's memory region if it could guess the 32-bit key -- with PDs, it can't because the other process's memory region will be in a different PD from its work queue. - R. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH 11/16] IB uverbs: add mthca mmap support 2005-06-28 23:03 ` [PATCH 11/16] IB uverbs: add mthca mmap support Roland Dreier 2005-06-28 23:03 ` [PATCH 12/16] IB uverbs: add mthca user PD support Roland Dreier @ 2005-06-29 0:05 ` Andrew Morton 2005-06-29 16:06 ` Roland Dreier 2005-07-05 19:20 ` Roland Dreier 1 sibling, 2 replies; 37+ messages in thread From: Andrew Morton @ 2005-06-29 0:05 UTC (permalink / raw) To: Roland Dreier; +Cc: linux-kernel, openib-general Roland Dreier <rolandd@cisco.com> wrote: > > Add support for mmap() method to mthca, so that userspace can get > access to doorbell registers. This allows userspace to get direct > access to the HCA for data path operations. > > Each userspace context gets its own copy of the doorbell registers and > is only allowed to use resources that the kernel has given it access > to. In other words, this is safe. > > ... > > +static int mthca_mmap_uar(struct ib_ucontext *context, > + struct vm_area_struct *vma) > +{ > + if (vma->vm_end - vma->vm_start != PAGE_SIZE) > + return -EINVAL; > + > + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); > + vma->vm_flags |= VM_DONTCOPY; > + > + if (remap_pfn_range(vma, vma->vm_start, > + to_mucontext(context)->uar.pfn, > + PAGE_SIZE, vma->vm_page_prot)) > + return -EAGAIN; > + > + return 0; > +} What's the thinking behind the VM_DONTCOPY there? What's actually being mapped here? Hardware? If so, is VM_IO not needed? ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH 11/16] IB uverbs: add mthca mmap support 2005-06-29 0:05 ` [PATCH 11/16] IB uverbs: add mthca mmap support Andrew Morton @ 2005-06-29 16:06 ` Roland Dreier 2005-07-05 19:20 ` Roland Dreier 1 sibling, 0 replies; 37+ messages in thread From: Roland Dreier @ 2005-06-29 16:06 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel, openib-general Andrew> What's the thinking behind the VM_DONTCOPY there? I think that was my paranoia about something like a process doing a fork, the original process exiting, and the new process having page still mapped even though the file has been released. This is bad because then we could map the same page to a different process and have them collide. But it seems that there will always be a reference to the underlying struct file as long as someone has this mapping, so I don't really need to worry about this and the VM_DONTCOPY can go. Andrew> What's actually being mapped here? Hardware? If so, is Andrew> VM_IO not needed? Yes, this is a page from a PCI BAR. However, we use remap_pfn_range() to map the page, which sets VM_IO already. - R. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH 11/16] IB uverbs: add mthca mmap support 2005-06-29 0:05 ` [PATCH 11/16] IB uverbs: add mthca mmap support Andrew Morton 2005-06-29 16:06 ` Roland Dreier @ 2005-07-05 19:20 ` Roland Dreier 2005-07-05 20:53 ` Michael S. Tsirkin 1 sibling, 1 reply; 37+ messages in thread From: Roland Dreier @ 2005-07-05 19:20 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel, openib-general Andrew> What's the thinking behind the VM_DONTCOPY there? As I said before, I don't think the thinking behind VM_DONTCOPY was correct thinking. Let's take it out. I've now answered all your questions on this patchset (or at least written something in response to all your questions ;). What's your feeling on merging? If more info is required, just let me know. I'll also be at the kernel summit in a couple of weeks if you want to go over it in person. - R. There's no need to set VM_DONTCOPY when mmap()ing the hardware doorbell page into userspace. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- linux-export.orig/drivers/infiniband/hw/mthca/mthca_provider.c 2005-06-28 15:20:28.000000000 -0700 +++ linux-export/drivers/infiniband/hw/mthca/mthca_provider.c 2005-07-05 12:14:20.838664330 -0700 @@ -347,7 +347,6 @@ return -EINVAL; vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); - vma->vm_flags |= VM_DONTCOPY; if (remap_pfn_range(vma, vma->vm_start, to_mucontext(context)->uar.pfn, ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH 11/16] IB uverbs: add mthca mmap support 2005-07-05 19:20 ` Roland Dreier @ 2005-07-05 20:53 ` Michael S. Tsirkin 2005-07-05 22:07 ` Roland Dreier 0 siblings, 1 reply; 37+ messages in thread From: Michael S. Tsirkin @ 2005-07-05 20:53 UTC (permalink / raw) To: Roland Dreier; +Cc: Andrew Morton, linux-kernel, openib-general Quoting r. Roland Dreier <rolandd@cisco.com>: > Subject: Re: [PATCH 11/16] IB uverbs: add mthca mmap support > > Andrew> What's the thinking behind the VM_DONTCOPY there? > > As I said before, I don't think the thinking behind VM_DONTCOPY was > correct thinking. Let's take it out. Roland, I think VM_DONTCOPY is needed here. If a process forks, we must prevent the child from accessing the parent's hardware page. Otherwise the child can corrupt the parent's queues since the hardware won't be able to distinguish between parent and child. Does this make sense? -- MST ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH 11/16] IB uverbs: add mthca mmap support 2005-07-05 20:53 ` Michael S. Tsirkin @ 2005-07-05 22:07 ` Roland Dreier 0 siblings, 0 replies; 37+ messages in thread From: Roland Dreier @ 2005-07-05 22:07 UTC (permalink / raw) To: Michael S. Tsirkin; +Cc: Andrew Morton, linux-kernel, openib-general Michael> Roland, I think VM_DONTCOPY is needed here. Michael> If a process forks, we must prevent the child from Michael> accessing the parent's hardware page. Otherwise the child Michael> can corrupt the parent's queues since the hardware wont Michael> be able to distinguish between parent and child. Michael> Does this make sense? This is true, but there are a number of pieces that are required before fork will work for processes using userspace verbs. One of the ingredients that's missing is adding something like PROT_DONTCOPY for mprotect(). Once that's in place, an app can use that on the doorbell page before forking. I don't consider this attack by children of a process very serious, since a process can always fork, munmap the doorbell page in the child process, and then fork the untrusted child into yet another child. - R. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH 06/16] IB uverbs: memory pinning implementation 2005-06-28 23:03 ` [PATCH 06/16] IB uverbs: memory pinning implementation Roland Dreier 2005-06-28 23:03 ` [PATCH 07/16] IB uverbs: hook up Kconfig/Makefile Roland Dreier @ 2005-06-29 0:02 ` Andrew Morton 2005-06-29 16:06 ` Roland Dreier 1 sibling, 1 reply; 37+ messages in thread From: Andrew Morton @ 2005-06-29 0:02 UTC (permalink / raw) To: Roland Dreier; +Cc: linux-kernel, openib-general Roland Dreier <rolandd@cisco.com> wrote: > > Add support for pinning userspace memory regions and returning a list > of pages in the region. This includes tracking pinned memory against > vm_locked and preventing unprivileged users from exceeding RLIMIT_MEMLOCK. > Can you tell us a bit more about the design ideas here? What's it doing, how and why? We should look at these things and also decide whether some of this should live in mm/*. > +int ib_umem_get(struct ib_device *dev, struct ib_umem *mem, > + void *addr, size_t size, int write) > +{ > ... > + if (!can_do_mlock()) > + return -EPERM; > + > ... > + if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) { The capable() test is redundant. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH 06/16] IB uverbs: memory pinning implementation 2005-06-29 0:02 ` [PATCH 06/16] IB uverbs: memory pinning implementation Andrew Morton @ 2005-06-29 16:06 ` Roland Dreier 0 siblings, 0 replies; 37+ messages in thread From: Roland Dreier @ 2005-06-29 16:06 UTC (permalink / raw) To: Andrew Morton; +Cc: linux-kernel, openib-general Roland> Add support for pinning userspace memory regions and Roland> returning a list of pages in the region. This includes Roland> tracking pinned memory against vm_locked and preventing Roland> unprivileged users from exceeding RLIMIT_MEMLOCK. Andrew> Can you tell us a bit more about the design ideas here? Andrew> What's it doing, how and why? The idea is that allowing userspace to handle initiating IO directly requires the kernel to make sure the memory targeted by that IO is kept pinned. This is done by requiring userspace to register the memory regions it will use for IO in advance. The code in uverbs_mem.c helps handle this by providing a function ib_umem_get(), which wraps up calling get_user_pages() and dma_map_sg() for a given piece of userspace address space, and returns a data structure with DMA addresses for region. Since userspace can potentially register huge chunks of memory, the code breaks up the calls to get_user_pages() and dma_map_sg() into chunks, and the umem data structure has a linked list of these chunks. Andrew> We should look at these things and also decide whether Andrew> some of this should live in mm/*. I thought about that a little while I was writing the code. The only thing that seemed generic enough was the logic for vm_locked accounting and checking against RLIMIT_MEMLOCK. I wasn't smart enough to come up with a way to encapsulate it that seemed any easier to read or maintain than just spelling the logic out. 
iWARP (basically RDMA over TCP) will also want to use the memory pinning code here, but I think the best plan for handling iWARP is to evolve drivers/infiniband into a more generic drivers/rdma -- in which case, this code is fine where it is. So... no objection to making it generic or putting it somewhere else, but there's not anything deep going on here. - R. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH 05/16] IB uverbs: core implementation 2005-06-28 23:03 ` [PATCH 05/16] IB uverbs: core implementation Roland Dreier 2005-06-28 23:03 ` [PATCH 06/16] IB uverbs: memory pinning implementation Roland Dreier @ 2005-06-29 0:27 ` Greg KH 2005-06-29 1:38 ` [openib-general] " Tom Duffy ` (4 more replies) 1 sibling, 5 replies; 37+ messages in thread From: Greg KH @ 2005-06-29 0:27 UTC (permalink / raw) To: Roland Dreier; +Cc: akpm, linux-kernel, openib-general On Tue, Jun 28, 2005 at 04:03:43PM -0700, Roland Dreier wrote: > +++ linux/drivers/infiniband/core/uverbs_main.c 2005-06-28 15:20:04.363963991 -0700 > @@ -0,0 +1,708 @@ > +/* > + * Copyright (c) 2005 Topspin Communications. All rights reserved. > + * Copyright (c) 2005 Cisco Systems. All rights reserved. > + * > + * This software is available to you under a choice of one of two > + * licenses. You may choose to be licensed under the terms of the GNU > + * General Public License (GPL) Version 2, available from the file > + * COPYING in the main directory of this source tree, or the > + * OpenIB.org BSD license below: Ok, I've complained about this before, but due to the fact that you are calling EXPORT_SYMBOL_GPL() only functions in this code, the ability for it for someone to use the BSD license on it in the future, is pretty much impossible, right? Wasn't the openib group going to drop this horrible license, or are they still insisting on porting this to other operating systems? > +static ssize_t show_dev(struct class_device *class_dev, char *buf) > +{ > + struct ib_uverbs_device *dev = > + container_of(class_dev, struct ib_uverbs_device, class_dev); > + > + return print_dev_t(buf, dev->dev.dev); > +} > +static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL); This is no longer needed with the class device interface in the kernel today. Please use the new api (basically just set dev_t in the class_device, and you get this for free.) 
thanks, greg k-h ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [openib-general] Re: [PATCH 05/16] IB uverbs: core implementation 2005-06-29 0:27 ` [PATCH 05/16] IB uverbs: core implementation Greg KH @ 2005-06-29 1:38 ` Tom Duffy 2005-06-29 4:13 ` Troy Benjegerdes ` (3 subsequent siblings) 4 siblings, 0 replies; 37+ messages in thread From: Tom Duffy @ 2005-06-29 1:38 UTC (permalink / raw) To: Greg KH; +Cc: Roland Dreier, akpm, linux-kernel, openib-general [-- Attachment #1: Type: text/plain, Size: 1224 bytes --] On Tue, 2005-06-28 at 17:27 -0700, Greg KH wrote: > Ok, I've complained about this before, but due to the fact that you are > calling EXPORT_SYMBOL_GPL() only functions in this code, the ability for > it for someone to use the BSD license on it in the future, is pretty > much impossible, right? No, only to call these functions from BSD-only (or other licensed) modules. > Wasn't the openib group going to drop this horrible license, or are they > still insisting on porting this to other operating systems? I don't think we need to drop this license. What is the harm? At some point, Sun may want OpenSolaris to use OpenIB. Or what if the Darwin folks decide to create a port? Don't worry: the OpenIB Windows work is done in a completely different repository with a completely different code base because Microsoft was scared of code that ever *was* GPL, even if a BSD-only fork was created. The bylaws of OpenIB.org require that all code hosted and developed under our auspices be (at least) BSD. I don't want it to happen, but if the code in Linux chooses one license (GPL) and not both, then we won't be able to accept patches back that come in through the mainline kernel. -tduffy [-- Attachment #2: This is a digitally signed message part --] [-- Type: application/pgp-signature, Size: 189 bytes --] ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [openib-general] Re: [PATCH 05/16] IB uverbs: core implementation 2005-06-29 0:27 ` [PATCH 05/16] IB uverbs: core implementation Greg KH 2005-06-29 1:38 ` [openib-general] " Tom Duffy @ 2005-06-29 4:13 ` Troy Benjegerdes 2005-06-29 16:12 ` Greg KH 2005-06-29 16:06 ` Roland Dreier ` (2 subsequent siblings) 4 siblings, 1 reply; 37+ messages in thread From: Troy Benjegerdes @ 2005-06-29 4:13 UTC (permalink / raw) To: Greg KH; +Cc: Roland Dreier, akpm, linux-kernel, openib-general On Tue, Jun 28, 2005 at 05:27:09PM -0700, Greg KH wrote: > On Tue, Jun 28, 2005 at 04:03:43PM -0700, Roland Dreier wrote: > > +++ linux/drivers/infiniband/core/uverbs_main.c 2005-06-28 15:20:04.363963991 -0700 > > @@ -0,0 +1,708 @@ > > +/* > > + * Copyright (c) 2005 Topspin Communications. All rights reserved. > > + * Copyright (c) 2005 Cisco Systems. All rights reserved. > > + * > > + * This software is available to you under a choice of one of two > > + * licenses. You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > Ok, I've complained about this before, but due to the fact that you are > calling EXPORT_SYMBOL_GPL() only functions in this code, the ability for > it for someone to use the BSD license on it in the future, is pretty > much impossible, right? Only if someone tries to use it under a BSD license, strips off the GPL notices, and then builds it against *Linux*. If linux-kernel is going to be that fascist about licensing, let's please clean up all the binary firmware blobs in header files first. It seems reasonable to me that distribution and modification of the *source code* can be under either license. But as soon as you build a binary against the linux kernel, the binary is irrevocably GPL licensed. 
-------------------------------------------------------------------------- Troy Benjegerdes 'da hozer' hozer@hozed.org Someone asked me why I work on this free (http://www.fsf.org/philosophy/) software stuff and not get a real job. Charles Shultz had the best answer: "Why do musicians compose symphonies and poets write poems? They do it because life wouldn't have any meaning for them if they didn't. That's why I draw cartoons. It's my life." -- Charles Shultz ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [openib-general] Re: [PATCH 05/16] IB uverbs: core implementation 2005-06-29 4:13 ` Troy Benjegerdes @ 2005-06-29 16:12 ` Greg KH 2005-06-29 16:32 ` Troy Benjegerdes 0 siblings, 1 reply; 37+ messages in thread From: Greg KH @ 2005-06-29 16:12 UTC (permalink / raw) To: Troy Benjegerdes; +Cc: Roland Dreier, akpm, linux-kernel, openib-general On Tue, Jun 28, 2005 at 11:13:22PM -0500, Troy Benjegerdes wrote: > On Tue, Jun 28, 2005 at 05:27:09PM -0700, Greg KH wrote: > > On Tue, Jun 28, 2005 at 04:03:43PM -0700, Roland Dreier wrote: > > > +++ linux/drivers/infiniband/core/uverbs_main.c 2005-06-28 15:20:04.363963991 -0700 > > > @@ -0,0 +1,708 @@ > > > +/* > > > + * Copyright (c) 2005 Topspin Communications. All rights reserved. > > > + * Copyright (c) 2005 Cisco Systems. All rights reserved. > > > + * > > > + * This software is available to you under a choice of one of two > > > + * licenses. You may choose to be licensed under the terms of the GNU > > > + * General Public License (GPL) Version 2, available from the file > > > + * COPYING in the main directory of this source tree, or the > > > + * OpenIB.org BSD license below: > > > > Ok, I've complained about this before, but due to the fact that you are > > calling EXPORT_SYMBOL_GPL() only functions in this code, the ability for > > it for someone to use the BSD license on it in the future, is pretty > > much impossible, right? > > Only if someone tries to use it under a BSD license, strips off the GPL > notices, and then builds it against *Linux*. Exactly, that's my point. It's pretty useless, and if you are going to build this code for another OS, well, that's going to be a tough job :) > If linux-kernel is going to be that fascist about licensing, let's > please clean up all the binary firmware blobs in header files first. I'm not being "fascist", I'm just saying it's pretty pointless to try to dual license this code, that's all. thanks, greg k-h ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [openib-general] Re: [PATCH 05/16] IB uverbs: core implementation 2005-06-29 16:12 ` Greg KH @ 2005-06-29 16:32 ` Troy Benjegerdes 0 siblings, 0 replies; 37+ messages in thread From: Troy Benjegerdes @ 2005-06-29 16:32 UTC (permalink / raw) To: Greg KH; +Cc: Roland Dreier, akpm, linux-kernel, openib-general On Wed, Jun 29, 2005 at 09:12:09AM -0700, Greg KH wrote: > On Tue, Jun 28, 2005 at 11:13:22PM -0500, Troy Benjegerdes wrote: > > On Tue, Jun 28, 2005 at 05:27:09PM -0700, Greg KH wrote: > > > On Tue, Jun 28, 2005 at 04:03:43PM -0700, Roland Dreier wrote: > > > > +++ linux/drivers/infiniband/core/uverbs_main.c 2005-06-28 15:20:04.363963991 -0700 > > > > @@ -0,0 +1,708 @@ > > > > +/* > > > > + * Copyright (c) 2005 Topspin Communications. All rights reserved. > > > > + * Copyright (c) 2005 Cisco Systems. All rights reserved. > > > > + * > > > > + * This software is available to you under a choice of one of two > > > > + * licenses. You may choose to be licensed under the terms of the GNU > > > > + * General Public License (GPL) Version 2, available from the file > > > > + * COPYING in the main directory of this source tree, or the > > > > + * OpenIB.org BSD license below: > > > > > > Ok, I've complained about this before, but due to the fact that you are > > > calling EXPORT_SYMBOL_GPL() only functions in this code, the ability for > > > it for someone to use the BSD license on it in the future, is pretty > > > much impossible, right? > > > > Only if someone tries to use it under a BSD license, strips off the GPL > > notices, and then builds it against *Linux*. > > Exactly, that's my point. It's pretty useless, and if you are going to > build this code for another OS, well, that's going to be a tough job :) > > > If linux-kernel is going to be that fascist about licensing, let's > > please clean up all the binary firmware blobs in header files first. 
> > I'm not being "fascist", I'm just saying it's pretty pointless to try to > dual license this code, that's all. Ahh.. I think the point of the dual-license is that there is a lot of non linux-specific Infiniband code that will (hopefully) be useful on other platforms where a BSD license might be more useful. If for some reason I decided I wanted to run MacOSX, I would at least want to be running the OpenIB infiniband stack, and not some proprietary module. Does anyone have some nice scripts to audit for usage of EXPORT_SYMBOL_GPL only functions? Maybe it's worth trying to clean up the code to clearly delineate what depends on GPL functions and what doesn't. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH 05/16] IB uverbs: core implementation 2005-06-29 0:27 ` [PATCH 05/16] IB uverbs: core implementation Greg KH 2005-06-29 1:38 ` [openib-general] " Tom Duffy 2005-06-29 4:13 ` Troy Benjegerdes @ 2005-06-29 16:06 ` Roland Dreier 2005-06-29 17:01 ` Roland Dreier 2005-06-30 3:13 ` [openib-general] " Ronald G. Minnich 4 siblings, 0 replies; 37+ messages in thread From: Roland Dreier @ 2005-06-29 16:06 UTC (permalink / raw) To: Greg KH; +Cc: akpm, linux-kernel, openib-general Greg> This is no longer needed with the class device interface in Greg> the kernel today. Please use the new api (basically just Greg> set dev_t in the class_device, and you get this for free.) Thanks, I've killed that code and just set class_dev.devt instead. - R. ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH 05/16] IB uverbs: core implementation 2005-06-29 0:27 ` [PATCH 05/16] IB uverbs: core implementation Greg KH ` (2 preceding siblings ...) 2005-06-29 16:06 ` Roland Dreier @ 2005-06-29 17:01 ` Roland Dreier 2005-06-29 18:03 ` Greg KH 2005-06-30 3:13 ` [openib-general] " Ronald G. Minnich 4 siblings, 1 reply; 37+ messages in thread From: Roland Dreier @ 2005-06-29 17:01 UTC (permalink / raw) To: akpm; +Cc: Greg KH, linux-kernel, openib-general Greg> This is no longer needed with the class device interface in Greg> the kernel today. Please use the new api (basically just Greg> set dev_t in the class_device, and you get this for free.) Here's a patch that applies on top of this patch set that fixes this: Greg KH pointed out that with the new class device code, we can just set class_dev.devt instead of having our own show_dev() function. Signed-off-by: Roland Dreier <rolandd@cisco.com> --- linux.orig/drivers/infiniband/core/uverbs_main.c 2005-06-28 15:20:04.000000000 -0700 +++ linux/drivers/infiniband/core/uverbs_main.c 2005-06-29 09:54:26.560138283 -0700 @@ -509,15 +509,6 @@ .remove = ib_uverbs_remove_one }; -static ssize_t show_dev(struct class_device *class_dev, char *buf) -{ - struct ib_uverbs_device *dev = - container_of(class_dev, struct ib_uverbs_device, class_dev); - - return print_dev_t(buf, dev->dev.dev); -} -static CLASS_DEVICE_ATTR(dev, S_IRUGO, show_dev, NULL); - static ssize_t show_ibdev(struct class_device *class_dev, char *buf) { struct ib_uverbs_device *dev = @@ -584,12 +575,11 @@ uverbs_dev->class_dev.class = &uverbs_class; uverbs_dev->class_dev.dev = device->dma_device; + uverbs_dev->class_dev.devt = uverbs_dev->dev.dev; snprintf(uverbs_dev->class_dev.class_id, BUS_ID_SIZE, "uverbs%d", uverbs_dev->devnum); if (class_device_register(&uverbs_dev->class_dev)) goto err_cdev; - if (class_device_create_file(&uverbs_dev->class_dev, &class_device_attr_dev)) - goto err_class; if (class_device_create_file(&uverbs_dev->class_dev, 
&class_device_attr_ibdev)) goto err_class; ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [PATCH 05/16] IB uverbs: core implementation 2005-06-29 17:01 ` Roland Dreier @ 2005-06-29 18:03 ` Greg KH 0 siblings, 0 replies; 37+ messages in thread From: Greg KH @ 2005-06-29 18:03 UTC (permalink / raw) To: Roland Dreier; +Cc: akpm, linux-kernel, openib-general On Wed, Jun 29, 2005 at 10:01:53AM -0700, Roland Dreier wrote: > Greg> This is no longer needed with the class device interface in > Greg> the kernel today. Please use the new api (basically just > Greg> set dev_t in the class_device, and you get this for free.) > > Here's a patch that applies on top of this patch set that fixes this: > > > Greg KH pointed out that with the new class device code, we can just > set class_dev.devt instead of having our own show_dev() function. > > Signed-off-by: Roland Dreier <rolandd@cisco.com> Nice, thanks for doing this. You also get better userspace support as now the MAJOR and MINOR environment variables are set for the hotplug event when you create this device. Which, if Kay is correct, will make udev even faster... thanks, greg k-h ^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [openib-general] Re: [PATCH 05/16] IB uverbs: core implementation 2005-06-29 0:27 ` [PATCH 05/16] IB uverbs: core implementation Greg KH ` (3 preceding siblings ...) 2005-06-29 17:01 ` Roland Dreier @ 2005-06-30 3:13 ` Ronald G. Minnich 4 siblings, 0 replies; 37+ messages in thread From: Ronald G. Minnich @ 2005-06-30 3:13 UTC (permalink / raw) To: Greg KH; +Cc: Roland Dreier, akpm, linux-kernel, openib-general On Tue, 28 Jun 2005, Greg KH wrote: > On Tue, Jun 28, 2005 at 04:03:43PM -0700, Roland Dreier wrote: > > +++ linux/drivers/infiniband/core/uverbs_main.c 2005-06-28 15:20:04.363963991 -0700 > > @@ -0,0 +1,708 @@ > > +/* > > + * Copyright (c) 2005 Topspin Communications. All rights reserved. > > + * Copyright (c) 2005 Cisco Systems. All rights reserved. > > + * > > + * This software is available to you under a choice of one of two > > + * licenses. You may choose to be licensed under the terms of the GNU > > + * General Public License (GPL) Version 2, available from the file > > + * COPYING in the main directory of this source tree, or the > > + * OpenIB.org BSD license below: > > Ok, I've complained about this before, but due to the fact that you are > calling EXPORT_SYMBOL_GPL() only functions in this code, the ability for > it for someone to use the BSD license on it in the future, is pretty > much impossible, right? This does seem odd. If the goal is kernel inclusion, and the kernel is GPL, seems like this license boilerplate should now change. It makes no real sense otherwise, as far as I can tell. ron ^ permalink raw reply [flat|nested] 37+ messages in thread
end of thread, other threads:[~2005-07-05 22:12 UTC | newest] Thread overview: 37+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2005-06-28 23:03 [PATCH 00/16] Add InfiniBand userspace verbs (direct userspace access) Roland Dreier 2005-06-28 23:03 ` [PATCH 01/16] IB uverbs: core API extensions Roland Dreier 2005-06-28 23:03 ` [PATCH 02/16] IB uverbs: update kernel midlayer for new API Roland Dreier 2005-06-28 23:03 ` [PATCH 03/16] IB uverbs: update mthca " Roland Dreier 2005-06-28 23:03 ` [PATCH 04/16] IB uverbs: add user verbs ABI header Roland Dreier 2005-06-28 23:03 ` [PATCH 05/16] IB uverbs: core implementation Roland Dreier 2005-06-28 23:03 ` [PATCH 06/16] IB uverbs: memory pinning implementation Roland Dreier 2005-06-28 23:03 ` [PATCH 07/16] IB uverbs: hook up Kconfig/Makefile Roland Dreier 2005-06-28 23:03 ` [PATCH 08/16] IB uverbs: add mthca ABI header Roland Dreier 2005-06-28 23:03 ` [PATCH 09/16] IB uverbs: add mthca user doorbell record support Roland Dreier 2005-06-28 23:03 ` [PATCH 10/16] IB uverbs: add mthca user context support Roland Dreier 2005-06-28 23:03 ` [PATCH 11/16] IB uverbs: add mthca mmap support Roland Dreier 2005-06-28 23:03 ` [PATCH 12/16] IB uverbs: add mthca user PD support Roland Dreier 2005-06-28 23:03 ` [PATCH 13/16] IB uverbs: add mthca user MR support Roland Dreier 2005-06-28 23:03 ` [PATCH 14/16] IB uverbs: add mthca user CQ support Roland Dreier 2005-06-28 23:03 ` [PATCH 15/16] IB uverbs: add mthca user QP support Roland Dreier 2005-06-28 23:03 ` [PATCH 16/16] IB uverbs: add documentation file Roland Dreier 2005-06-29 0:10 ` [PATCH 14/16] IB uverbs: add mthca user CQ support Andrew Morton 2005-06-29 16:06 ` Roland Dreier 2005-06-29 0:07 ` [PATCH 12/16] IB uverbs: add mthca user PD support Andrew Morton 2005-06-29 16:06 ` Roland Dreier 2005-06-29 0:05 ` [PATCH 11/16] IB uverbs: add mthca mmap support Andrew Morton 2005-06-29 16:06 ` Roland Dreier 2005-07-05 19:20 ` Roland Dreier 
2005-07-05 20:53 ` Michael S. Tsirkin 2005-07-05 22:07 ` Roland Dreier 2005-06-29 0:02 ` [PATCH 06/16] IB uverbs: memory pinning implementation Andrew Morton 2005-06-29 16:06 ` Roland Dreier 2005-06-29 0:27 ` [PATCH 05/16] IB uverbs: core implementation Greg KH 2005-06-29 1:38 ` [openib-general] " Tom Duffy 2005-06-29 4:13 ` Troy Benjegerdes 2005-06-29 16:12 ` Greg KH 2005-06-29 16:32 ` Troy Benjegerdes 2005-06-29 16:06 ` Roland Dreier 2005-06-29 17:01 ` Roland Dreier 2005-06-29 18:03 ` Greg KH 2005-06-30 3:13 ` [openib-general] " Ronald G. Minnich
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.