From mboxrd@z Thu Jan 1 00:00:00 1970 From: Tvrtko Ursulin Subject: Re: [RFC] libdrm_intel: Add support for userptr objects Date: Fri, 02 May 2014 11:27:45 +0100 Message-ID: <53637321.2060901@linux.intel.com> References: <1393432901-31951-1-git-send-email-tvrtko.ursulin@linux.intel.com> <20140501184749.GB2624@bwidawsk.net> Mime-Version: 1.0 Content-Type: text/plain; charset="us-ascii"; Format="flowed" Content-Transfer-Encoding: 7bit Return-path: Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by gabe.freedesktop.org (Postfix) with ESMTP id 57A4589F77 for ; Fri, 2 May 2014 03:27:48 -0700 (PDT) In-Reply-To: <20140501184749.GB2624@bwidawsk.net> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" To: Ben Widawsky Cc: Intel-gfx@lists.freedesktop.org List-Id: intel-gfx@lists.freedesktop.org On 05/01/2014 07:47 PM, Ben Widawsky wrote: > On Wed, Feb 26, 2014 at 04:41:41PM +0000, Tvrtko Ursulin wrote: >> From: Tvrtko Ursulin >> >> Allow userptr objects to be created and used via libdrm_intel. >> >> At the moment tiling and mapping to GTT aperture is not supported >> due hardware limitations across different generations and uncertainty >> about its usefulness. >> >> Signed-off-by: Tvrtko Ursulin >> --- >> include/drm/i915_drm.h | 16 +++++ >> intel/intel_bufmgr.c | 13 ++++ >> intel/intel_bufmgr.h | 5 ++ >> intel/intel_bufmgr_gem.c | 154 +++++++++++++++++++++++++++++++++++++++++++++- >> intel/intel_bufmgr_priv.h | 12 +++- >> 5 files changed, 198 insertions(+), 2 deletions(-) >> >> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h >> index 2f4eb8c..d32ef99 100644 >> --- a/include/drm/i915_drm.h >> +++ b/include/drm/i915_drm.h >> @@ -223,6 +223,7 @@ typedef struct _drm_i915_sarea { >> #define DRM_I915_GEM_GET_CACHING 0x30 >> #define DRM_I915_REG_READ 0x31 >> #define DRM_I915_GET_RESET_STATS 0x32 >> +#define DRM_I915_GEM_USERPTR 0x34 >> >> #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) >> #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) >> @@ -273,6 +274,7 @@ typedef struct _drm_i915_sarea { >> #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy) >> #define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read) >> #define DRM_IOCTL_I915_GET_RESET_STATS DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats) >> +#define DRM_IOCTL_I915_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr) >> >> /* Allow drivers to submit batchbuffers directly to hardware, relying >> * on the security mechanisms provided by hardware. >> @@ -498,6 +500,20 @@ struct drm_i915_gem_mmap_gtt { >> __u64 offset; >> }; >> >> +struct drm_i915_gem_userptr { >> + __u64 user_ptr; >> + __u64 user_size; > > Adding alignment might be a safe bet. Hmmmm, at first I thought you are raising a good point. But then I don't understand why I don't see any aligned types in linux/include/uapi/drm/i915_drm.h ? >> + __u32 flags; >> +#define I915_USERPTR_READ_ONLY 0x1 > > I'll eventually want something like: > #define I915_MIRRORED_GVA 0x2 /* Request the same GPU address */ Plenty of free flags. :) >> +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000 >> + /** >> + * Returned handle for the object. >> + * >> + * Object handles are nonzero. >> + */ >> + __u32 handle; >> +}; >> + >> struct drm_i915_gem_set_domain { >> /** Handle for the object */ >> __u32 handle; >> diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c >> index 905556f..7f3d795 100644 >> --- a/intel/intel_bufmgr.c >> +++ b/intel/intel_bufmgr.c >> @@ -60,6 +60,19 @@ drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, >> return bufmgr->bo_alloc_for_render(bufmgr, name, size, alignment); >> } >> >> +drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, >> + const char *name, void *addr, >> + uint32_t tiling_mode, >> + uint32_t stride, >> + unsigned long size, >> + unsigned long flags) >> +{ >> + if (bufmgr->bo_alloc_userptr) >> + return bufmgr->bo_alloc_userptr(bufmgr, name, addr, tiling_mode, >> + stride, size, flags); >> + return NULL; >> +} >> + >> drm_intel_bo * >> drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, >> int x, int y, int cpp, uint32_t *tiling_mode, >> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h >> index 9383c72..be83a56 100644 >> --- a/intel/intel_bufmgr.h >> +++ b/intel/intel_bufmgr.h >> @@ -113,6 +113,11 @@ drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, >> const char *name, >> unsigned long size, >> unsigned int alignment); >> +drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, >> + const char *name, >> + void *addr, uint32_t tiling_mode, >> + uint32_t stride, unsigned long size, >> + unsigned long flags); >> drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, >> const char *name, >> int x, int y, int cpp, >> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c >> index 007a6d8..7cad945 100644 >> --- a/intel/intel_bufmgr_gem.c >> +++ b/intel/intel_bufmgr_gem.c >> @@ -182,6 +182,11 @@ struct _drm_intel_bo_gem { >> void *mem_virtual; >> /** GTT virtual address for the buffer, saved across map/unmap cycles */ >> void *gtt_virtual; >> + /** >> + * Virtual address of the buffer allocated by user, used for userptr >> + * objects only. >> + */ >> + void *user_virtual; > > Can we not just re-use mem_virtual? Do you intend to provide the ability > to have two distinct CPU mappings to the same object? Haven't heard anything like that. By quick look at the code seems that reusing mem_virtual would require special casing some parts which touch it. So it may be it is actually cleaner as it is. >> int map_count; >> drmMMListHead vma_list; >> >> @@ -221,6 +226,11 @@ struct _drm_intel_bo_gem { >> bool idle; >> >> /** >> + * Boolean of whether this buffer was allocated with userptr >> + */ >> + bool is_userptr; >> + >> + /** >> * Size in bytes of this buffer and its relocation descendents. >> * >> * Used to avoid costly tree walking in >> @@ -847,6 +857,80 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, >> tiling, stride); >> } >> >> +static drm_intel_bo * >> +drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, >> + const char *name, >> + void *addr, >> + uint32_t tiling_mode, >> + uint32_t stride, >> + unsigned long size, >> + unsigned long flags) > > I'd vote for dropping tiling_mode, stride. Make size and flags > explicitly sized types. Not doing this has bitten us already. Some people expressed interest in tiling so I thought to preserve it in the API. Kernel even allows it, and it is just because Chris found bspec references saying it will break badly on some platforms, that I (or maybe it was both of us) decided to disable it on this level for time being. So I think it is just a matter of coming up with a blacklist and it would be possible to use it then. >> +{ >> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; >> + drm_intel_bo_gem *bo_gem; >> + int ret; >> + struct drm_i915_gem_userptr userptr; >> + >> + /* Tiling with userptr surfaces is not supported >> + * on all hardware so refuse it for time being. >> + */ >> + if (tiling_mode != I915_TILING_NONE) >> + return NULL; >> + >> + bo_gem = calloc(1, sizeof(*bo_gem)); >> + if (!bo_gem) >> + return NULL; >> + >> + bo_gem->bo.size = size; >> + >> + VG_CLEAR(userptr); >> + userptr.user_ptr = (__u64)((unsigned long)addr); > > How are we getting away with non page aligned addresses? I'd vote for > moving some of the memalign checks into here if all cases require page > aligned. Kernel will reject any unaligned address or size in the ioctl. >> + userptr.user_size = size; >> + userptr.flags = flags; >> + >> + ret = drmIoctl(bufmgr_gem->fd, >> + DRM_IOCTL_I915_GEM_USERPTR, >> + &userptr); >> + if (ret != 0) { >> + DBG("bo_create_userptr: " >> + "ioctl failed with user ptr %p size 0x%lx, " >> + "user flags 0x%lx\n", addr, size, flags); >> + free(bo_gem); >> + return NULL; >> + } >> + >> + bo_gem->gem_handle = userptr.handle; >> + bo_gem->bo.handle = bo_gem->gem_handle; >> + bo_gem->bo.bufmgr = bufmgr; >> + bo_gem->is_userptr = true; >> + bo_gem->bo.virtual = addr; >> + /* Save the address provided by user */ >> + bo_gem->user_virtual = addr; >> + bo_gem->tiling_mode = I915_TILING_NONE; >> + bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; >> + bo_gem->stride = 0; >> + >> + DRMINITLISTHEAD(&bo_gem->name_list); >> + DRMINITLISTHEAD(&bo_gem->vma_list); >> + >> + bo_gem->name = name; >> + atomic_set(&bo_gem->refcount, 1); >> + bo_gem->validate_index = -1; >> + bo_gem->reloc_tree_fences = 0; >> + bo_gem->used_as_reloc_target = false; >> + bo_gem->has_error = false; >> + bo_gem->reusable = false; > > TODO for someone: Some room to consolidate this with the other create > functions. > >> + >> + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); > > I'm looking at the code and trying to figure out how this is relevant. > It's not introduced by you directly. Just looking at the math it all > seems to kind of mix-up aperture and non-aperture usages. Just talking > to myself... > >> + >> + DBG("bo_create_userptr: " >> + "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n", >> + addr, bo_gem->gem_handle, bo_gem->name, >> + size, stride, tiling_mode); >> + >> + return &bo_gem->bo; >> +} >> + >> /** >> * Returns a drm_intel_bo wrapping the given buffer object handle. >> * >> @@ -1173,6 +1257,12 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) >> struct drm_i915_gem_set_domain set_domain; >> int ret; >> >> + if (bo_gem->is_userptr) { >> + /* Return the same user ptr */ >> + bo->virtual = bo_gem->user_virtual; >> + return 0; >> + } >> + >> pthread_mutex_lock(&bufmgr_gem->lock); >> >> if (bo_gem->map_count++ == 0) >> @@ -1241,6 +1331,9 @@ map_gtt(drm_intel_bo *bo) >> drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; >> int ret; >> >> + if (bo_gem->is_userptr) >> + return -EINVAL; >> + >> if (bo_gem->map_count++ == 0) >> drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); >> >> @@ -1385,13 +1478,18 @@ int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo) >> >> static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) >> { >> - drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; >> + drm_intel_bufmgr_gem *bufmgr_gem; > > I'm missing why this hunk was changed. But, okay. Just to ensure NULL bo is handled nicely I guess. >> drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; >> int ret = 0; >> >> if (bo == NULL) >> return 0; >> >> + if (bo_gem->is_userptr) >> + return 0; >> + >> + bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; >> + >> pthread_mutex_lock(&bufmgr_gem->lock); >> >> if (bo_gem->map_count <= 0) { >> @@ -1449,6 +1547,9 @@ drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, >> struct drm_i915_gem_pwrite pwrite; >> int ret; >> >> + if (bo_gem->is_userptr) >> + return -EINVAL; >> + >> VG_CLEAR(pwrite); >> pwrite.handle = bo_gem->gem_handle; >> pwrite.offset = offset; >> @@ -1501,6 +1602,9 @@ drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, >> struct drm_i915_gem_pread pread; >> int ret; >> >> + if (bo_gem->is_userptr) >> + return -EINVAL; >> + >> VG_CLEAR(pread); >> pread.handle = bo_gem->gem_handle; >> pread.offset = offset; >> @@ -2460,6 +2564,12 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, >> drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; >> int ret; >> >> + /* Tiling with userptr surfaces is not supported >> + * on all hardware so refuse it for time being. >> + */ >> + if (bo_gem->is_userptr) >> + return -EINVAL; >> + >> /* Linear buffers have no stride. By ensuring that we only ever use >> * stride 0 with linear buffers, we simplify our code. >> */ >> @@ -3181,6 +3291,44 @@ drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, >> bo_gem->aub_annotation_count = count; >> } >> >> +static bool >> +has_userptr(int fd) >> +{ >> + int ret; >> + void *ptr; >> + long pgsz; >> + struct drm_i915_gem_userptr userptr; >> + struct drm_gem_close close_bo; >> + >> + pgsz = sysconf(_SC_PAGESIZE); >> + assert(ret > 0); >> + >> + ret = posix_memalign(&ptr, pgsz, pgsz); >> + assert(ret == 0); >> + >> + memset(&userptr, 0, sizeof(userptr)); >> + userptr.user_ptr = (__u64)(unsigned long)ptr; >> + userptr.user_size = pgsz; >> + >> +retry: >> + ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); >> + if (ret) { >> + if (errno == ENODEV && userptr.flags == 0) { >> + userptr.flags = I915_USERPTR_UNSYNCHRONIZED; >> + goto retry; >> + } >> + free(ptr); >> + return false; >> + } >> + >> + close_bo.handle = userptr.handle; >> + ret = drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo); >> + assert(ret == 0); >> + free(ptr); >> + >> + return true; >> +} > > I think a param for USERPTR is warranted. Or did we decide not to do > this already? I asked for it, but people with authority said no. It is all very weak, fragile and dangerous anyway - param or feature detect like the above. We would really need a proper feature detect mechanism, like text based in sysfs or something. >> + >> /** >> * Initializes the GEM buffer manager, which uses the kernel to allocate, map, >> * and manage map buffer objections. >> @@ -3273,6 +3421,10 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size) >> ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); >> bufmgr_gem->has_relaxed_fencing = ret == 0; >> >> + if (has_userptr(fd)) >> + bufmgr_gem->bufmgr.bo_alloc_userptr = >> + drm_intel_gem_bo_alloc_userptr; >> + >> gp.param = I915_PARAM_HAS_WAIT_TIMEOUT; >> ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); >> bufmgr_gem->has_wait_timeout = ret == 0; >> diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h >> index 2592d42..3aa1abb 100644 >> --- a/intel/intel_bufmgr_priv.h >> +++ b/intel/intel_bufmgr_priv.h >> @@ -60,7 +60,17 @@ struct _drm_intel_bufmgr { >> const char *name, >> unsigned long size, >> unsigned int alignment); >> - >> + /** >> + * Allocate a buffer object from an existing user accessible >> + * address malloc'd with the provided size. >> + * Alignment is used when mapping to the gtt. >> + * Flags may be I915_VMAP_READ_ONLY or I915_USERPTR_UNSYNCHRONIZED >> + */ >> + drm_intel_bo *(*bo_alloc_userptr)(drm_intel_bufmgr *bufmgr, >> + const char *name, void *addr, >> + uint32_t tiling_mode, uint32_t stride, >> + unsigned long size, >> + unsigned long flags); >> /** >> * Allocate a tiled buffer object. >> * > > Probably don't need the special function pointer yet since I don't think > we can yet envision use cases which will require any kind of special > handling. A simple has_userptr in bufmgr_gem will probably suffice. But > I don't care too much either way. What do you mean? > I couldn't spot any real bugs... Cool, I am glad there is some interest for this. Regards, Tvrtko