From mboxrd@z Thu Jan 1 00:00:00 1970 From: Ville =?iso-8859-1?Q?Syrj=E4l=E4?= Subject: Re: [PATCH 1/2] drm/i915: Fix dynamic allocation of physical handles Date: Fri, 2 May 2014 20:12:42 +0300 Message-ID: <20140502171241.GU18465@intel.com> References: <1397806024-27336-1-git-send-email-chris@chris-wilson.co.uk> Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable Return-path: Received: from mga11.intel.com (mga11.intel.com [192.55.52.93]) by gabe.freedesktop.org (Postfix) with ESMTP id 4BBEF6E551 for ; Fri, 2 May 2014 10:12:46 -0700 (PDT) Content-Disposition: inline In-Reply-To: <1397806024-27336-1-git-send-email-chris@chris-wilson.co.uk> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" To: Chris Wilson Cc: intel-gfx@lists.freedesktop.org, stable@vger.kernel.org List-Id: intel-gfx@lists.freedesktop.org On Fri, Apr 18, 2014 at 08:27:03AM +0100, Chris Wilson wrote: > A single object may be referenced by multiple registers fundamentally > breaking the static allotment of ids in the current design. When the > object is used the second time, the physical address of the first > assignment is relinquished and a second one granted. However, the > hardware is still reading (and possibly writing) to the old physical > address now returned to the system. Eventually hilarity will ensue, but > in the short term, it just means that cursors are broken when using more > than one pipe. > = > Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=3D77351 > Signed-off-by: Chris Wilson > Cc: stable@vger.kernel.org > --- > drivers/gpu/drm/i915/i915_dma.c | 1 - > drivers/gpu/drm/i915/i915_drv.h | 24 +-- > drivers/gpu/drm/i915/i915_gem.c | 315 ++++++++++++++---------------= ------ > drivers/gpu/drm/i915/intel_display.c | 11 +- > drivers/gpu/drm/i915/intel_overlay.c | 12 +- > 5 files changed, 131 insertions(+), 232 deletions(-) > = > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_= dma.c > index d67ca8051e07..12571aac7516 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -1854,7 +1854,6 @@ int i915_driver_unload(struct drm_device *dev) > flush_workqueue(dev_priv->wq); > = > mutex_lock(&dev->struct_mutex); > - i915_gem_free_all_phys_object(dev); > i915_gem_cleanup_ringbuffer(dev); > i915_gem_context_fini(dev); > WARN_ON(dev_priv->mm.aliasing_ppgtt); > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_= drv.h > index 0e85bf2c3326..a2375a0ef27c 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -246,18 +246,6 @@ struct intel_ddi_plls { > #define WATCH_LISTS 0 > #define WATCH_GTT 0 > = > -#define I915_GEM_PHYS_CURSOR_0 1 > -#define I915_GEM_PHYS_CURSOR_1 2 > -#define I915_GEM_PHYS_OVERLAY_REGS 3 > -#define I915_MAX_PHYS_OBJECT (I915_GEM_PHYS_OVERLAY_REGS) > - > -struct drm_i915_gem_phys_object { > - int id; > - struct page **page_list; > - drm_dma_handle_t *handle; > - struct drm_i915_gem_object *cur_obj; > -}; > - > struct opregion_header; > struct opregion_acpi; > struct opregion_swsci; > @@ -1036,9 +1024,6 @@ struct i915_gem_mm { > /** Bit 6 swizzling required for Y tiling */ > uint32_t bit_6_swizzle_y; > = > - /* storage for physical objects */ > - struct drm_i915_gem_phys_object *phys_objs[I915_MAX_PHYS_OBJECT]; > - > /* accounting, useful for userland debugging */ > spinlock_t object_stat_lock; > size_t object_memory; > @@ -1647,7 +1632,7 @@ struct drm_i915_gem_object { > struct drm_file *pin_filp; > = > /** for phy allocated objects */ > - struct drm_i915_gem_phys_object *phys_obj; > + drm_dma_handle_t *phys_handle; > = > union { > struct i915_gem_userptr { > @@ -2250,13 +2235,8 @@ i915_gem_object_pin_to_display_plane(struct drm_i9= 15_gem_object *obj, > u32 alignment, > struct intel_ring_buffer *pipelined); > void i915_gem_object_unpin_from_display_plane(struct drm_i915_gem_object= *obj); > -int i915_gem_attach_phys_object(struct drm_device *dev, > - struct drm_i915_gem_object *obj, > - int id, > +int i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, > int align); > -void i915_gem_detach_phys_object(struct drm_device *dev, > - struct drm_i915_gem_object *obj); > -void i915_gem_free_all_phys_object(struct drm_device *dev); > int i915_gem_open(struct drm_device *dev, struct drm_file *file); > void i915_gem_release(struct drm_device *dev, struct drm_file *file); > = > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_= gem.c > index 764467ebade5..e302a23031fe 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -46,11 +46,6 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_obj= ect *obj, > static void > i915_gem_object_retire(struct drm_i915_gem_object *obj); > = > -static int i915_gem_phys_pwrite(struct drm_device *dev, > - struct drm_i915_gem_object *obj, > - struct drm_i915_gem_pwrite *args, > - struct drm_file *file); > - > static void i915_gem_write_fence(struct drm_device *dev, int reg, > struct drm_i915_gem_object *obj); > static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj, > @@ -212,6 +207,124 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev,= void *data, > return 0; > } > = > +static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj) > +{ > + drm_dma_handle_t *phys =3D obj->phys_handle; > + > + if (!phys) > + return; > + > + if (obj->madv =3D=3D I915_MADV_WILLNEED) { > + struct address_space *mapping =3D file_inode(obj->base.filp)->i_mappin= g; > + char *vaddr =3D phys->vaddr; > + int i; > + > + for (i =3D 0; i < obj->base.size / PAGE_SIZE; i++) { > + struct page *page =3D shmem_read_mapping_page(mapping, i); > + if (!IS_ERR(page)) { > + char *dst =3D kmap_atomic(page); > + memcpy(dst, vaddr, PAGE_SIZE); > + kunmap_atomic(dst); > + > + drm_clflush_pages(&page, 1); Could clflush while already have it kmapped to avoid double kmap. > + > + set_page_dirty(page); > + mark_page_accessed(page); > + page_cache_release(page); > + } > + vaddr +=3D PAGE_SIZE; > + } > + i915_gem_chipset_flush(obj->base.dev); > + } > + > +#ifdef CONFIG_X86 > + set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE); > +#endif > + drm_pci_free(obj->base.dev, phys); > + obj->phys_handle =3D NULL; > +} > + > +int > +i915_gem_object_attach_phys(struct drm_i915_gem_object *obj, > + int align) > +{ > + drm_dma_handle_t *phys; > + struct address_space *mapping; > + char *vaddr; > + int i; > + > + if (obj->phys_handle) { > + if ((unsigned long)obj->phys_handle->vaddr & (align -1)) > + return -EBUSY; > + > + return 0; > + } > + > + if (obj->madv !=3D I915_MADV_WILLNEED) > + return -EFAULT; > + > + if (obj->base.filp =3D=3D NULL) > + return -EINVAL; > + > + /* create a new object */ > + phys =3D drm_pci_alloc(obj->base.dev, obj->base.size, align); > + if (!phys) > + return -ENOMEM; > + > + vaddr =3D phys->vaddr; > +#ifdef CONFIG_X86 > + set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE); > +#endif > + mapping =3D file_inode(obj->base.filp)->i_mapping; > + for (i =3D 0; i < obj->base.size / PAGE_SIZE; i++) { > + struct page *page; > + char *src; > + > + page =3D shmem_read_mapping_page(mapping, i); > + if (IS_ERR(page)) > + return PTR_ERR(page); This is going to leak. > + > + src =3D kmap_atomic(page); > + memcpy(vaddr, src, PAGE_SIZE); > + kunmap_atomic(src); > + > + mark_page_accessed(page); > + page_cache_release(page); > + > + vaddr +=3D PAGE_SIZE; > + } > + > + obj->phys_handle =3D phys; > + return 0; > +} > + > +static int > +i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, > + struct drm_i915_gem_pwrite *args, > + struct drm_file *file_priv) > +{ > + struct drm_device *dev =3D obj->base.dev; > + void *vaddr =3D obj->phys_handle->vaddr + args->offset; > + char __user *user_data =3D to_user_ptr(args->data_ptr); > + > + if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { > + unsigned long unwritten; > + > + /* The physical object once assigned is fixed for the lifetime > + * of the obj, so we can safely drop the lock and continue > + * to access vaddr. > + */ > + mutex_unlock(&dev->struct_mutex); > + unwritten =3D copy_from_user(vaddr, user_data, args->size); > + mutex_lock(&dev->struct_mutex); > + if (unwritten) > + return -EFAULT; I was wondering why we don't clflush here but then I realized the destination is either wc or uc. Apart from the potential leak the rest of patch looks OK to me. > + } > + > + i915_gem_chipset_flush(dev); > + return 0; > +} > + > void *i915_gem_object_alloc(struct drm_device *dev) > { > struct drm_i915_private *dev_priv =3D dev->dev_private; > @@ -1071,8 +1184,8 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void = *data, > * pread/pwrite currently are reading and writing from the CPU > * perspective, requiring manual detiling by the client. > */ > - if (obj->phys_obj) { > - ret =3D i915_gem_phys_pwrite(dev, obj, args, file); > + if (obj->phys_handle) { > + ret =3D i915_gem_phys_pwrite(obj, args, file); > goto out; > } > = > @@ -4376,9 +4489,6 @@ void i915_gem_free_object(struct drm_gem_object *ge= m_obj) > = > trace_i915_gem_object_destroy(obj); > = > - if (obj->phys_obj) > - i915_gem_detach_phys_object(dev, obj); > - > list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) { > int ret; > = > @@ -4408,6 +4518,7 @@ void i915_gem_free_object(struct drm_gem_object *ge= m_obj) > i915_gem_object_put_pages(obj); > i915_gem_object_free_mmap_offset(obj); > i915_gem_object_release_stolen(obj); > + i915_gem_object_detach_phys(obj); > = > BUG_ON(obj->pages); > = > @@ -4875,190 +4986,6 @@ i915_gem_load(struct drm_device *dev) > register_shrinker(&dev_priv->mm.shrinker); > } > = > -/* > - * Create a physically contiguous memory object for this object > - * e.g. for cursor + overlay regs > - */ > -static int i915_gem_init_phys_object(struct drm_device *dev, > - int id, int size, int align) > -{ > - struct drm_i915_private *dev_priv =3D dev->dev_private; > - struct drm_i915_gem_phys_object *phys_obj; > - int ret; > - > - if (dev_priv->mm.phys_objs[id - 1] || !size) > - return 0; > - > - phys_obj =3D kzalloc(sizeof(*phys_obj), GFP_KERNEL); > - if (!phys_obj) > - return -ENOMEM; > - > - phys_obj->id =3D id; > - > - phys_obj->handle =3D drm_pci_alloc(dev, size, align); > - if (!phys_obj->handle) { > - ret =3D -ENOMEM; > - goto kfree_obj; > - } > -#ifdef CONFIG_X86 > - set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle-= >size / PAGE_SIZE); > -#endif > - > - dev_priv->mm.phys_objs[id - 1] =3D phys_obj; > - > - return 0; > -kfree_obj: > - kfree(phys_obj); > - return ret; > -} > - > -static void i915_gem_free_phys_object(struct drm_device *dev, int id) > -{ > - struct drm_i915_private *dev_priv =3D dev->dev_private; > - struct drm_i915_gem_phys_object *phys_obj; > - > - if (!dev_priv->mm.phys_objs[id - 1]) > - return; > - > - phys_obj =3D dev_priv->mm.phys_objs[id - 1]; > - if (phys_obj->cur_obj) { > - i915_gem_detach_phys_object(dev, phys_obj->cur_obj); > - } > - > -#ifdef CONFIG_X86 > - set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle-= >size / PAGE_SIZE); > -#endif > - drm_pci_free(dev, phys_obj->handle); > - kfree(phys_obj); > - dev_priv->mm.phys_objs[id - 1] =3D NULL; > -} > - > -void i915_gem_free_all_phys_object(struct drm_device *dev) > -{ > - int i; > - > - for (i =3D I915_GEM_PHYS_CURSOR_0; i <=3D I915_MAX_PHYS_OBJECT; i++) > - i915_gem_free_phys_object(dev, i); > -} > - > -void i915_gem_detach_phys_object(struct drm_device *dev, > - struct drm_i915_gem_object *obj) > -{ > - struct address_space *mapping =3D file_inode(obj->base.filp)->i_mapping; > - char *vaddr; > - int i; > - int page_count; > - > - if (!obj->phys_obj) > - return; > - vaddr =3D obj->phys_obj->handle->vaddr; > - > - page_count =3D obj->base.size / PAGE_SIZE; > - for (i =3D 0; i < page_count; i++) { > - struct page *page =3D shmem_read_mapping_page(mapping, i); > - if (!IS_ERR(page)) { > - char *dst =3D kmap_atomic(page); > - memcpy(dst, vaddr + i*PAGE_SIZE, PAGE_SIZE); > - kunmap_atomic(dst); > - > - drm_clflush_pages(&page, 1); > - > - set_page_dirty(page); > - mark_page_accessed(page); > - page_cache_release(page); > - } > - } > - i915_gem_chipset_flush(dev); > - > - obj->phys_obj->cur_obj =3D NULL; > - obj->phys_obj =3D NULL; > -} > - > -int > -i915_gem_attach_phys_object(struct drm_device *dev, > - struct drm_i915_gem_object *obj, > - int id, > - int align) > -{ > - struct address_space *mapping =3D file_inode(obj->base.filp)->i_mapping; > - struct drm_i915_private *dev_priv =3D dev->dev_private; > - int ret =3D 0; > - int page_count; > - int i; > - > - if (id > I915_MAX_PHYS_OBJECT) > - return -EINVAL; > - > - if (obj->phys_obj) { > - if (obj->phys_obj->id =3D=3D id) > - return 0; > - i915_gem_detach_phys_object(dev, obj); > - } > - > - /* create a new object */ > - if (!dev_priv->mm.phys_objs[id - 1]) { > - ret =3D i915_gem_init_phys_object(dev, id, > - obj->base.size, align); > - if (ret) { > - DRM_ERROR("failed to init phys object %d size: %zu\n", > - id, obj->base.size); > - return ret; > - } > - } > - > - /* bind to the object */ > - obj->phys_obj =3D dev_priv->mm.phys_objs[id - 1]; > - obj->phys_obj->cur_obj =3D obj; > - > - page_count =3D obj->base.size / PAGE_SIZE; > - > - for (i =3D 0; i < page_count; i++) { > - struct page *page; > - char *dst, *src; > - > - page =3D shmem_read_mapping_page(mapping, i); > - if (IS_ERR(page)) > - return PTR_ERR(page); > - > - src =3D kmap_atomic(page); > - dst =3D obj->phys_obj->handle->vaddr + (i * PAGE_SIZE); > - memcpy(dst, src, PAGE_SIZE); > - kunmap_atomic(src); > - > - mark_page_accessed(page); > - page_cache_release(page); > - } > - > - return 0; > -} > - > -static int > -i915_gem_phys_pwrite(struct drm_device *dev, > - struct drm_i915_gem_object *obj, > - struct drm_i915_gem_pwrite *args, > - struct drm_file *file_priv) > -{ > - void *vaddr =3D obj->phys_obj->handle->vaddr + args->offset; > - char __user *user_data =3D to_user_ptr(args->data_ptr); > - > - if (__copy_from_user_inatomic_nocache(vaddr, user_data, args->size)) { > - unsigned long unwritten; > - > - /* The physical object once assigned is fixed for the lifetime > - * of the obj, so we can safely drop the lock and continue > - * to access vaddr. > - */ > - mutex_unlock(&dev->struct_mutex); > - unwritten =3D copy_from_user(vaddr, user_data, args->size); > - mutex_lock(&dev->struct_mutex); > - if (unwritten) > - return -EFAULT; > - } > - > - i915_gem_chipset_flush(dev); > - return 0; > -} > - > void i915_gem_release(struct drm_device *dev, struct drm_file *file) > { > struct drm_i915_file_private *file_priv =3D file->driver_priv; > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/= intel_display.c > index 3d017f5b656a..61735fde3f12 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -7777,14 +7777,12 @@ static int intel_crtc_cursor_set(struct drm_crtc = *crtc, > addr =3D i915_gem_obj_ggtt_offset(obj); > } else { > int align =3D IS_I830(dev) ? 16 * 1024 : 256; > - ret =3D i915_gem_attach_phys_object(dev, obj, > - (intel_crtc->pipe =3D=3D 0) ? I915_GEM_PHYS_CURSOR_0 : I915_GEM_= PHYS_CURSOR_1, > - align); > + ret =3D i915_gem_object_attach_phys(obj, align); > if (ret) { > DRM_DEBUG_KMS("failed to attach phys object\n"); > goto fail_locked; > } > - addr =3D obj->phys_obj->handle->busaddr; > + addr =3D obj->phys_handle->busaddr; > } > = > if (IS_GEN2(dev)) > @@ -7792,10 +7790,7 @@ static int intel_crtc_cursor_set(struct drm_crtc *= crtc, > = > finish: > if (intel_crtc->cursor_bo) { > - if (INTEL_INFO(dev)->cursor_needs_physical) { > - if (intel_crtc->cursor_bo !=3D obj) > - i915_gem_detach_phys_object(dev, intel_crtc->cursor_bo); > - } else > + if (!INTEL_INFO(dev)->cursor_needs_physical) > i915_gem_object_unpin_from_display_plane(intel_crtc->cursor_bo); > drm_gem_object_unreference(&intel_crtc->cursor_bo->base); > } > diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/= intel_overlay.c > index 4232230a78bd..5423eb0c1335 100644 > --- a/drivers/gpu/drm/i915/intel_overlay.c > +++ b/drivers/gpu/drm/i915/intel_overlay.c > @@ -194,7 +194,7 @@ intel_overlay_map_regs(struct intel_overlay *overlay) > struct overlay_registers __iomem *regs; > = > if (OVERLAY_NEEDS_PHYSICAL(overlay->dev)) > - regs =3D (struct overlay_registers __iomem *)overlay->reg_bo->phys_obj= ->handle->vaddr; > + regs =3D (struct overlay_registers __iomem *)overlay->reg_bo->phys_han= dle->vaddr; > else > regs =3D io_mapping_map_wc(dev_priv->gtt.mappable, > i915_gem_obj_ggtt_offset(overlay->reg_bo)); > @@ -1347,14 +1347,12 @@ void intel_setup_overlay(struct drm_device *dev) > overlay->reg_bo =3D reg_bo; > = > if (OVERLAY_NEEDS_PHYSICAL(dev)) { > - ret =3D i915_gem_attach_phys_object(dev, reg_bo, > - I915_GEM_PHYS_OVERLAY_REGS, > - PAGE_SIZE); > + ret =3D i915_gem_object_attach_phys(reg_bo, PAGE_SIZE); > if (ret) { > DRM_ERROR("failed to attach phys overlay regs\n"); > goto out_free_bo; > } > - overlay->flip_addr =3D reg_bo->phys_obj->handle->busaddr; > + overlay->flip_addr =3D reg_bo->phys_handle->busaddr; > } else { > ret =3D i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE); > if (ret) { > @@ -1436,7 +1434,7 @@ intel_overlay_map_regs_atomic(struct intel_overlay = *overlay) > /* Cast to make sparse happy, but it's wc memory anyway, so > * equivalent to the wc io mapping on X86. */ > regs =3D (struct overlay_registers __iomem *) > - overlay->reg_bo->phys_obj->handle->vaddr; > + overlay->reg_bo->phys_handle->vaddr; > else > regs =3D io_mapping_map_atomic_wc(dev_priv->gtt.mappable, > i915_gem_obj_ggtt_offset(overlay->reg_bo)); > @@ -1470,7 +1468,7 @@ intel_overlay_capture_error_state(struct drm_device= *dev) > error->dovsta =3D I915_READ(DOVSTA); > error->isr =3D I915_READ(ISR); > if (OVERLAY_NEEDS_PHYSICAL(overlay->dev)) > - error->base =3D (__force long)overlay->reg_bo->phys_obj->handle->vaddr; > + error->base =3D (__force long)overlay->reg_bo->phys_handle->vaddr; > else > error->base =3D i915_gem_obj_ggtt_offset(overlay->reg_bo); > = > -- = > 1.9.2 > = > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- = Ville Syrj=E4l=E4 Intel OTC