From mboxrd@z Thu Jan 1 00:00:00 1970 From: Ville =?iso-8859-1?Q?Syrj=E4l=E4?= Subject: Re: [PATCH 2/2] drm/i915: Make the physical object coherent with GTT Date: Fri, 2 May 2014 20:17:22 +0300 Message-ID: <20140502171722.GV18465@intel.com> References: <1397806024-27336-1-git-send-email-chris@chris-wilson.co.uk> <1397806024-27336-2-git-send-email-chris@chris-wilson.co.uk> Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable Return-path: Received: from mga03.intel.com (mga03.intel.com [143.182.124.21]) by gabe.freedesktop.org (Postfix) with ESMTP id CE8B06E759 for ; Fri, 2 May 2014 10:17:26 -0700 (PDT) Content-Disposition: inline In-Reply-To: <1397806024-27336-2-git-send-email-chris@chris-wilson.co.uk> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" To: Chris Wilson Cc: intel-gfx@lists.freedesktop.org List-Id: intel-gfx@lists.freedesktop.org On Fri, Apr 18, 2014 at 08:27:04AM +0100, Chris Wilson wrote: > Currently objects for which the hardware needs a contiguous physical > address are allocated a shadow backing storage to satisfy the contraint. > This shadow buffer is not wired into the normal obj->pages and so the > physical object is incoherent with accesses via the GPU, GTT and CPU. By > setting up the appropriate scatter-gather table, we can allow userspace > to access the physical object via either a GTT mmaping of or by rendering > into the GEM bo. However, keeping the CPU mmap of the shmemfs backing > storage coherent with the contiguous shadow is not yet possible. > Fortuituously, CPU mmaps of objects requiring physical addresses are not > expected to be coherent anyway. > = > This allows the physical constraint of the GEM object to be transparent > to userspace and allow it to efficiently render into or update them via > the GTT and GPU. > = > Signed-off-by: Chris Wilson > --- > drivers/gpu/drm/i915/i915_dma.c | 3 + > drivers/gpu/drm/i915/i915_gem.c | 182 +++++++++++++++++++++++++++-------= ------ > include/uapi/drm/i915_drm.h | 1 + > 3 files changed, 129 insertions(+), 57 deletions(-) > = > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_= dma.c > index 12571aac7516..dd43fdc736ac 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -1022,6 +1022,9 @@ static int i915_getparam(struct drm_device *dev, vo= id *data, > case I915_PARAM_CMD_PARSER_VERSION: > value =3D i915_cmd_parser_get_version(); > break; > + case I915_PARAM_HAS_COHERENT_PHYS_GTT: > + value =3D 1; > + break; > default: > DRM_DEBUG("Unknown parameter %d\n", param->param); > return -EINVAL; > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_= gem.c > index e302a23031fe..3eb5c07e1018 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -207,41 +207,129 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev= , void *data, > return 0; > } > = > -static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj) > +static int > +i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj) > { > - drm_dma_handle_t *phys =3D obj->phys_handle; > + struct address_space *mapping =3D file_inode(obj->base.filp)->i_mapping; > + char *vaddr =3D obj->phys_handle->vaddr; > + struct sg_table *st; > + struct scatterlist *sg; > + int i; > = > - if (!phys) > - return; > + if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj))) > + return -EINVAL; > + > + for (i =3D 0; i < obj->base.size / PAGE_SIZE; i++) { > + struct page *page; > + char *src; > + > + page =3D shmem_read_mapping_page(mapping, i); > + if (IS_ERR(page)) > + return PTR_ERR(page); > + > + src =3D kmap_atomic(page); > + memcpy(vaddr, src, PAGE_SIZE); > + kunmap_atomic(src); > + > + page_cache_release(page); > + vaddr +=3D PAGE_SIZE; > + } > + > + drm_clflush_virt_range(obj->phys_handle->vaddr, obj->base.size); No longer wc/uc so now we flush. > + i915_gem_chipset_flush(obj->base.dev); > + > + st =3D kmalloc(sizeof(*st), GFP_KERNEL); > + if (st =3D=3D NULL) > + return -ENOMEM; > + > + if (sg_alloc_table(st, 1, GFP_KERNEL)) { > + kfree(st); > + return -ENOMEM; > + } > + > + sg =3D st->sgl; > + sg->offset =3D 0; > + sg->length =3D obj->base.size; > + > + sg_dma_address(sg) =3D obj->phys_handle->busaddr; > + sg_dma_len(sg) =3D obj->base.size; > = > - if (obj->madv =3D=3D I915_MADV_WILLNEED) { > + obj->pages =3D st; > + obj->has_dma_mapping =3D true; > + return 0; > +} > + > +static void > +i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj) > +{ > + int ret; > + > + BUG_ON(obj->madv =3D=3D __I915_MADV_PURGED); > + > + ret =3D i915_gem_object_set_to_cpu_domain(obj, true); > + if (ret) { > + /* In the event of a disaster, abandon all caches and > + * hope for the best. > + */ > + WARN_ON(ret !=3D -EIO); > + obj->base.read_domains =3D obj->base.write_domain =3D I915_GEM_DOMAIN_= CPU; > + } > + > + if (obj->madv =3D=3D I915_MADV_DONTNEED) > + obj->dirty =3D 0; > + > + if (obj->dirty) { > struct address_space *mapping =3D file_inode(obj->base.filp)->i_mappin= g; > - char *vaddr =3D phys->vaddr; > + char *vaddr =3D obj->phys_handle->vaddr; > int i; > = > for (i =3D 0; i < obj->base.size / PAGE_SIZE; i++) { > - struct page *page =3D shmem_read_mapping_page(mapping, i); > - if (!IS_ERR(page)) { > - char *dst =3D kmap_atomic(page); > - memcpy(dst, vaddr, PAGE_SIZE); > - kunmap_atomic(dst); > + struct page *page; > + char *dst; > + > + page =3D shmem_read_mapping_page(mapping, i); > + if (IS_ERR(page)) > + continue; > = > - drm_clflush_pages(&page, 1); > + dst =3D kmap_atomic(page); > + memcpy(dst, vaddr, PAGE_SIZE); > + kunmap_atomic(dst); > = > - set_page_dirty(page); > + set_page_dirty(page); > + if (obj->madv =3D=3D I915_MADV_WILLNEED) > mark_page_accessed(page); > - page_cache_release(page); > - } > + page_cache_release(page); > vaddr +=3D PAGE_SIZE; > } > - i915_gem_chipset_flush(obj->base.dev); > + obj->dirty =3D 0; > } > = > -#ifdef CONFIG_X86 > - set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE); > -#endif > - drm_pci_free(obj->base.dev, phys); This gets leaked. I suppose you should still keep i915_gem_object_detach_phys() around. > - obj->phys_handle =3D NULL; > + sg_free_table(obj->pages); > + kfree(obj->pages); > + > + obj->has_dma_mapping =3D false; > +} > + > +static const struct drm_i915_gem_object_ops i915_gem_phys_ops =3D { > + .get_pages =3D i915_gem_object_get_pages_phys, > + .put_pages =3D i915_gem_object_put_pages_phys, > +}; > + > +static int > +drop_pages(struct drm_i915_gem_object *obj) > +{ > + struct i915_vma *vma, *next; > + int ret; > + > + drm_gem_object_reference(&obj->base); > + list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link) > + if (i915_vma_unbind(vma)) > + break; > + > + ret =3D i915_gem_object_put_pages(obj); > + drm_gem_object_unreference(&obj->base); > + > + return ret; > } > = > int > @@ -249,9 +337,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_objec= t *obj, > int align) > { > drm_dma_handle_t *phys; > - struct address_space *mapping; > - char *vaddr; > - int i; > + int ret; > = > if (obj->phys_handle) { > if ((unsigned long)obj->phys_handle->vaddr & (align -1)) > @@ -266,36 +352,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_obj= ect *obj, > if (obj->base.filp =3D=3D NULL) > return -EINVAL; > = > + ret =3D drop_pages(obj); > + if (ret) > + return ret; > + > /* create a new object */ > phys =3D drm_pci_alloc(obj->base.dev, obj->base.size, align); > if (!phys) > return -ENOMEM; > = > - vaddr =3D phys->vaddr; > -#ifdef CONFIG_X86 > - set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE); > -#endif > - mapping =3D file_inode(obj->base.filp)->i_mapping; > - for (i =3D 0; i < obj->base.size / PAGE_SIZE; i++) { > - struct page *page; > - char *src; > - > - page =3D shmem_read_mapping_page(mapping, i); > - if (IS_ERR(page)) > - return PTR_ERR(page); > - > - src =3D kmap_atomic(page); > - memcpy(vaddr, src, PAGE_SIZE); > - kunmap_atomic(src); > - > - mark_page_accessed(page); > - page_cache_release(page); > - > - vaddr +=3D PAGE_SIZE; > - } > - > obj->phys_handle =3D phys; > - return 0; > + obj->ops =3D &i915_gem_phys_ops; > + > + return i915_gem_object_get_pages(obj); > } > = > static int > @@ -321,6 +390,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj, > return -EFAULT; > } > = > + drm_clflush_virt_range(vaddr, args->size); And need to flush again. Check. The patch looks sane enough to me, save for the phys_handle leak. > i915_gem_chipset_flush(dev); > return 0; > } > @@ -1184,11 +1254,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void= *data, > * pread/pwrite currently are reading and writing from the CPU > * perspective, requiring manual detiling by the client. > */ > - if (obj->phys_handle) { > - ret =3D i915_gem_phys_pwrite(obj, args, file); > - goto out; > - } > - > if (obj->tiling_mode =3D=3D I915_TILING_NONE && > (obj->base.filp =3D=3D NULL || > (obj->base.write_domain !=3D I915_GEM_DOMAIN_CPU && > @@ -1199,8 +1264,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void= *data, > * textures). Fallback to the shmem path in that case. */ > } > = > - if (ret =3D=3D -EFAULT || ret =3D=3D -ENOSPC) > - ret =3D i915_gem_shmem_pwrite(dev, obj, args, file); > + if (ret =3D=3D -EFAULT || ret =3D=3D -ENOSPC) { > + if (obj->phys_handle) > + ret =3D i915_gem_phys_pwrite(obj, args, file); > + else > + ret =3D i915_gem_shmem_pwrite(dev, obj, args, file); > + } > = > out: > drm_gem_object_unreference(&obj->base); > @@ -3658,7 +3727,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object = *obj, > * Stolen memory is always coherent with the GPU as it is explicitly > * marked as wc by the system, or the system is cache-coherent. > */ > - if (obj->stolen) > + if (obj->stolen || obj->phys_handle) > return false; > = > /* If the GPU is snooping the contents of the CPU cache, > @@ -4518,7 +4587,6 @@ void i915_gem_free_object(struct drm_gem_object *ge= m_obj) > i915_gem_object_put_pages(obj); > i915_gem_object_free_mmap_offset(obj); > i915_gem_object_release_stolen(obj); > - i915_gem_object_detach_phys(obj); > = > BUG_ON(obj->pages); > = > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h > index beee13a53fbb..cf1425c9f54e 100644 > --- a/include/uapi/drm/i915_drm.h > +++ b/include/uapi/drm/i915_drm.h > @@ -390,6 +390,7 @@ typedef struct drm_i915_irq_wait { > #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26 > #define I915_PARAM_HAS_WT 27 > #define I915_PARAM_CMD_PARSER_VERSION 28 > +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29 > = > typedef struct drm_i915_getparam { > int param; > -- = > 1.9.2 > = > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- = Ville Syrj=E4l=E4 Intel OTC