From: "Ville Syrjälä" <ville.syrjala@linux.intel.com>
To: Chris Wilson <chris@chris-wilson.co.uk>
Cc: intel-gfx@lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/i915: Make the physical object coherent with GTT
Date: Fri, 2 May 2014 20:17:22 +0300 [thread overview]
Message-ID: <20140502171722.GV18465@intel.com> (raw)
In-Reply-To: <1397806024-27336-2-git-send-email-chris@chris-wilson.co.uk>
On Fri, Apr 18, 2014 at 08:27:04AM +0100, Chris Wilson wrote:
> Currently objects for which the hardware needs a contiguous physical
> address are allocated a shadow backing storage to satisfy the contraint.
> This shadow buffer is not wired into the normal obj->pages and so the
> physical object is incoherent with accesses via the GPU, GTT and CPU. By
> setting up the appropriate scatter-gather table, we can allow userspace
> to access the physical object via either a GTT mmaping of or by rendering
> into the GEM bo. However, keeping the CPU mmap of the shmemfs backing
> storage coherent with the contiguous shadow is not yet possible.
> Fortuituously, CPU mmaps of objects requiring physical addresses are not
> expected to be coherent anyway.
>
> This allows the physical constraint of the GEM object to be transparent
> to userspace and allow it to efficiently render into or update them via
> the GTT and GPU.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_dma.c | 3 +
> drivers/gpu/drm/i915/i915_gem.c | 182 +++++++++++++++++++++++++++-------------
> include/uapi/drm/i915_drm.h | 1 +
> 3 files changed, 129 insertions(+), 57 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 12571aac7516..dd43fdc736ac 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1022,6 +1022,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
> case I915_PARAM_CMD_PARSER_VERSION:
> value = i915_cmd_parser_get_version();
> break;
> + case I915_PARAM_HAS_COHERENT_PHYS_GTT:
> + value = 1;
> + break;
> default:
> DRM_DEBUG("Unknown parameter %d\n", param->param);
> return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index e302a23031fe..3eb5c07e1018 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -207,41 +207,129 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
> return 0;
> }
>
> -static void i915_gem_object_detach_phys(struct drm_i915_gem_object *obj)
> +static int
> +i915_gem_object_get_pages_phys(struct drm_i915_gem_object *obj)
> {
> - drm_dma_handle_t *phys = obj->phys_handle;
> + struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> + char *vaddr = obj->phys_handle->vaddr;
> + struct sg_table *st;
> + struct scatterlist *sg;
> + int i;
>
> - if (!phys)
> - return;
> + if (WARN_ON(i915_gem_object_needs_bit17_swizzle(obj)))
> + return -EINVAL;
> +
> + for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> + struct page *page;
> + char *src;
> +
> + page = shmem_read_mapping_page(mapping, i);
> + if (IS_ERR(page))
> + return PTR_ERR(page);
> +
> + src = kmap_atomic(page);
> + memcpy(vaddr, src, PAGE_SIZE);
> + kunmap_atomic(src);
> +
> + page_cache_release(page);
> + vaddr += PAGE_SIZE;
> + }
> +
> + drm_clflush_virt_range(obj->phys_handle->vaddr, obj->base.size);
No longer wc/uc so now we flush.
> + i915_gem_chipset_flush(obj->base.dev);
> +
> + st = kmalloc(sizeof(*st), GFP_KERNEL);
> + if (st == NULL)
> + return -ENOMEM;
> +
> + if (sg_alloc_table(st, 1, GFP_KERNEL)) {
> + kfree(st);
> + return -ENOMEM;
> + }
> +
> + sg = st->sgl;
> + sg->offset = 0;
> + sg->length = obj->base.size;
> +
> + sg_dma_address(sg) = obj->phys_handle->busaddr;
> + sg_dma_len(sg) = obj->base.size;
>
> - if (obj->madv == I915_MADV_WILLNEED) {
> + obj->pages = st;
> + obj->has_dma_mapping = true;
> + return 0;
> +}
> +
> +static void
> +i915_gem_object_put_pages_phys(struct drm_i915_gem_object *obj)
> +{
> + int ret;
> +
> + BUG_ON(obj->madv == __I915_MADV_PURGED);
> +
> + ret = i915_gem_object_set_to_cpu_domain(obj, true);
> + if (ret) {
> + /* In the event of a disaster, abandon all caches and
> + * hope for the best.
> + */
> + WARN_ON(ret != -EIO);
> + obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
> + }
> +
> + if (obj->madv == I915_MADV_DONTNEED)
> + obj->dirty = 0;
> +
> + if (obj->dirty) {
> struct address_space *mapping = file_inode(obj->base.filp)->i_mapping;
> - char *vaddr = phys->vaddr;
> + char *vaddr = obj->phys_handle->vaddr;
> int i;
>
> for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> - struct page *page = shmem_read_mapping_page(mapping, i);
> - if (!IS_ERR(page)) {
> - char *dst = kmap_atomic(page);
> - memcpy(dst, vaddr, PAGE_SIZE);
> - kunmap_atomic(dst);
> + struct page *page;
> + char *dst;
> +
> + page = shmem_read_mapping_page(mapping, i);
> + if (IS_ERR(page))
> + continue;
>
> - drm_clflush_pages(&page, 1);
> + dst = kmap_atomic(page);
> + memcpy(dst, vaddr, PAGE_SIZE);
> + kunmap_atomic(dst);
>
> - set_page_dirty(page);
> + set_page_dirty(page);
> + if (obj->madv == I915_MADV_WILLNEED)
> mark_page_accessed(page);
> - page_cache_release(page);
> - }
> + page_cache_release(page);
> vaddr += PAGE_SIZE;
> }
> - i915_gem_chipset_flush(obj->base.dev);
> + obj->dirty = 0;
> }
>
> -#ifdef CONFIG_X86
> - set_memory_wb((unsigned long)phys->vaddr, phys->size / PAGE_SIZE);
> -#endif
> - drm_pci_free(obj->base.dev, phys);
This gets leaked.
I suppose you should still keep i915_gem_object_detach_phys() around.
> - obj->phys_handle = NULL;
> + sg_free_table(obj->pages);
> + kfree(obj->pages);
> +
> + obj->has_dma_mapping = false;
> +}
> +
> +static const struct drm_i915_gem_object_ops i915_gem_phys_ops = {
> + .get_pages = i915_gem_object_get_pages_phys,
> + .put_pages = i915_gem_object_put_pages_phys,
> +};
> +
> +static int
> +drop_pages(struct drm_i915_gem_object *obj)
> +{
> + struct i915_vma *vma, *next;
> + int ret;
> +
> + drm_gem_object_reference(&obj->base);
> + list_for_each_entry_safe(vma, next, &obj->vma_list, vma_link)
> + if (i915_vma_unbind(vma))
> + break;
> +
> + ret = i915_gem_object_put_pages(obj);
> + drm_gem_object_unreference(&obj->base);
> +
> + return ret;
> }
>
> int
> @@ -249,9 +337,7 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
> int align)
> {
> drm_dma_handle_t *phys;
> - struct address_space *mapping;
> - char *vaddr;
> - int i;
> + int ret;
>
> if (obj->phys_handle) {
> if ((unsigned long)obj->phys_handle->vaddr & (align -1))
> @@ -266,36 +352,19 @@ i915_gem_object_attach_phys(struct drm_i915_gem_object *obj,
> if (obj->base.filp == NULL)
> return -EINVAL;
>
> + ret = drop_pages(obj);
> + if (ret)
> + return ret;
> +
> /* create a new object */
> phys = drm_pci_alloc(obj->base.dev, obj->base.size, align);
> if (!phys)
> return -ENOMEM;
>
> - vaddr = phys->vaddr;
> -#ifdef CONFIG_X86
> - set_memory_wc((unsigned long)vaddr, phys->size / PAGE_SIZE);
> -#endif
> - mapping = file_inode(obj->base.filp)->i_mapping;
> - for (i = 0; i < obj->base.size / PAGE_SIZE; i++) {
> - struct page *page;
> - char *src;
> -
> - page = shmem_read_mapping_page(mapping, i);
> - if (IS_ERR(page))
> - return PTR_ERR(page);
> -
> - src = kmap_atomic(page);
> - memcpy(vaddr, src, PAGE_SIZE);
> - kunmap_atomic(src);
> -
> - mark_page_accessed(page);
> - page_cache_release(page);
> -
> - vaddr += PAGE_SIZE;
> - }
> -
> obj->phys_handle = phys;
> - return 0;
> + obj->ops = &i915_gem_phys_ops;
> +
> + return i915_gem_object_get_pages(obj);
> }
>
> static int
> @@ -321,6 +390,7 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
> return -EFAULT;
> }
>
> + drm_clflush_virt_range(vaddr, args->size);
And need to flush again. Check.
The patch looks sane enough to me, save for the phys_handle leak.
> i915_gem_chipset_flush(dev);
> return 0;
> }
> @@ -1184,11 +1254,6 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> * pread/pwrite currently are reading and writing from the CPU
> * perspective, requiring manual detiling by the client.
> */
> - if (obj->phys_handle) {
> - ret = i915_gem_phys_pwrite(obj, args, file);
> - goto out;
> - }
> -
> if (obj->tiling_mode == I915_TILING_NONE &&
> (obj->base.filp == NULL ||
> (obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
> @@ -1199,8 +1264,12 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> * textures). Fallback to the shmem path in that case. */
> }
>
> - if (ret == -EFAULT || ret == -ENOSPC)
> - ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> + if (ret == -EFAULT || ret == -ENOSPC) {
> + if (obj->phys_handle)
> + ret = i915_gem_phys_pwrite(obj, args, file);
> + else
> + ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> + }
>
> out:
> drm_gem_object_unreference(&obj->base);
> @@ -3658,7 +3727,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj,
> * Stolen memory is always coherent with the GPU as it is explicitly
> * marked as wc by the system, or the system is cache-coherent.
> */
> - if (obj->stolen)
> + if (obj->stolen || obj->phys_handle)
> return false;
>
> /* If the GPU is snooping the contents of the CPU cache,
> @@ -4518,7 +4587,6 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
> i915_gem_object_put_pages(obj);
> i915_gem_object_free_mmap_offset(obj);
> i915_gem_object_release_stolen(obj);
> - i915_gem_object_detach_phys(obj);
>
> BUG_ON(obj->pages);
>
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index beee13a53fbb..cf1425c9f54e 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -390,6 +390,7 @@ typedef struct drm_i915_irq_wait {
> #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26
> #define I915_PARAM_HAS_WT 27
> #define I915_PARAM_CMD_PARSER_VERSION 28
> +#define I915_PARAM_HAS_COHERENT_PHYS_GTT 29
>
> typedef struct drm_i915_getparam {
> int param;
> --
> 1.9.2
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Ville Syrjälä
Intel OTC
next prev parent reply other threads:[~2014-05-02 17:17 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-04-18 7:27 [PATCH 1/2] drm/i915: Fix dynamic allocation of physical handles Chris Wilson
2014-04-18 7:27 ` [PATCH 2/2] drm/i915: Make the physical object coherent with GTT Chris Wilson
2014-04-22 20:03 ` Daniel Vetter
2014-05-02 17:17 ` Ville Syrjälä [this message]
2014-05-01 8:32 ` [PATCH 1/2] drm/i915: Fix dynamic allocation of physical handles Chris Wilson
2014-05-02 17:12 ` Ville Syrjälä
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20140502171722.GV18465@intel.com \
--to=ville.syrjala@linux.intel.com \
--cc=chris@chris-wilson.co.uk \
--cc=intel-gfx@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.