* [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
From: ankitprasad.r.sharma @ 2015-05-06 10:16 UTC
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel, shashidhar.hiremath
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
This patch extends the pread/pwrite functionality to objects not
backed by shmem. The access is made through the GTT interface.
This covers prime objects as well as stolen memory backed objects;
for userptr objects it remains forbidden.
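As a sketch of the intended usage from userspace (hypothetical fd and
handle values; pwrite is symmetric), assuming the usual uAPI headers:
        struct drm_i915_gem_pread pread;
        char buf[4096];
        memset(&pread, 0, sizeof(pread));
        pread.handle = handle;          /* e.g. a stolen backed object */
        pread.offset = 0;
        pread.size = sizeof(buf);
        pread.data_ptr = (uintptr_t)buf;
        /* previously failed with -EINVAL for !obj->base.filp objects;
         * now serviced through the GTT mapping
         */
        ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);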
v2: drop locks around slow_user_access, prefault the pages before
access (Chris)
testcase: igt/gem_create_stolen
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 137 +++++++++++++++++++++++++++++++++++-----
1 file changed, 120 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 81c5381..fe14ddc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -631,6 +631,102 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
return ret ? - EFAULT : 0;
}
+static inline int
+slow_user_access(struct io_mapping *mapping,
+ loff_t page_base, int page_offset,
+ char __user *user_data,
+ int length, bool write)
+{
+ void __iomem *vaddr_inatomic;
+ void *vaddr;
+ unsigned long unwritten;
+
+ vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
+ /* We can use the cpu mem copy function because this is X86. */
+ vaddr = (void __force *)vaddr_inatomic + page_offset;
+ if (write)
+ unwritten = __copy_from_user(vaddr, user_data, length);
+ else
+ unwritten = __copy_to_user(user_data, vaddr, length);
+
+ io_mapping_unmap(vaddr_inatomic);
+ return unwritten;
+}
+
+static int
+i915_gem_gtt_pread_pwrite(struct drm_device *dev,
+ struct drm_i915_gem_object *obj, uint64_t size,
+ uint64_t data_offset, uint64_t data_ptr, bool write)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ char __user *user_data;
+ ssize_t remain;
+ loff_t offset, page_base;
+ int page_offset, page_length, ret = 0;
+
+ ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
+ if (ret)
+ goto out;
+
+ ret = i915_gem_object_set_to_gtt_domain(obj, write);
+ if (ret)
+ goto out_unpin;
+
+ ret = i915_gem_object_put_fence(obj);
+ if (ret)
+ goto out_unpin;
+
+ user_data = to_user_ptr(data_ptr);
+ remain = size;
+
+ offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
+
+ if (write)
+ intel_fb_obj_invalidate(obj, NULL, ORIGIN_GTT);
+
+ mutex_unlock(&dev->struct_mutex);
+ if (!write && likely(!i915.prefault_disable))
+ ret = fault_in_multipages_writeable(user_data, remain);
+
+ while (remain > 0) {
+ /* Operation in this page
+ *
+ * page_base = page offset within aperture
+ * page_offset = offset within page
+ * page_length = bytes to copy for this page
+ */
+ page_base = offset & PAGE_MASK;
+ page_offset = offset_in_page(offset);
+ page_length = remain;
+ if ((page_offset + remain) > PAGE_SIZE)
+ page_length = PAGE_SIZE - page_offset;
+
+ /* This is a slow read/write as it tries to read from
+ * and write to user memory, which may result in page
+ * faults
+ */
+ ret = slow_user_access(dev_priv->gtt.mappable, page_base,
+ page_offset, user_data,
+ page_length, write);
+
+ if (ret) {
+ ret = -EINVAL;
+ break;
+ }
+
+ remain -= page_length;
+ user_data += page_length;
+ offset += page_length;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+
+out_unpin:
+ i915_gem_object_ggtt_unpin(obj);
+out:
+ return ret;
+}
+
static int
i915_gem_shmem_pread(struct drm_device *dev,
struct drm_i915_gem_object *obj,
@@ -754,17 +850,19 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
goto out;
}
- /* prime objects have no backing filp to GEM pread/pwrite
- * pages from.
- */
- if (!obj->base.filp) {
- ret = -EINVAL;
- goto out;
- }
-
trace_i915_gem_object_pread(obj, args->offset, args->size);
- ret = i915_gem_shmem_pread(dev, obj, args, file);
+ /* pread for non shmem backed objects */
+ if (!obj->base.filp) {
+ if (obj->tiling_mode == I915_TILING_NONE)
+ ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
+ args->offset,
+ args->data_ptr,
+ false);
+ else
+ ret = -EINVAL;
+ } else
+ ret = i915_gem_shmem_pread(dev, obj, args, file);
out:
drm_gem_object_unreference(&obj->base);
@@ -1107,17 +1205,22 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
goto out;
}
- /* prime objects have no backing filp to GEM pread/pwrite
- * pages from.
- */
- if (!obj->base.filp) {
- ret = -EINVAL;
- goto out;
- }
-
trace_i915_gem_object_pwrite(obj, args->offset, args->size);
ret = -EFAULT;
+
+ /* pwrite for non shmem backed objects */
+ if (!obj->base.filp) {
+ if (obj->tiling_mode == I915_TILING_NONE)
+ ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
+ args->offset,
+ args->data_ptr,
+ true);
+ else
+ ret = -EINVAL;
+
+ goto out;
+ }
/* We can only do the GTT pwrite on untiled buffers, as otherwise
* it would end up going through the fenced access, and we'll get
* different detiling behavior between reading and writing.
--
1.9.1
* [PATCH v4 0/4] Support for creating/using Stolen memory backed objects
From: ankitprasad.r.sharma @ 2015-07-01 9:25 UTC
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel, shashidhar.hiremath
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
This patch series adds support for creating/using Stolen memory backed
objects.
Despite being a unified memory architecture (UMA), some bits of memory
are more equal than others. In particular, we have the thorny issue of
stolen memory, memory stolen from the system by the BIOS and reserved
for igfx use. Stolen memory is required for some functions of the GPU
and display engine, but in general it goes wasted. Whilst we cannot
return it to the system, we need to find some other method for
utilising it. As we do not support direct access to the physical
address in the stolen region, it behaves like a different class of
memory, closer in kind to local GPU memory. This strongly suggests that
we need a placement model like TTM if we are to fully utilise these
discrete chunks of differing memory.
To add support for creating stolen memory backed objects, we extend
the drm_i915_gem_create structure with a new flag through which the
user can request that the object be allocated from stolen memory. If
set, an attempt is made to allocate the object from stolen memory,
subject to the availability of free space in the stolen region.
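For illustration, a minimal sketch of the resulting allocation call
from userspace (fd being an open DRM device node; error handling
elided):
        struct drm_i915_gem_create create;
        memset(&create, 0, sizeof(create));
        create.size = 4096;
        create.flags = I915_CREATE_PLACEMENT_STOLEN;
        ioctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create);
        /* create.handle now names a stolen memory backed object */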
This patch series adds support for clearing buffer objects via blitter
engines. This is particularly useful for clearing out the memory from
stolen region, but can also be used for other shmem allocated objects.
We also add support for stealing purgeable stolen pages if we run out
of stolen memory when trying to allocate an object.
v2: Added support for read/write from/to objects not backed by
shmem using the pread/pwrite interface.
Also extended the current get_aperture ioctl to retrieve the
total and available size of the stolen region
v3: Removed the extended get_aperture ioctl patch 5 (to be submitted as
part of other patch series), addressed comments by Chris about pread/pwrite
for non shmem backed objects
v4: Rebased to the latest drm-intel-nightly
This can be verified using IGT tests: igt/gem_stolen
Ankitprasad Sharma (3):
drm/i915: Clearing buffer objects via blitter engine
drm/i915: Support for creating Stolen memory backed objects
drm/i915: Support for pread/pwrite from/to non shmem backed objects
Chris Wilson (1):
drm/i915: Add support for stealing purgeable stolen pages
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/i915_dma.c | 3 +
drivers/gpu/drm/i915/i915_drv.h | 4 +
drivers/gpu/drm/i915/i915_gem.c | 168 ++++++++++++++++++++++----
drivers/gpu/drm/i915/i915_gem_exec.c | 201 ++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem_stolen.c | 121 +++++++++++++++++--
drivers/gpu/drm/i915/intel_lrc.c | 4 +-
drivers/gpu/drm/i915/intel_lrc.h | 3 +
drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +-
drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
include/uapi/drm/i915_drm.h | 15 +++
11 files changed, 488 insertions(+), 35 deletions(-)
create mode 100644 drivers/gpu/drm/i915/i915_gem_exec.c
--
1.9.1
* [PATCH 1/4] drm/i915: Clearing buffer objects via blitter engine
From: ankitprasad.r.sharma @ 2015-07-01 9:25 UTC
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel, shashidhar.hiremath
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
This patch adds support for clearing buffer objects via the blitter
engine. This is particularly useful for clearing out the memory from
the stolen region.
v2: Add support for using execlists & PPGTT
v3: Fix issues in legacy ringbuffer submission mode
v4: Rebased to the latest drm-intel-nightly (Ankit)
testcase: igt/gem_stolen
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Deepak S <deepak.s@linux.intel.com>
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
drivers/gpu/drm/i915/Makefile | 1 +
drivers/gpu/drm/i915/i915_drv.h | 4 +
drivers/gpu/drm/i915/i915_gem_exec.c | 201 ++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/intel_lrc.c | 4 +-
drivers/gpu/drm/i915/intel_lrc.h | 3 +
drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +-
drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
7 files changed, 213 insertions(+), 3 deletions(-)
create mode 100644 drivers/gpu/drm/i915/i915_gem_exec.c
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index de21965..1959314 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -24,6 +24,7 @@ i915-y += i915_cmd_parser.o \
i915_gem_debug.o \
i915_gem_dmabuf.o \
i915_gem_evict.o \
+ i915_gem_exec.o \
i915_gem_execbuffer.o \
i915_gem_gtt.o \
i915_gem.o \
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ea9caf2..d1e151e 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3082,6 +3082,10 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
int i915_gem_evict_everything(struct drm_device *dev);
+/* i915_gem_exec.c */
+int i915_gem_exec_clear_object(struct drm_i915_gem_object *obj,
+ struct drm_i915_file_private *file_priv);
+
/* belongs in i915_gem_gtt.h */
static inline void i915_gem_chipset_flush(struct drm_device *dev)
{
diff --git a/drivers/gpu/drm/i915/i915_gem_exec.c b/drivers/gpu/drm/i915/i915_gem_exec.c
new file mode 100644
index 0000000..a07fda0
--- /dev/null
+++ b/drivers/gpu/drm/i915/i915_gem_exec.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#include <drm/drmP.h>
+#include <drm/i915_drm.h>
+#include "i915_drv.h"
+
+#define GEN8_COLOR_BLT_CMD (2<<29 | 0x50<<22)
+
+#define BPP_8 0
+#define BPP_16 (1<<24)
+#define BPP_32 (1<<25 | 1<<24)
+
+#define ROP_FILL_COPY (0xf0 << 16)
+
+static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj,
+ struct intel_engine_cs *ring,
+ struct intel_context *ctx,
+ struct drm_i915_gem_request **req)
+{
+ int ret;
+
+ ret = i915_gem_object_sync(obj, ring, req);
+ if (ret)
+ return ret;
+
+ if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
+ if (i915_gem_clflush_object(obj, false))
+ i915_gem_chipset_flush(obj->base.dev);
+ obj->base.write_domain &= ~I915_GEM_DOMAIN_CPU;
+ }
+ if (obj->base.write_domain & I915_GEM_DOMAIN_GTT) {
+ wmb();
+ obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
+ }
+
+
+ return i915.enable_execlists ?
+ logical_ring_invalidate_all_caches(*req) :
+ intel_ring_invalidate_all_caches(*req);
+}
+
+static void i915_gem_exec_dirty_object(struct drm_i915_gem_object *obj,
+ struct intel_engine_cs *ring,
+ struct i915_address_space *vm,
+ struct drm_i915_gem_request *req)
+{
+ i915_gem_request_assign(&obj->last_write_req, req);
+ obj->base.read_domains = I915_GEM_DOMAIN_RENDER;
+ obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
+ i915_vma_move_to_active(i915_gem_obj_to_vma(obj, vm), req);
+ obj->dirty = 1;
+
+ ring->gpu_caches_dirty = true;
+}
+
+int i915_gem_exec_clear_object(struct drm_i915_gem_object *obj,
+ struct drm_i915_file_private *file_priv)
+{
+ struct drm_device *dev = obj->base.dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct intel_engine_cs *ring;
+ struct intel_context *ctx;
+ struct intel_ringbuffer *ringbuf;
+ struct i915_address_space *vm;
+ struct drm_i915_gem_request *req;
+ int ret = 0;
+
+ lockdep_assert_held(&dev->struct_mutex);
+
+ ring = &dev_priv->ring[HAS_BLT(dev) ? BCS : RCS];
+ ctx = i915_gem_context_get(file_priv, DEFAULT_CONTEXT_HANDLE);
+ /* Allocate a request for this operation nice and early. */
+ ret = i915_gem_request_alloc(ring, ctx, &req);
+ if (ret)
+ return ret;
+
+ if (ctx->ppgtt)
+ vm = &ctx->ppgtt->base;
+ else
+ vm = &dev_priv->gtt.base;
+
+ if (i915.enable_execlists && !ctx->engine[ring->id].state) {
+ ret = intel_lr_context_deferred_create(ctx, ring);
+ if (ret)
+ return ret;
+ }
+
+ ringbuf = ctx->engine[ring->id].ringbuf;
+
+ ret = i915_gem_object_pin(obj, vm, PAGE_SIZE, 0);
+ if (ret)
+ return ret;
+
+ if (obj->tiling_mode && INTEL_INFO(dev)->gen <= 3) {
+ ret = i915_gem_object_put_fence(obj);
+ if (ret)
+ goto unpin;
+ }
+
+ ret = i915_gem_exec_flush_object(obj, ring, ctx, &req);
+ if (ret)
+ goto unpin;
+
+ if (i915.enable_execlists) {
+ if (dev_priv->info.gen >= 8) {
+ ret = intel_logical_ring_begin(req, 8);
+ if (ret)
+ goto unpin;
+
+ intel_logical_ring_emit(ringbuf, GEN8_COLOR_BLT_CMD |
+ BLT_WRITE_RGBA |
+ (7-2));
+ intel_logical_ring_emit(ringbuf, BPP_32 |
+ ROP_FILL_COPY |
+ PAGE_SIZE);
+ intel_logical_ring_emit(ringbuf, 0);
+ intel_logical_ring_emit(ringbuf,
+ obj->base.size >> PAGE_SHIFT
+ << 16 | PAGE_SIZE / 4);
+ intel_logical_ring_emit(ringbuf,
+ i915_gem_obj_offset(obj, vm));
+ intel_logical_ring_emit(ringbuf, 0);
+ intel_logical_ring_emit(ringbuf, 0);
+ intel_logical_ring_emit(ringbuf, MI_NOOP);
+
+ intel_logical_ring_advance(ringbuf);
+ } else {
+ DRM_ERROR("Execlists not supported for gen %d\n",
+ dev_priv->info.gen);
+ ret = -EINVAL;
+ goto unpin;
+ }
+ } else {
+ if (IS_GEN8(dev)) {
+ ret = intel_ring_begin(req, 8);
+ if (ret)
+ goto unpin;
+
+ intel_ring_emit(ring, GEN8_COLOR_BLT_CMD |
+ BLT_WRITE_RGBA | (7-2));
+ intel_ring_emit(ring, BPP_32 |
+ ROP_FILL_COPY | PAGE_SIZE);
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring,
+ obj->base.size >> PAGE_SHIFT << 16 |
+ PAGE_SIZE / 4);
+ intel_ring_emit(ring, i915_gem_obj_offset(obj, vm));
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, MI_NOOP);
+ } else {
+ ret = intel_ring_begin(req, 6);
+ if (ret)
+ goto unpin;
+
+ intel_ring_emit(ring, COLOR_BLT_CMD |
+ BLT_WRITE_RGBA);
+ intel_ring_emit(ring, BPP_32 |
+ ROP_FILL_COPY | PAGE_SIZE);
+ intel_ring_emit(ring,
+ obj->base.size >> PAGE_SHIFT << 16 |
+ PAGE_SIZE);
+ intel_ring_emit(ring, i915_gem_obj_offset(obj, vm));
+ intel_ring_emit(ring, 0);
+ intel_ring_emit(ring, MI_NOOP);
+ }
+
+ __intel_ring_advance(ring);
+ }
+
+ i915_gem_exec_dirty_object(obj, ring, vm, req);
+
+unpin:
+ i915_gem_obj_to_vma(obj, vm)->pin_count--;
+ return ret;
+}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index e87d74c..743c9f8 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -586,7 +586,7 @@ static int execlists_context_queue(struct drm_i915_gem_request *request)
return 0;
}
-static int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
+int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *req)
{
struct intel_engine_cs *ring = req->ring;
uint32_t flush_domains;
@@ -792,7 +792,7 @@ static int logical_ring_prepare(struct drm_i915_gem_request *req, int bytes)
*
* Return: non-zero if the ringbuffer is not ready to be written to.
*/
-static int intel_logical_ring_begin(struct drm_i915_gem_request *req,
+int intel_logical_ring_begin(struct drm_i915_gem_request *req,
int num_dwords)
{
struct drm_i915_private *dev_priv;
diff --git a/drivers/gpu/drm/i915/intel_lrc.h b/drivers/gpu/drm/i915/intel_lrc.h
index f59940a..19a85a7 100644
--- a/drivers/gpu/drm/i915/intel_lrc.h
+++ b/drivers/gpu/drm/i915/intel_lrc.h
@@ -43,6 +43,9 @@ void intel_logical_ring_cleanup(struct intel_engine_cs *ring);
int intel_logical_rings_init(struct drm_device *dev);
int logical_ring_flush_all_caches(struct drm_i915_gem_request *req);
+int logical_ring_invalidate_all_caches(struct drm_i915_gem_request *request);
+int intel_logical_ring_begin(struct drm_i915_gem_request *req,
+ int num_dwords);
/**
* intel_logical_ring_advance() - advance the ringbuffer tail
* @ringbuf: Ringbuffer to advance.
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index af7c12e..63bce53 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -81,7 +81,7 @@ bool intel_ring_stopped(struct intel_engine_cs *ring)
return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
}
-static void __intel_ring_advance(struct intel_engine_cs *ring)
+void __intel_ring_advance(struct intel_engine_cs *ring)
{
struct intel_ringbuffer *ringbuf = ring->buffer;
ringbuf->tail &= ringbuf->size - 1;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 0e2bbc6..942aff0 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -431,6 +431,7 @@ static inline void intel_ring_advance(struct intel_engine_cs *ring)
struct intel_ringbuffer *ringbuf = ring->buffer;
ringbuf->tail &= ringbuf->size - 1;
}
+void __intel_ring_advance(struct intel_engine_cs *ring);
int __intel_ring_space(int head, int tail, int size);
void intel_ring_update_space(struct intel_ringbuffer *ringbuf);
int intel_ring_space(struct intel_ringbuffer *ringbuf);
--
1.9.1
* [PATCH 2/4] drm/i915: Support for creating Stolen memory backed objects
From: ankitprasad.r.sharma @ 2015-07-01 9:25 UTC
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel, shashidhar.hiremath
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
Extend the drm_i915_gem_create structure to add support for creating
stolen memory backed objects. A new flag is added through which the
user can request that the object be allocated from stolen memory; if
set, an attempt is made to allocate the object from stolen memory,
subject to the availability of free space in the stolen region.
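Userspace can probe for the new interface via the getparam added by
this patch before using the flag; a rough sketch:
        struct drm_i915_getparam gp;
        int version = 0;
        memset(&gp, 0, sizeof(gp));
        gp.param = I915_PARAM_CREATE_VERSION;
        gp.value = &version;
        /* version >= 1 means drm_i915_gem_create.flags is honoured */
        if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) || version < 1)
                return -ENODEV;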
v2: Rebased to the latest drm-intel-nightly (Ankit)
testcase: igt/gem_stolen
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
drivers/gpu/drm/i915/i915_dma.c | 3 +++
drivers/gpu/drm/i915/i915_gem.c | 31 +++++++++++++++++++++++++++----
include/uapi/drm/i915_drm.h | 15 +++++++++++++++
3 files changed, 45 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index c5349fa..6045749 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
value = i915.enable_hangcheck &&
intel_has_gpu_reset(dev);
break;
+ case I915_PARAM_CREATE_VERSION:
+ value = 1;
+ break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a2a4a27..4acf331 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -391,7 +391,8 @@ static int
i915_gem_create(struct drm_file *file,
struct drm_device *dev,
uint64_t size,
- uint32_t *handle_p)
+ uint32_t *handle_p,
+ uint32_t flags)
{
struct drm_i915_gem_object *obj;
int ret;
@@ -401,8 +402,29 @@ i915_gem_create(struct drm_file *file,
if (size == 0)
return -EINVAL;
+ if (flags & ~(I915_CREATE_PLACEMENT_STOLEN))
+ return -EINVAL;
+
/* Allocate the new object */
- obj = i915_gem_alloc_object(dev, size);
+ if (flags & I915_CREATE_PLACEMENT_STOLEN) {
+ mutex_lock(&dev->struct_mutex);
+ obj = i915_gem_object_create_stolen(dev, size);
+ if (!obj) {
+ mutex_unlock(&dev->struct_mutex);
+ return -ENOMEM;
+ }
+
+ ret = i915_gem_exec_clear_object(obj, file->driver_priv);
+ if (ret) {
+ i915_gem_object_free(obj);
+ mutex_unlock(&dev->struct_mutex);
+ return ret;
+ }
+
+ mutex_unlock(&dev->struct_mutex);
+ } else
+ obj = i915_gem_alloc_object(dev, size);
+
if (obj == NULL)
return -ENOMEM;
@@ -425,7 +447,7 @@ i915_gem_dumb_create(struct drm_file *file,
args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
args->size = args->pitch * args->height;
return i915_gem_create(file, dev,
- args->size, &args->handle);
+ args->size, &args->handle, 0);
}
/**
@@ -438,7 +460,8 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
struct drm_i915_gem_create *args = data;
return i915_gem_create(file, dev,
- args->size, &args->handle);
+ args->size, &args->handle,
+ args->flags);
}
static inline int
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index f88cc1c..87992d1 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_SUBSLICE_TOTAL 33
#define I915_PARAM_EU_TOTAL 34
#define I915_PARAM_HAS_GPU_RESET 35
+#define I915_PARAM_CREATE_VERSION 36
typedef struct drm_i915_getparam {
int param;
@@ -450,6 +451,20 @@ struct drm_i915_gem_create {
*/
__u32 handle;
__u32 pad;
+ /**
+ * Requested flags (currently used for placement
+ * (which memory domain))
+ *
+ * You can request that the object be created from special memory
+ * rather than regular system pages using this parameter. Such
+ * irregular objects may have certain restrictions (such as CPU
+ * access to a stolen object is verboten).
+ *
+ * This can be used in the future for other purposes too
+ * e.g. specifying tiling/caching/madvise
+ */
+ __u32 flags;
+#define I915_CREATE_PLACEMENT_STOLEN (1<<0) /* Cannot use CPU mmaps or pread/pwrite */
};
struct drm_i915_gem_pread {
--
1.9.1
* [PATCH 3/4] drm/i915: Add support for stealing purgeable stolen pages
From: ankitprasad.r.sharma @ 2015-07-01 9:25 UTC
To: intel-gfx; +Cc: akash.goel, shashidhar.hiremath
From: Chris Wilson <chris@chris-wilson.co.uk>
If we run out of stolen memory when trying to allocate an object, see if
we can reap enough purgeable objects to free up enough contiguous free
space for the allocation. This is in principle very much like evicting
objects to free up enough contiguous space in the vma when binding
a new object - and you will be forgiven for thinking that the code looks
very similar.
At the moment, we do not allow userspace to allocate objects in stolen,
so there is neither the memory pressure to trigger stolen eviction nor
any purgeable objects inside the stolen arena. However, this will change
in the near future, and so better management and defragmentation of
stolen memory will become a real issue.
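For reference, purgeable objects are those userspace has marked as
expendable with the existing madvise ioctl, along the lines of:
        struct drm_i915_gem_madvise madv;
        memset(&madv, 0, sizeof(madv));
        madv.handle = handle;           /* e.g. a stolen backed object */
        madv.madv = I915_MADV_DONTNEED; /* contents may be reaped */
        ioctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
        /* madv.retained reports whether the backing store still exists */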
v2: Remember to remove the drm_mm_node.
v3: Rebased to the latest drm-intel-nightly (Ankit)
testcase: igt/gem_stolen
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem_stolen.c | 121 ++++++++++++++++++++++++++++++---
1 file changed, 110 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 348ed5a..7e216be 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -430,18 +430,29 @@ cleanup:
return NULL;
}
-struct drm_i915_gem_object *
-i915_gem_object_create_stolen(struct drm_device *dev, u32 size)
+static bool mark_free(struct drm_i915_gem_object *obj, struct list_head *unwind)
+{
+ if (obj->stolen == NULL)
+ return false;
+
+ if (obj->madv != I915_MADV_DONTNEED)
+ return false;
+
+ if (i915_gem_obj_is_pinned(obj))
+ return false;
+
+ list_add(&obj->obj_exec_link, unwind);
+ return drm_mm_scan_add_block(obj->stolen);
+}
+
+static struct drm_mm_node *
+stolen_alloc(struct drm_i915_private *dev_priv, u32 size)
{
- struct drm_i915_private *dev_priv = dev->dev_private;
- struct drm_i915_gem_object *obj;
struct drm_mm_node *stolen;
+ struct drm_i915_gem_object *obj;
+ struct list_head unwind, evict;
int ret;
- if (!drm_mm_initialized(&dev_priv->mm.stolen))
- return NULL;
-
- DRM_DEBUG_KMS("creating stolen object: size=%x\n", size);
if (size == 0)
return NULL;
@@ -451,11 +462,99 @@ i915_gem_object_create_stolen(struct drm_device *dev, u32 size)
ret = drm_mm_insert_node(&dev_priv->mm.stolen, stolen, size,
4096, DRM_MM_SEARCH_DEFAULT);
- if (ret) {
- kfree(stolen);
- return NULL;
+ if (ret == 0)
+ return stolen;
+
+ /* No more stolen memory available, or too fragmented.
+ * Try evicting purgeable objects and search again.
+ */
+
+ drm_mm_init_scan(&dev_priv->mm.stolen, size, 4096, 0);
+ INIT_LIST_HEAD(&unwind);
+
+ list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
+ if (mark_free(obj, &unwind))
+ goto found;
+
+ list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
+ if (mark_free(obj, &unwind))
+ goto found;
+
+found:
+ INIT_LIST_HEAD(&evict);
+ while (!list_empty(&unwind)) {
+ obj = list_first_entry(&unwind,
+ struct drm_i915_gem_object,
+ obj_exec_link);
+ list_del_init(&obj->obj_exec_link);
+
+ if (drm_mm_scan_remove_block(obj->stolen)) {
+ list_add(&obj->obj_exec_link, &evict);
+ drm_gem_object_reference(&obj->base);
+ }
}
+ ret = 0;
+ while (!list_empty(&evict)) {
+ obj = list_first_entry(&evict,
+ struct drm_i915_gem_object,
+ obj_exec_link);
+ list_del_init(&obj->obj_exec_link);
+
+ if (ret == 0) {
+ struct i915_vma *vma, *vma_next;
+
+ list_for_each_entry_safe(vma, vma_next,
+ &obj->vma_list,
+ vma_link)
+ if (i915_vma_unbind(vma))
+ break;
+
+ /* Stolen pins its pages to prevent the
+ * normal shrinker from processing stolen
+ * objects.
+ */
+ i915_gem_object_unpin_pages(obj);
+
+ ret = i915_gem_object_put_pages(obj);
+ if (ret == 0) {
+ i915_gem_object_release_stolen(obj);
+ obj->madv = __I915_MADV_PURGED;
+ } else
+ i915_gem_object_pin_pages(obj);
+ }
+
+ drm_gem_object_unreference(&obj->base);
+ }
+
+ if (ret == 0)
+ ret = drm_mm_insert_node(&dev_priv->mm.stolen, stolen, size,
+ 4096, DRM_MM_SEARCH_DEFAULT);
+ if (ret == 0)
+ return stolen;
+
+ kfree(stolen);
+ return NULL;
+}
+
+struct drm_i915_gem_object *
+i915_gem_object_create_stolen(struct drm_device *dev, u32 size)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct drm_i915_gem_object *obj;
+ struct drm_mm_node *stolen;
+
+ lockdep_assert_held(&dev->struct_mutex);
+
+ if (!drm_mm_initialized(&dev_priv->mm.stolen))
+ return NULL;
+
+ DRM_DEBUG_KMS("creating stolen object: size=%x\n", size);
+
+ stolen = stolen_alloc(dev_priv, size);
+ if (stolen == NULL)
+ return NULL;
+
obj = _i915_gem_object_create_stolen(dev, stolen);
if (obj)
return obj;
--
1.9.1
* [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
From: ankitprasad.r.sharma @ 2015-07-01 9:25 UTC
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel, shashidhar.hiremath
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
This patch extends the pread/pwrite functionality to objects not
backed by shmem. The access is made through the GTT interface.
This covers prime objects as well as stolen memory backed objects;
for userptr objects it remains forbidden.
v2: drop locks around slow_user_access, prefault the pages before
access (Chris)
v3: Rebased to the latest drm-intel-nightly (Ankit)
testcase: igt/gem_stolen
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 137 +++++++++++++++++++++++++++++++++++-----
1 file changed, 120 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4acf331..4be6eb4 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -629,6 +629,102 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
return ret ? - EFAULT : 0;
}
+static inline int
+slow_user_access(struct io_mapping *mapping,
+ loff_t page_base, int page_offset,
+ char __user *user_data,
+ int length, bool write)
+{
+ void __iomem *vaddr_inatomic;
+ void *vaddr;
+ unsigned long unwritten;
+
+ vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
+ /* We can use the cpu mem copy function because this is X86. */
+ vaddr = (void __force *)vaddr_inatomic + page_offset;
+ if (write)
+ unwritten = __copy_from_user(vaddr, user_data, length);
+ else
+ unwritten = __copy_to_user(user_data, vaddr, length);
+
+ io_mapping_unmap(vaddr_inatomic);
+ return unwritten;
+}
+
+static int
+i915_gem_gtt_pread_pwrite(struct drm_device *dev,
+ struct drm_i915_gem_object *obj, uint64_t size,
+ uint64_t data_offset, uint64_t data_ptr, bool write)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ char __user *user_data;
+ ssize_t remain;
+ loff_t offset, page_base;
+ int page_offset, page_length, ret = 0;
+
+ ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
+ if (ret)
+ goto out;
+
+ ret = i915_gem_object_set_to_gtt_domain(obj, write);
+ if (ret)
+ goto out_unpin;
+
+ ret = i915_gem_object_put_fence(obj);
+ if (ret)
+ goto out_unpin;
+
+ user_data = to_user_ptr(data_ptr);
+ remain = size;
+
+ offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
+
+ if (write)
+ intel_fb_obj_invalidate(obj, ORIGIN_GTT);
+
+ mutex_unlock(&dev->struct_mutex);
+ if (!write && likely(!i915.prefault_disable))
+ ret = fault_in_multipages_writeable(user_data, remain);
+
+ while (remain > 0) {
+ /* Operation in this page
+ *
+ * page_base = page offset within aperture
+ * page_offset = offset within page
+ * page_length = bytes to copy for this page
+ */
+ page_base = offset & PAGE_MASK;
+ page_offset = offset_in_page(offset);
+ page_length = remain;
+ if ((page_offset + remain) > PAGE_SIZE)
+ page_length = PAGE_SIZE - page_offset;
+
+ /* This is a slow read/write as it tries to read from
+ * and write to user memory, which may result in page
+ * faults
+ */
+ ret = slow_user_access(dev_priv->gtt.mappable, page_base,
+ page_offset, user_data,
+ page_length, write);
+
+ if (ret) {
+ ret = -EINVAL;
+ break;
+ }
+
+ remain -= page_length;
+ user_data += page_length;
+ offset += page_length;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+
+out_unpin:
+ i915_gem_object_ggtt_unpin(obj);
+out:
+ return ret;
+}
+
static int
i915_gem_shmem_pread(struct drm_device *dev,
struct drm_i915_gem_object *obj,
@@ -752,17 +848,19 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
goto out;
}
- /* prime objects have no backing filp to GEM pread/pwrite
- * pages from.
- */
- if (!obj->base.filp) {
- ret = -EINVAL;
- goto out;
- }
-
trace_i915_gem_object_pread(obj, args->offset, args->size);
- ret = i915_gem_shmem_pread(dev, obj, args, file);
+ /* pread for non shmem backed objects */
+ if (!obj->base.filp) {
+ if (obj->tiling_mode == I915_TILING_NONE)
+ ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
+ args->offset,
+ args->data_ptr,
+ false);
+ else
+ ret = -EINVAL;
+ } else
+ ret = i915_gem_shmem_pread(dev, obj, args, file);
out:
drm_gem_object_unreference(&obj->base);
@@ -1103,17 +1201,22 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
goto out;
}
- /* prime objects have no backing filp to GEM pread/pwrite
- * pages from.
- */
- if (!obj->base.filp) {
- ret = -EINVAL;
- goto out;
- }
-
trace_i915_gem_object_pwrite(obj, args->offset, args->size);
ret = -EFAULT;
+
+ /* pwrite for non shmem backed objects */
+ if (!obj->base.filp) {
+ if (obj->tiling_mode == I915_TILING_NONE)
+ ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
+ args->offset,
+ args->data_ptr,
+ true);
+ else
+ ret = -EINVAL;
+
+ goto out;
+ }
/* We can only do the GTT pwrite on untiled buffers, as otherwise
* it would end up going through the fenced access, and we'll get
* different detiling behavior between reading and writing.
--
1.9.1
* Re: [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
From: Chris Wilson @ 2015-07-01 9:54 UTC
To: ankitprasad.r.sharma; +Cc: intel-gfx, akash.goel, shashidhar.hiremath
On Wed, Jul 01, 2015 at 02:55:47PM +0530, ankitprasad.r.sharma@intel.com wrote:
> - ret = i915_gem_shmem_pread(dev, obj, args, file);
> + /* pread for non shmem backed objects */
> + if (!obj->base.filp) {
> + if (obj->tiling_mode == I915_TILING_NONE)
> + ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
> + args->offset,
> + args->data_ptr,
> + false);
> + else
> + ret = -EINVAL;
> + } else
> + ret = i915_gem_shmem_pread(dev, obj, args, file);
My preference for structuring this would be to always try the faster
method, move the can-gtt/can-shm tests into that method, and then
fall back to the slow path on a suitable errno.
That way, with the refactoring here, we place all the knowledge of
which objects work with GTT access inside the GTT accessor. Similarly
with respect to the fast shmem read path.
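i.e. roughly (sketch; the exact errno used to trigger the fallback is
illustrative):
        ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size, args->offset,
                                        args->data_ptr, false);
        if (ret == -ENODEV)
                /* object unsuitable for GTT access, fall back */
                ret = i915_gem_shmem_pread(dev, obj, args, file);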
For extra fun, we can directly read from stolen physical addresses for
gen<6.
Overall the patch lgtm, though I have some cleanups pending to the GTT
access path to handle huge objects - the ordering of those with this
doesn't matter.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
* Re: [PATCH 1/4] drm/i915: Clearing buffer objects via blitter engine
From: Tvrtko Ursulin @ 2015-07-01 14:54 UTC
To: ankitprasad.r.sharma, intel-gfx; +Cc: akash.goel, shashidhar.hiremath
Hi,
On 07/01/2015 10:25 AM, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> This patch adds support for clearing buffer objects via blitter
> engines. This is particularly useful for clearing out the memory
> from stolen region.
Because the CPU cannot access it? I would put that into the commit
message, since I think the cover letter does not go into the git
history.
> v2: Add support for using execlists & PPGTT
>
> v3: Fix issues in legacy ringbuffer submission mode
>
> v4: Rebased to the latest drm-intel-nightly (Ankit)
>
> testcase: igt/gem_stolen
>
Nitpick: usually it is "Testcase:" and all tags grouped together.
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Signed-off-by: Deepak S <deepak.s@linux.intel.com>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
> drivers/gpu/drm/i915/Makefile | 1 +
> drivers/gpu/drm/i915/i915_drv.h | 4 +
> drivers/gpu/drm/i915/i915_gem_exec.c | 201 ++++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/intel_lrc.c | 4 +-
> drivers/gpu/drm/i915/intel_lrc.h | 3 +
> drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +-
> drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
> 7 files changed, 213 insertions(+), 3 deletions(-)
> create mode 100644 drivers/gpu/drm/i915/i915_gem_exec.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index de21965..1959314 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -24,6 +24,7 @@ i915-y += i915_cmd_parser.o \
> i915_gem_debug.o \
> i915_gem_dmabuf.o \
> i915_gem_evict.o \
> + i915_gem_exec.o \
> i915_gem_execbuffer.o \
> i915_gem_gtt.o \
> i915_gem.o \
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index ea9caf2..d1e151e 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -3082,6 +3082,10 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
> int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
> int i915_gem_evict_everything(struct drm_device *dev);
>
> +/* i915_gem_exec.c */
> +int i915_gem_exec_clear_object(struct drm_i915_gem_object *obj,
> + struct drm_i915_file_private *file_priv);
> +
> /* belongs in i915_gem_gtt.h */
> static inline void i915_gem_chipset_flush(struct drm_device *dev)
> {
> diff --git a/drivers/gpu/drm/i915/i915_gem_exec.c b/drivers/gpu/drm/i915/i915_gem_exec.c
> new file mode 100644
> index 0000000..a07fda0
> --- /dev/null
> +++ b/drivers/gpu/drm/i915/i915_gem_exec.c
> @@ -0,0 +1,201 @@
> +/*
> + * Copyright © 2013 Intel Corporation
Is the year correct?
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + * Chris Wilson <chris@chris-wilson.co.uk>
And author?
> + *
> + */
> +
> +#include <drm/drmP.h>
> +#include <drm/i915_drm.h>
> +#include "i915_drv.h"
> +
> +#define GEN8_COLOR_BLT_CMD (2<<29 | 0x50<<22)
> +
> +#define BPP_8 0
> +#define BPP_16 (1<<24)
> +#define BPP_32 (1<<25 | 1<<24)
> +
> +#define ROP_FILL_COPY (0xf0 << 16)
> +
> +static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj,
> + struct intel_engine_cs *ring,
> + struct intel_context *ctx,
> + struct drm_i915_gem_request **req)
> +{
> + int ret;
> +
> + ret = i915_gem_object_sync(obj, ring, req);
> + if (ret)
> + return ret;
> +
> + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
> + if (i915_gem_clflush_object(obj, false))
> + i915_gem_chipset_flush(obj->base.dev);
> + obj->base.write_domain &= ~I915_GEM_DOMAIN_CPU;
> + }
> + if (obj->base.write_domain & I915_GEM_DOMAIN_GTT) {
> + wmb();
> + obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
> + }
All this could be replaced with i915_gem_object_set_to_gtt_domain, no?
> +
> + return i915.enable_execlists ?
> + logical_ring_invalidate_all_caches(*req) :
> + intel_ring_invalidate_all_caches(*req);
And this is done on actual submission for you by the lower levels so you
don't need to call it directly.
> +}
> +
> +static void i915_gem_exec_dirty_object(struct drm_i915_gem_object *obj,
> + struct intel_engine_cs *ring,
> + struct i915_address_space *vm,
> + struct drm_i915_gem_request *req)
> +{
> + i915_gem_request_assign(&obj->last_write_req, req);
> + obj->base.read_domains = I915_GEM_DOMAIN_RENDER;
> + obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
> + i915_vma_move_to_active(i915_gem_obj_to_vma(obj, vm), req);
> + obj->dirty = 1;
> +
> + ring->gpu_caches_dirty = true;
> +}
> +
> +int i915_gem_exec_clear_object(struct drm_i915_gem_object *obj,
> + struct drm_i915_file_private *file_priv)
> +{
Function needs some good kerneldoc.
> + struct drm_device *dev = obj->base.dev;
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct intel_engine_cs *ring;
> + struct intel_context *ctx;
> + struct intel_ringbuffer *ringbuf;
> + struct i915_address_space *vm;
> + struct drm_i915_gem_request *req;
> + int ret = 0;
> +
> + lockdep_assert_held(&dev->struct_mutex);
I think there was some guidance that lockdep_assert_held is compiled
out when lockdep is not enabled in the kernel and that WARN_ON is
preferred. In this case that would probably be WARN_ON_ONCE and
returning an error.
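e.g. (sketch):
        if (WARN_ON_ONCE(!mutex_is_locked(&dev->struct_mutex)))
                return -EINVAL;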
> +
> + ring = &dev_priv->ring[HAS_BLT(dev) ? BCS : RCS];
> + ctx = i915_gem_context_get(file_priv, DEFAULT_CONTEXT_HANDLE);
> + /* Allocate a request for this operation nice and early. */
> + ret = i915_gem_request_alloc(ring, ctx, &req);
> + if (ret)
> + return ret;
> +
> + if (ctx->ppgtt)
> + vm = &ctx->ppgtt->base;
> + else
> + vm = &dev_priv->gtt.base;
> +
> + if (i915.enable_execlists && !ctx->engine[ring->id].state) {
> + ret = intel_lr_context_deferred_create(ctx, ring);
i915_gem_context_get above and this call are very similar to what
i915_gem_validate_context does. It seems to me it would be better to
call the latter function here.
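i.e. roughly (sketch; note that helper takes the drm_file rather than
the file_priv passed in here):
        ctx = i915_gem_validate_context(dev, file, ring, DEFAULT_CONTEXT_HANDLE);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);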
> + if (ret)
> + return ret;
Failure path (and some below) leaks the request. i915_gem_request_cancel
is the new API to be called I understand.
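i.e. each failure exit after the allocation becomes (sketch):
        if (ret) {
                i915_gem_request_cancel(req);
                return ret;
        }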
> + }
> +
> + ringbuf = ctx->engine[ring->id].ringbuf;
> +
> + ret = i915_gem_object_pin(obj, vm, PAGE_SIZE, 0);
> + if (ret)
> + return ret;
> +
> + if (obj->tiling_mode && INTEL_INFO(dev)->gen <= 3) {
> + ret = i915_gem_object_put_fence(obj);
> + if (ret)
> + goto unpin;
> + }
Why is this needed?
Could it be called unconditionally and still work?
At least I would recommend a comment explaining it.
> + ret = i915_gem_exec_flush_object(obj, ring, ctx, &req);
> + if (ret)
> + goto unpin;
As I said above one call to i915_gem_object_set_to_gtt_domain would be
enough I think.
> + if (i915.enable_execlists) {
> + if (dev_priv->info.gen >= 8) {
> + ret = intel_logical_ring_begin(req, 8);
> + if (ret)
> + goto unpin;
> +
> + intel_logical_ring_emit(ringbuf, GEN8_COLOR_BLT_CMD |
> + BLT_WRITE_RGBA |
> + (7-2));
> + intel_logical_ring_emit(ringbuf, BPP_32 |
> + ROP_FILL_COPY |
> + PAGE_SIZE);
> + intel_logical_ring_emit(ringbuf, 0);
> + intel_logical_ring_emit(ringbuf,
> + obj->base.size >> PAGE_SHIFT
> + << 16 | PAGE_SIZE / 4);
> + intel_logical_ring_emit(ringbuf,
> + i915_gem_obj_offset(obj, vm));
> + intel_logical_ring_emit(ringbuf, 0);
> + intel_logical_ring_emit(ringbuf, 0);
> + intel_logical_ring_emit(ringbuf, MI_NOOP);
> +
> + intel_logical_ring_advance(ringbuf);
> + } else {
> + DRM_ERROR("Execlists not supported for gen %d\n",
> + dev_priv->info.gen);
> + ret = -EINVAL;
I would put a WARN_ON_ONCE here, or even just return -EINVAL. If the
driver is so messed up in general that execlists are enabled < gen8 I
think there is no point logging errors about it from here. Would also
save you one indentation level.
> + goto unpin;
> + }
> + } else {
> + if (IS_GEN8(dev)) {
> + ret = intel_ring_begin(req, 8);
> + if (ret)
> + goto unpin;
> +
> + intel_ring_emit(ring, GEN8_COLOR_BLT_CMD |
> + BLT_WRITE_RGBA | (7-2));
> + intel_ring_emit(ring, BPP_32 |
> + ROP_FILL_COPY | PAGE_SIZE);
> + intel_ring_emit(ring, 0);
> + intel_ring_emit(ring,
> + obj->base.size >> PAGE_SHIFT << 16 |
> + PAGE_SIZE / 4);
> + intel_ring_emit(ring, i915_gem_obj_offset(obj, vm));
> + intel_ring_emit(ring, 0);
> + intel_ring_emit(ring, 0);
> + intel_ring_emit(ring, MI_NOOP);
Such a pity that these two emit blocks need to be duplicated, but I
guess it is what it is for now.
> + } else {
> + ret = intel_ring_begin(req, 6);
> + if (ret)
> + goto unpin;
> +
> + intel_ring_emit(ring, COLOR_BLT_CMD |
> + BLT_WRITE_RGBA);
> + intel_ring_emit(ring, BPP_32 |
> + ROP_FILL_COPY | PAGE_SIZE);
> + intel_ring_emit(ring,
> + obj->base.size >> PAGE_SHIFT << 16 |
> + PAGE_SIZE);
> + intel_ring_emit(ring, i915_gem_obj_offset(obj, vm));
> + intel_ring_emit(ring, 0);
> + intel_ring_emit(ring, MI_NOOP);
> + }
> +
> + __intel_ring_advance(ring);
> + }
> +
> + i915_gem_exec_dirty_object(obj, ring, vm, req);
Where is this request actually submitted?
Regards,
Tvrtko
* Re: [PATCH 2/4] drm/i915: Support for creating Stolen memory backed objects
From: Tvrtko Ursulin @ 2015-07-01 15:06 UTC
To: ankitprasad.r.sharma, intel-gfx; +Cc: akash.goel, shashidhar.hiremath
On 07/01/2015 10:25 AM, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> Extend the drm_i915_gem_create structure to add support for
> creating Stolen memory backed objects. Added a new flag through
> which user can specify the preference to allocate the object from
> stolen memory, which if set, an attempt will be made to allocate
> the object from stolen memory subject to the availability of
> free space in the stolen region.
>
> v2: Rebased to the latest drm-intel-nightly (Ankit)
>
> testcase: igt/gem_stolen
>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
> drivers/gpu/drm/i915/i915_dma.c | 3 +++
> drivers/gpu/drm/i915/i915_gem.c | 31 +++++++++++++++++++++++++++----
> include/uapi/drm/i915_drm.h | 15 +++++++++++++++
> 3 files changed, 45 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index c5349fa..6045749 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
> value = i915.enable_hangcheck &&
> intel_has_gpu_reset(dev);
> break;
> + case I915_PARAM_CREATE_VERSION:
> + value = 1;
Shouldn't it be 2?
> + break;
> default:
> DRM_DEBUG("Unknown parameter %d\n", param->param);
> return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a2a4a27..4acf331 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -391,7 +391,8 @@ static int
> i915_gem_create(struct drm_file *file,
> struct drm_device *dev,
> uint64_t size,
> - uint32_t *handle_p)
> + uint32_t *handle_p,
> + uint32_t flags)
> {
> struct drm_i915_gem_object *obj;
> int ret;
> @@ -401,8 +402,29 @@ i915_gem_create(struct drm_file *file,
> if (size == 0)
> return -EINVAL;
>
> + if (flags & ~(I915_CREATE_PLACEMENT_STOLEN))
> + return -EINVAL;
> +
> /* Allocate the new object */
> - obj = i915_gem_alloc_object(dev, size);
> + if (flags & I915_CREATE_PLACEMENT_STOLEN) {
> + mutex_lock(&dev->struct_mutex);
Probably need the interruptible variant so userspace can Ctrl-C if
things get stuck in submission/waiting.
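i.e. (sketch):
        ret = i915_mutex_lock_interruptible(dev);
        if (ret)
                return ret;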
> + obj = i915_gem_object_create_stolen(dev, size);
> + if (!obj) {
> + mutex_unlock(&dev->struct_mutex);
> + return -ENOMEM;
> + }
> +
> + ret = i915_gem_exec_clear_object(obj, file->driver_priv);
I would put a comment here saying why it is important to clear stolen
memory.
> + if (ret) {
> + i915_gem_object_free(obj);
This should probably be drm_gem_object_unreference.
> + mutex_unlock(&dev->struct_mutex);
> + return ret;
> + }
> +
> + mutex_unlock(&dev->struct_mutex);
> + } else
> + obj = i915_gem_alloc_object(dev, size);
Need curly braces on both branches.
> if (obj == NULL)
> return -ENOMEM;
>
> @@ -425,7 +447,7 @@ i915_gem_dumb_create(struct drm_file *file,
> args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
> args->size = args->pitch * args->height;
> return i915_gem_create(file, dev,
> - args->size, &args->handle);
> + args->size, &args->handle, 0);
> }
>
> /**
> @@ -438,7 +460,8 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data,
> struct drm_i915_gem_create *args = data;
>
> return i915_gem_create(file, dev,
> - args->size, &args->handle);
> + args->size, &args->handle,
> + args->flags);
> }
>
> static inline int
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index f88cc1c..87992d1 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait {
> #define I915_PARAM_SUBSLICE_TOTAL 33
> #define I915_PARAM_EU_TOTAL 34
> #define I915_PARAM_HAS_GPU_RESET 35
> +#define I915_PARAM_CREATE_VERSION 36
>
> typedef struct drm_i915_getparam {
> int param;
> @@ -450,6 +451,20 @@ struct drm_i915_gem_create {
> */
> __u32 handle;
> __u32 pad;
> + /**
> + * Requested flags (currently used for placement
> + * (which memory domain))
> + *
> + * You can request that the object be created from special memory
> + * rather than regular system pages using this parameter. Such
> + * irregular objects may have certain restrictions (such as CPU
> + * access to a stolen object is verboten).
I'd just use English all the way. :)
> + *
> + * This can be used in the future for other purposes too
> + * e.g. specifying tiling/caching/madvise
> + */
> + __u32 flags;
> +#define I915_CREATE_PLACEMENT_STOLEN (1<<0) /* Cannot use CPU mmaps or pread/pwrite */
> };
>
> struct drm_i915_gem_pread {
>
Regards,
Tvrtko
* Re: [PATCH 3/4] drm/i915: Add support for stealing purgeable stolen pages
From: Tvrtko Ursulin @ 2015-07-01 16:17 UTC
To: ankitprasad.r.sharma, intel-gfx; +Cc: akash.goel, shashidhar.hiremath
On 07/01/2015 10:25 AM, ankitprasad.r.sharma@intel.com wrote:
> From: Chris Wilson <chris@chris-wilson.co.uk>
>
> If we run out of stolen memory when trying to allocate an object, see if
> we can reap enough purgeable objects to free up enough contiguous free
> space for the allocation. This is in principle very much like evicting
> objects to free up enough contiguous space in the vma when binding
> a new object - and you will be forgiven for thinking that the code looks
> very similar.
>
> At the moment, we do not allow userspace to allocate objects in stolen,
> so there is neither the memory pressure to trigger stolen eviction nor
> any purgeable objects inside the stolen arena. However, this will change
> in the near future, and so better management and defragmentation of
> stolen memory will become a real issue.
>
> v2: Remember to remove the drm_mm_node.
>
> v3: Rebased to the latest drm-intel-nightly (Ankit)
>
> testcase: igt/gem_stolen
>
Tidy "Testcase:" tag again.
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem_stolen.c | 121 ++++++++++++++++++++++++++++++---
> 1 file changed, 110 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
> index 348ed5a..7e216be 100644
> --- a/drivers/gpu/drm/i915/i915_gem_stolen.c
> +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
> @@ -430,18 +430,29 @@ cleanup:
> return NULL;
> }
>
> -struct drm_i915_gem_object *
> -i915_gem_object_create_stolen(struct drm_device *dev, u32 size)
> +static bool mark_free(struct drm_i915_gem_object *obj, struct list_head *unwind)
> +{
> + if (obj->stolen == NULL)
> + return false;
> +
> + if (obj->madv != I915_MADV_DONTNEED)
> + return false;
> +
> + if (i915_gem_obj_is_pinned(obj))
> + return false;
> +
> + list_add(&obj->obj_exec_link, unwind);
> + return drm_mm_scan_add_block(obj->stolen);
> +}
> +
> +static struct drm_mm_node *
> +stolen_alloc(struct drm_i915_private *dev_priv, u32 size)
> {
> - struct drm_i915_private *dev_priv = dev->dev_private;
> - struct drm_i915_gem_object *obj;
> struct drm_mm_node *stolen;
> + struct drm_i915_gem_object *obj;
> + struct list_head unwind, evict;
> int ret;
>
> - if (!drm_mm_initialized(&dev_priv->mm.stolen))
> - return NULL;
> -
> - DRM_DEBUG_KMS("creating stolen object: size=%x\n", size);
> if (size == 0)
> return NULL;
>
> @@ -451,11 +462,99 @@ i915_gem_object_create_stolen(struct drm_device *dev, u32 size)
>
> ret = drm_mm_insert_node(&dev_priv->mm.stolen, stolen, size,
> 4096, DRM_MM_SEARCH_DEFAULT);
> - if (ret) {
> - kfree(stolen);
> - return NULL;
> + if (ret == 0)
> + return stolen;
> +
> + /* No more stolen memory available, or too fragmented.
> + * Try evicting purgeable objects and search again.
> + */
> +
> + drm_mm_init_scan(&dev_priv->mm.stolen, size, 4096, 0);
> + INIT_LIST_HEAD(&unwind);
> +
> + list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
> + if (mark_free(obj, &unwind))
> + goto found;
> +
> + list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list)
> + if (mark_free(obj, &unwind))
> + goto found;
> +
> +found:
> + INIT_LIST_HEAD(&evict);
> + while (!list_empty(&unwind)) {
> + obj = list_first_entry(&unwind,
> + struct drm_i915_gem_object,
> + obj_exec_link);
> + list_del_init(&obj->obj_exec_link);
> +
> + if (drm_mm_scan_remove_block(obj->stolen)) {
> + list_add(&obj->obj_exec_link, &evict);
> + drm_gem_object_reference(&obj->base);
> + }
> }
>
> + ret = 0;
> + while (!list_empty(&evict)) {
> + obj = list_first_entry(&evict,
> + struct drm_i915_gem_object,
> + obj_exec_link);
> + list_del_init(&obj->obj_exec_link);
> +
> + if (ret == 0) {
> + struct i915_vma *vma, *vma_next;
> +
> + list_for_each_entry_safe(vma, vma_next,
> + &obj->vma_list,
> + vma_link)
> + if (i915_vma_unbind(vma))
> + break;
> +
> + /* Stolen pins its pages to prevent the
> + * normal shrinker from processing stolen
> + * objects.
> + */
> + i915_gem_object_unpin_pages(obj);
> +
> + ret = i915_gem_object_put_pages(obj);
> + if (ret == 0) {
> + i915_gem_object_release_stolen(obj);
> + obj->madv = __I915_MADV_PURGED;
> + } else
> + i915_gem_object_pin_pages(obj);
Both blocks need curly braces if one has them.
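i.e. the tail should read:

	if (ret == 0) {
		i915_gem_object_release_stolen(obj);
		obj->madv = __I915_MADV_PURGED;
	} else {
		i915_gem_object_pin_pages(obj);
	}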
With these two tiny details fixed,
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Regards,
Tvrtko
* Re: [PATCH 2/4] drm/i915: Support for creating Stolen memory backed objects
2015-07-01 15:06 ` Tvrtko Ursulin
@ 2015-07-01 16:19 ` Chris Wilson
2015-07-02 9:37 ` Tvrtko Ursulin
0 siblings, 1 reply; 36+ messages in thread
From: Chris Wilson @ 2015-07-01 16:19 UTC (permalink / raw)
To: Tvrtko Ursulin
Cc: ankitprasad.r.sharma, intel-gfx, akash.goel, shashidhar.hiremath
On Wed, Jul 01, 2015 at 04:06:49PM +0100, Tvrtko Ursulin wrote:
>
>
> On 07/01/2015 10:25 AM, ankitprasad.r.sharma@intel.com wrote:
> >From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >
> >Extend the drm_i915_gem_create structure to add support for
> >creating Stolen memory backed objects. Added a new flag through
> >which user can specify the preference to allocate the object from
> >stolen memory, which if set, an attempt will be made to allocate
> >the object from stolen memory subject to the availability of
> >free space in the stolen region.
> >
> >v2: Rebased to the latest drm-intel-nightly (Ankit)
> >
> >testcase: igt/gem_stolen
> >
> >Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >---
> > drivers/gpu/drm/i915/i915_dma.c | 3 +++
> > drivers/gpu/drm/i915/i915_gem.c | 31 +++++++++++++++++++++++++++----
> > include/uapi/drm/i915_drm.h | 15 +++++++++++++++
> > 3 files changed, 45 insertions(+), 4 deletions(-)
> >
> >diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> >index c5349fa..6045749 100644
> >--- a/drivers/gpu/drm/i915/i915_dma.c
> >+++ b/drivers/gpu/drm/i915/i915_dma.c
> >@@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
> > value = i915.enable_hangcheck &&
> > intel_has_gpu_reset(dev);
> > break;
> >+ case I915_PARAM_CREATE_VERSION:
> >+ value = 1;
>
> Shouldn't it be 2?
But 1 is the 2nd number, discounting all those pesky negative versions :)
> > /* Allocate the new object */
> >- obj = i915_gem_alloc_object(dev, size);
> >+ if (flags & I915_CREATE_PLACEMENT_STOLEN) {
> >+ mutex_lock(&dev->struct_mutex);
>
> Probably need the interruptible variant so userspace can Ctrl-C if
> things get stuck in submission/waiting.
Paulo has been working on removing this struct_mutex requirement, in
which case internally we will take a stolen_mutex around the drm_mm as
required.
> >+ obj = i915_gem_object_create_stolen(dev, size);
> >+ if (!obj) {
> >+ mutex_unlock(&dev->struct_mutex);
> >+ return -ENOMEM;
> >+ }
> >+
And pushes the struct_mutex to here (one day, one glorious day that will
be a vm->mutex or something!).
And yes, you will want, nay must use,
ret = i915_mutex_lock_interruptible(dev);
before thinking about using the GPU.
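Which would make the stolen branch look roughly like this (a sketch, also
folding in the unreference fix noted below):

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

	obj = i915_gem_object_create_stolen(dev, size);
	if (!obj) {
		mutex_unlock(&dev->struct_mutex);
		return -ENOMEM;
	}

	/* always clear fresh buffers before handing to userspace */
	ret = i915_gem_exec_clear_object(obj, file->driver_priv);
	if (ret) {
		drm_gem_object_unreference(&obj->base);
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}

	mutex_unlock(&dev->struct_mutex);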
> >+ ret = i915_gem_exec_clear_object(obj, file->driver_priv);
>
> I would put a comment here saying why it is important to clear
> stolen memory.
Userspace ABI (and kernel ABI in general) is that we do not hand back
uncleared buffers. Something to do with bank card details I guess.
So just:
/* always clear fresh buffers before handing to userspace */
An alternative is that I've been contemplating a private page pool to
reuse and not clear. It's a trade-off between having a large cache in
userspace, and a less flexible cache in the kernel with the supposed
advantage that the kernel cache could be more space efficient.
> >+ if (ret) {
> >+ i915_gem_object_free(obj);
>
> This should probably be drm_gem_object_unreference.
>
> >+ mutex_unlock(&dev->struct_mutex);
> >+ return ret;
> >+ }
> >+
> >+ mutex_unlock(&dev->struct_mutex);
> >+ } else
> >+ obj = i915_gem_alloc_object(dev, size);
>
> Need curly braces on both branches.
I am sure someone hacked CODING_STYLE. Or I should.
> >@@ -355,6 +355,7 @@ typedef struct drm_i915_irq_wait {
> > #define I915_PARAM_SUBSLICE_TOTAL 33
> > #define I915_PARAM_EU_TOTAL 34
> > #define I915_PARAM_HAS_GPU_RESET 35
> >+#define I915_PARAM_CREATE_VERSION 36
> >
> > typedef struct drm_i915_getparam {
> > int param;
> >@@ -450,6 +451,20 @@ struct drm_i915_gem_create {
> > */
> > __u32 handle;
> > __u32 pad;
> >+ /**
> >+ * Requested flags (currently used for placement
> >+ * (which memory domain))
> >+ *
> >+ * You can request that the object be created from special memory
> >+ * rather than regular system pages using this parameter. Such
> >+ * irregular objects may have certain restrictions (such as CPU
> >+ * access to a stolen object is verboten).
>
> I'd just use English all the way. :)
Heh! English is a highly adaptable language and steals good words all
the time!
> >+ *
> >+ * This can be used in the future for other purposes too
> >+ * e.g. specifying tiling/caching/madvise
> >+ */
> >+ __u32 flags;
> >+#define I915_CREATE_PLACEMENT_STOLEN (1<<0) /* Cannot use CPU mmaps or pread/pwrite */
Note that we dropped the pread/pwrite restriction.
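So the comment on the flag wants updating, something like:

#define I915_CREATE_PLACEMENT_STOLEN (1<<0) /* Cannot use CPU mmaps */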
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
* Re: [PATCH 2/4] drm/i915: Support for creating Stolen memory backed objects
2015-07-01 9:25 ` [PATCH 2/4] drm/i915: Support for creating Stolen memory backed objects ankitprasad.r.sharma
2015-07-01 15:06 ` Tvrtko Ursulin
@ 2015-07-01 16:20 ` Tvrtko Ursulin
1 sibling, 0 replies; 36+ messages in thread
From: Tvrtko Ursulin @ 2015-07-01 16:20 UTC (permalink / raw)
To: ankitprasad.r.sharma, intel-gfx; +Cc: akash.goel, shashidhar.hiremath
On 07/01/2015 10:25 AM, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> Extend the drm_i915_gem_create structure to add support for
> creating Stolen memory backed objects. Added a new flag through
> which user can specify the preference to allocate the object from
> stolen memory, which if set, an attempt will be made to allocate
> the object from stolen memory subject to the availability of
> free space in the stolen region.
>
> v2: Rebased to the latest drm-intel-nightly (Ankit)
>
> testcase: igt/gem_stolen
>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
> drivers/gpu/drm/i915/i915_dma.c | 3 +++
> drivers/gpu/drm/i915/i915_gem.c | 31 +++++++++++++++++++++++++++----
> include/uapi/drm/i915_drm.h | 15 +++++++++++++++
> 3 files changed, 45 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index c5349fa..6045749 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
> value = i915.enable_hangcheck &&
> intel_has_gpu_reset(dev);
> break;
> + case I915_PARAM_CREATE_VERSION:
> + value = 1;
> + break;
> default:
> DRM_DEBUG("Unknown parameter %d\n", param->param);
> return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index a2a4a27..4acf331 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -391,7 +391,8 @@ static int
> i915_gem_create(struct drm_file *file,
> struct drm_device *dev,
> uint64_t size,
> - uint32_t *handle_p)
> + uint32_t *handle_p,
> + uint32_t flags)
> {
> struct drm_i915_gem_object *obj;
> int ret;
> @@ -401,8 +402,29 @@ i915_gem_create(struct drm_file *file,
> if (size == 0)
> return -EINVAL;
>
> + if (flags & ~(I915_CREATE_PLACEMENT_STOLEN))
> + return -EINVAL;
> +
> /* Allocate the new object */
> - obj = i915_gem_alloc_object(dev, size);
> + if (flags & I915_CREATE_PLACEMENT_STOLEN) {
> + mutex_lock(&dev->struct_mutex);
> + obj = i915_gem_object_create_stolen(dev, size);
One more thing here: size is u64 in this function but
i915_gem_object_create_stolen takes u32. Is the compiler not noticing this?
(And i915_gem_alloc_object uses size_t, for a complete win!) :D
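An explicit check would at least make the truncation visible, e.g. (just a
sketch):

	if (size > UINT_MAX)
		return -EINVAL;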
Regards,
Tvrtko
* Re: [PATCH 1/4] drm/i915: Clearing buffer objects via blitter engine
2015-07-01 14:54 ` Tvrtko Ursulin
@ 2015-07-01 16:30 ` Chris Wilson
2015-07-02 9:30 ` Tvrtko Ursulin
0 siblings, 1 reply; 36+ messages in thread
From: Chris Wilson @ 2015-07-01 16:30 UTC (permalink / raw)
To: Tvrtko Ursulin
Cc: ankitprasad.r.sharma, intel-gfx, akash.goel, shashidhar.hiremath
On Wed, Jul 01, 2015 at 03:54:55PM +0100, Tvrtko Ursulin wrote:
>
> Hi,
>
> On 07/01/2015 10:25 AM, ankitprasad.r.sharma@intel.com wrote:
> >From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >
> >This patch adds support for clearing buffer objects via blitter
> >engines. This is particularly useful for clearing out the memory
> >from stolen region.
>
> Because the CPU cannot access it? I would put that into the commit
> message since I think the cover letter does not go into the git history.
>
> >v2: Add support for using execlists & PPGTT
> >
> >v3: Fix issues in legacy ringbuffer submission mode
> >
> >v4: Rebased to the latest drm-intel-nightly (Ankit)
> >
> >testcase: igt/gem_stolen
> >
>
> Nitpick: usually it is "Testcase:" and all tags grouped together.
>
> >Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >Signed-off-by: Deepak S <deepak.s@linux.intel.com>
> >Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >---
> > drivers/gpu/drm/i915/Makefile | 1 +
> > drivers/gpu/drm/i915/i915_drv.h | 4 +
> > drivers/gpu/drm/i915/i915_gem_exec.c | 201 ++++++++++++++++++++++++++++++++
> > drivers/gpu/drm/i915/intel_lrc.c | 4 +-
> > drivers/gpu/drm/i915/intel_lrc.h | 3 +
> > drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +-
> > drivers/gpu/drm/i915/intel_ringbuffer.h | 1 +
> > 7 files changed, 213 insertions(+), 3 deletions(-)
> > create mode 100644 drivers/gpu/drm/i915/i915_gem_exec.c
> >
> >diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> >index de21965..1959314 100644
> >--- a/drivers/gpu/drm/i915/Makefile
> >+++ b/drivers/gpu/drm/i915/Makefile
> >@@ -24,6 +24,7 @@ i915-y += i915_cmd_parser.o \
> > i915_gem_debug.o \
> > i915_gem_dmabuf.o \
> > i915_gem_evict.o \
> >+ i915_gem_exec.o \
> > i915_gem_execbuffer.o \
> > i915_gem_gtt.o \
> > i915_gem.o \
> >diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >index ea9caf2..d1e151e 100644
> >--- a/drivers/gpu/drm/i915/i915_drv.h
> >+++ b/drivers/gpu/drm/i915/i915_drv.h
> >@@ -3082,6 +3082,10 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
> > int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
> > int i915_gem_evict_everything(struct drm_device *dev);
> >
> >+/* i915_gem_exec.c */
> >+int i915_gem_exec_clear_object(struct drm_i915_gem_object *obj,
> >+ struct drm_i915_file_private *file_priv);
> >+
> > /* belongs in i915_gem_gtt.h */
> > static inline void i915_gem_chipset_flush(struct drm_device *dev)
> > {
> >diff --git a/drivers/gpu/drm/i915/i915_gem_exec.c b/drivers/gpu/drm/i915/i915_gem_exec.c
> >new file mode 100644
> >index 0000000..a07fda0
> >--- /dev/null
> >+++ b/drivers/gpu/drm/i915/i915_gem_exec.c
> >@@ -0,0 +1,201 @@
> >+/*
> >+ * Copyright © 2013 Intel Corporation
>
> Is the year correct?
Yes, but it should be extended to include the lrc mess.
> >+ *
> >+ * Permission is hereby granted, free of charge, to any person obtaining a
> >+ * copy of this software and associated documentation files (the "Software"),
> >+ * to deal in the Software without restriction, including without limitation
> >+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> >+ * and/or sell copies of the Software, and to permit persons to whom the
> >+ * Software is furnished to do so, subject to the following conditions:
> >+ *
> >+ * The above copyright notice and this permission notice (including the next
> >+ * paragraph) shall be included in all copies or substantial portions of the
> >+ * Software.
> >+ *
> >+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> >+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> >+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> >+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> >+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> >+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> >+ * IN THE SOFTWARE.
> >+ *
> >+ * Authors:
> >+ * Chris Wilson <chris@chris-wilson.co.uk>
>
> And author?
Yes. If we discount the ugly changes to support two parallel ring
interface APIs that are doing identical jobs.
> >+ *
> >+ */
> >+
> >+#include <drm/drmP.h>
> >+#include <drm/i915_drm.h>
> >+#include "i915_drv.h"
> >+
> >+#define GEN8_COLOR_BLT_CMD (2<<29 | 0x50<<22)
> >+
> >+#define BPP_8 0
> >+#define BPP_16 (1<<24)
> >+#define BPP_32 (1<<25 | 1<<24)
> >+
> >+#define ROP_FILL_COPY (0xf0 << 16)
> >+
> >+static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj,
> >+ struct intel_engine_cs *ring,
> >+ struct intel_context *ctx,
> >+ struct drm_i915_gem_request **req)
> >+{
> >+ int ret;
> >+
> >+ ret = i915_gem_object_sync(obj, ring, req);
> >+ if (ret)
> >+ return ret;
> >+
> >+ if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
> >+ if (i915_gem_clflush_object(obj, false))
> >+ i915_gem_chipset_flush(obj->base.dev);
> >+ obj->base.write_domain &= ~I915_GEM_DOMAIN_CPU;
> >+ }
> >+ if (obj->base.write_domain & I915_GEM_DOMAIN_GTT) {
> >+ wmb();
> >+ obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
> >+ }
>
> All this could be replaced with i915_gem_object_set_to_gtt_domain, no?
No. Technically this is i915_gem_execbuffer_move_to_gpu().
> >+
> >+ return i915.enable_execlists ?
> >+ logical_ring_invalidate_all_caches(*req) :
> >+ intel_ring_invalidate_all_caches(*req);
>
> And this is done on actual submission for you by the lower levels so
> you don't need to call it directly.
What submission? We don't build a batch, we are building a raw request
to do the operation from the ring.
> >+}
> >+
> >+static void i915_gem_exec_dirty_object(struct drm_i915_gem_object *obj,
> >+ struct intel_engine_cs *ring,
> >+ struct i915_address_space *vm,
> >+ struct drm_i915_gem_request *req)
> >+{
> >+ i915_gem_request_assign(&obj->last_write_req, req);
> >+ obj->base.read_domains = I915_GEM_DOMAIN_RENDER;
> >+ obj->base.write_domain = I915_GEM_DOMAIN_RENDER;
> >+ i915_vma_move_to_active(i915_gem_obj_to_vma(obj, vm), req);
> >+ obj->dirty = 1;
> >+
> >+ ring->gpu_caches_dirty = true;
> >+}
> >+
> >+int i915_gem_exec_clear_object(struct drm_i915_gem_object *obj,
> >+ struct drm_i915_file_private *file_priv)
> >+{
>
> Function needs some good kerneldoc.
>
> >+ struct drm_device *dev = obj->base.dev;
> >+ struct drm_i915_private *dev_priv = dev->dev_private;
> >+ struct intel_engine_cs *ring;
> >+ struct intel_context *ctx;
> >+ struct intel_ringbuffer *ringbuf;
> >+ struct i915_address_space *vm;
> >+ struct drm_i915_gem_request *req;
> >+ int ret = 0;
> >+
> >+ lockdep_assert_held(&dev->struct_mutex);
>
> I think there was some guidance that lockdep_assert_held is
> compiled out when lockdep is not in the kernel and that WARN_ON is
> preferred. In this case that would probably be WARN_ON_ONCE and
> return error.
Hah, this predates that and I still disagree.
> >+ ring = &dev_priv->ring[HAS_BLT(dev) ? BCS : RCS];
> >+ ctx = i915_gem_context_get(file_priv, DEFAULT_CONTEXT_HANDLE);
> >+ /* Allocate a request for this operation nice and early. */
> >+ ret = i915_gem_request_alloc(ring, ctx, &req);
> >+ if (ret)
> >+ return ret;
> >+
> >+ if (ctx->ppgtt)
> >+ vm = &ctx->ppgtt->base;
> >+ else
> >+ vm = &dev_priv->gtt.base;
> >+
> >+ if (i915.enable_execlists && !ctx->engine[ring->id].state) {
> >+ ret = intel_lr_context_deferred_create(ctx, ring);
>
> i915_gem_context_get above and this call are very similar to what
> i915_gem_validate_context does. It seems to me it would be better to
> call the latter function here.
No, the intel_lrc API is absolute garbage and needs to be taken out the
back and shot. Until that is done, I wouldn't bother continuing to try
and use the interface at all.
All that needs to happen here is:
req = i915_gem_request_alloc(ring, ring->default_context);
and for the request/lrc to go off and dtrt.
> >+ }
> >+
> >+ ringbuf = ctx->engine[ring->id].ringbuf;
> >+
> >+ ret = i915_gem_object_pin(obj, vm, PAGE_SIZE, 0);
> >+ if (ret)
> >+ return ret;
> >+
> >+ if (obj->tiling_mode && INTEL_INFO(dev)->gen <= 3) {
> >+ ret = i915_gem_object_put_fence(obj);
> >+ if (ret)
> >+ goto unpin;
> >+ }
>
> Why is this needed?
Because it's a requirement of the op being used on those gen. Later gen
can keep the fence.
> Could it be called unconditionally and still work?
>
> At least I would recommend a comment explaining it.
>
> >+ ret = i915_gem_exec_flush_object(obj, ring, ctx, &req);
> >+ if (ret)
> >+ goto unpin;
>
> As I said above one call to i915_gem_object_set_to_gtt_domain would
> be enough I think.
But wrong ;-)
> >+ if (i915.enable_execlists) {
> >+ if (dev_priv->info.gen >= 8) {
> >+ ret = intel_logical_ring_begin(req, 8);
> >+ if (ret)
> >+ goto unpin;
> >+
> >+ intel_logical_ring_emit(ringbuf, GEN8_COLOR_BLT_CMD |
> >+ BLT_WRITE_RGBA |
> >+ (7-2));
> >+ intel_logical_ring_emit(ringbuf, BPP_32 |
> >+ ROP_FILL_COPY |
> >+ PAGE_SIZE);
> >+ intel_logical_ring_emit(ringbuf, 0);
> >+ intel_logical_ring_emit(ringbuf,
> >+ obj->base.size >> PAGE_SHIFT
> >+ << 16 | PAGE_SIZE / 4);
> >+ intel_logical_ring_emit(ringbuf,
> >+ i915_gem_obj_offset(obj, vm));
> >+ intel_logical_ring_emit(ringbuf, 0);
> >+ intel_logical_ring_emit(ringbuf, 0);
> >+ intel_logical_ring_emit(ringbuf, MI_NOOP);
> >+
> >+ intel_logical_ring_advance(ringbuf);
> >+ } else {
> >+ DRM_ERROR("Execlists not supported for gen %d\n",
> >+ dev_priv->info.gen);
> >+ ret = -EINVAL;
>
> I would put a WARN_ON_ONCE here, or even just return -EINVAL. If the
> driver is so messed up in general that execlists are enabled < gen8
> I think there is no point logging errors about it from here. Would
> also save you one indentation level.
I would just rewrite this to have a logical interface to the rings. Oh
wait, I did.
> >+ __intel_ring_advance(ring);
> >+ }
> >+
> >+ i915_gem_exec_dirty_object(obj, ring, vm, req);
>
> Where is this request actually submitted?
True, that's a rebase error.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
* Re: [PATCH 1/4] drm/i915: Clearing buffer objects via blitter engine
2015-07-01 16:30 ` Chris Wilson
@ 2015-07-02 9:30 ` Tvrtko Ursulin
2015-07-02 9:50 ` Chris Wilson
0 siblings, 1 reply; 36+ messages in thread
From: Tvrtko Ursulin @ 2015-07-02 9:30 UTC (permalink / raw)
To: Chris Wilson, ankitprasad.r.sharma, intel-gfx, akash.goel,
shashidhar.hiremath
On 07/01/2015 05:30 PM, Chris Wilson wrote:
> On Wed, Jul 01, 2015 at 03:54:55PM +0100, Tvrtko Ursulin wrote:
>>> +static int i915_gem_exec_flush_object(struct drm_i915_gem_object *obj,
>>> + struct intel_engine_cs *ring,
>>> + struct intel_context *ctx,
>>> + struct drm_i915_gem_request **req)
>>> +{
>>> + int ret;
>>> +
>>> + ret = i915_gem_object_sync(obj, ring, req);
>>> + if (ret)
>>> + return ret;
>>> +
>>> + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) {
>>> + if (i915_gem_clflush_object(obj, false))
>>> + i915_gem_chipset_flush(obj->base.dev);
>>> + obj->base.write_domain &= ~I915_GEM_DOMAIN_CPU;
>>> + }
>>> + if (obj->base.write_domain & I915_GEM_DOMAIN_GTT) {
>>> + wmb();
>>> + obj->base.write_domain &= ~I915_GEM_DOMAIN_GTT;
>>> + }
>>
>> All this could be replaced with i915_gem_object_set_to_gtt_domain, no?
>
> No. Technically this is i915_gem_execbuffer_move_to_gpu().
Aha.. I see now what my confusion was. It doesn't help that
i915_gem_execbuffer_move_to_gpu and execlist_move_to_gpu are implemented
at logically different places.
It would be nice to extract the loop body and call it something like
i915_gem_execbuffer_move_vma_to_gpu; it would avoid at least three
instances of the same code.
>>> +
>>> + return i915.enable_execlists ?
>>> + logical_ring_invalidate_all_caches(*req) :
>>> + intel_ring_invalidate_all_caches(*req);
>>
>> And this is done on actual submission for you by the lower levels so
>> you don't need to call it directly.
>
> What submission? We don't build a batch, we are building a raw request
> to do the operation from the ring.
I was confused about where execlist_move_to_gpu sits in the stack.
>>> + lockdep_assert_held(&dev->struct_mutex);
>>
>> It think there was some guidance that lockdep_assert_held is
>> compiled out when lockdep is not in the kernel and that WARN_ON is
>> preferred. In this case that would probably be WARN_ON_ONCE and
>> return error.
>
> Hah, this predates that and I still disagree.
Predates or not is not relevant. :) It is not a clean-cut situation, I
agree. Maybe we need our own amalgamation of WARN_ON_ONCE and
lockdep_assert_held, but I think we should either check for these things
or not, or have a really good assurance of test coverage with lockdep
enabled during QA.
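Something along these lines perhaps (a sketch, with a made-up name):

#define i915_mutex_assert_held(m) do { \
	lockdep_assert_held(m); \
	/* catches the case where lockdep is compiled out */ \
	WARN_ON_ONCE(!mutex_is_locked(m)); \
} while (0)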
>>> + ring = &dev_priv->ring[HAS_BLT(dev) ? BCS : RCS];
>>> + ctx = i915_gem_context_get(file_priv, DEFAULT_CONTEXT_HANDLE);
>>> + /* Allocate a request for this operation nice and early. */
>>> + ret = i915_gem_request_alloc(ring, ctx, &req);
>>> + if (ret)
>>> + return ret;
>>> +
>>> + if (ctx->ppgtt)
>>> + vm = &ctx->ppgtt->base;
>>> + else
>>> + vm = &dev_priv->gtt.base;
>>> +
>>> + if (i915.enable_execlists && !ctx->engine[ring->id].state) {
>>> + ret = intel_lr_context_deferred_create(ctx, ring);
>>
>> i915_gem_context_get above and this call are very similar to what
>> i915_gem_validate_context does. It seems to me it would be better to
>> call the latter function here.
>
> No, the intel_lrc API is absolute garbage and needs to be taken out the
> back and shot. Until that is done, I wouldn't bother continuing to try
> and use the interface at all.
>
> All that needs to happen here is:
>
> req = i915_gem_request_alloc(ring, ring->default_context);
>
> and for the request/lrc to go off and dtrt.
Well.. in the meantime, why duplicate it when i915_gem_validate_context
does i915_gem_context_get and the deferred create if needed? I don't see
the downside. The snippet from above becomes:
	ring = &dev_priv->ring[HAS_BLT(dev) ? BCS : RCS];
	ctx = i915_gem_validate_context(dev, file, ring,
					DEFAULT_CONTEXT_HANDLE);
	... handle error...
	/* Allocate a request for this operation nice and early. */
	ret = i915_gem_request_alloc(ring, ctx, &req);
Why would this code have to know about deferred create?
>>> + }
>>> +
>>> + ringbuf = ctx->engine[ring->id].ringbuf;
>>> +
>>> + ret = i915_gem_object_pin(obj, vm, PAGE_SIZE, 0);
>>> + if (ret)
>>> + return ret;
>>> +
>>> + if (obj->tiling_mode && INTEL_INFO(dev)->gen <= 3) {
>>> + ret = i915_gem_object_put_fence(obj);
>>> + if (ret)
>>> + goto unpin;
>>> + }
>>
>> Why is this needed?
>
> Because it's a requirement of the op being used on those gen. Later gen
> can keep the fence.
>
>> Could it be called unconditionally and still work?
>>
>> At least I would recommend a comment explaining it.
It is ugly to sprinkle platform knowledge into the callers - I think I saw
callsites which call i915_gem_object_put_fence unconditionally, so why
would that not work here?
>>> + if (i915.enable_execlists) {
>>> + if (dev_priv->info.gen >= 8) {
>>> + ret = intel_logical_ring_begin(req, 8);
>>> + if (ret)
>>> + goto unpin;
>>> +
>>> + intel_logical_ring_emit(ringbuf, GEN8_COLOR_BLT_CMD |
>>> + BLT_WRITE_RGBA |
>>> + (7-2));
>>> + intel_logical_ring_emit(ringbuf, BPP_32 |
>>> + ROP_FILL_COPY |
>>> + PAGE_SIZE);
>>> + intel_logical_ring_emit(ringbuf, 0);
>>> + intel_logical_ring_emit(ringbuf,
>>> + obj->base.size >> PAGE_SHIFT
>>> + << 16 | PAGE_SIZE / 4);
>>> + intel_logical_ring_emit(ringbuf,
>>> + i915_gem_obj_offset(obj, vm));
>>> + intel_logical_ring_emit(ringbuf, 0);
>>> + intel_logical_ring_emit(ringbuf, 0);
>>> + intel_logical_ring_emit(ringbuf, MI_NOOP);
>>> +
>>> + intel_logical_ring_advance(ringbuf);
>>> + } else {
>>> + DRM_ERROR("Execlists not supported for gen %d\n",
>>> + dev_priv->info.gen);
>>> + ret = -EINVAL;
>>
>> I would put a WARN_ON_ONCE here, or even just return -EINVAL. If the
>> driver is so messed up in general that execlists are enabled < gen8
>> I think there is no point logging errors about it from here. Would
>> also save you one indentation level.
>
> I would just rewrite this to have a logical interface to the rings. Oh
> wait, I did.
That is out of my jurisdiction, but I think my comment to the above is
not an unreasonable one since it indicates total driver confusion and
could/should be handled somewhere else.
Regards,
Tvrtko
* Re: [PATCH 2/4] drm/i915: Support for creating Stolen memory backed objects
2015-07-01 16:19 ` Chris Wilson
@ 2015-07-02 9:37 ` Tvrtko Ursulin
2015-07-02 9:43 ` Chris Wilson
0 siblings, 1 reply; 36+ messages in thread
From: Tvrtko Ursulin @ 2015-07-02 9:37 UTC (permalink / raw)
To: Chris Wilson, ankitprasad.r.sharma, intel-gfx, akash.goel,
shashidhar.hiremath
On 07/01/2015 05:19 PM, Chris Wilson wrote:
> On Wed, Jul 01, 2015 at 04:06:49PM +0100, Tvrtko Ursulin wrote:
>>> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
>>> index c5349fa..6045749 100644
>>> --- a/drivers/gpu/drm/i915/i915_dma.c
>>> +++ b/drivers/gpu/drm/i915/i915_dma.c
>>> @@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
>>> value = i915.enable_hangcheck &&
>>> intel_has_gpu_reset(dev);
>>> break;
>>> + case I915_PARAM_CREATE_VERSION:
>>> + value = 1;
>>
>> Shouldn't it be 2?
>
> But 1 is the 2nd number, discounting all those pesky negative versions :)
It would be more obvious I think, even though an I915_PARAM_CREATE_VERSION
which returns 1 would never exist.
>>> + ret = i915_gem_exec_clear_object(obj, file->driver_priv);
>>
>> I would put a comment here saying why it is important to clear
>> stolen memory.
>
> Userspace ABI (and kernel ABI in general) is that we do not hand back
> uncleared buffers. Something to do with bank card details I guess.
> So just:
Yes that's obvious - but where is it done for normal objects? Can't find
it... is it hidden in shmemfs somewhere? If so it reinforces the need for
a comment here.
Regards,
Tvrtko
* Re: [PATCH 2/4] drm/i915: Support for creating Stolen memory backed objects
2015-07-02 9:37 ` Tvrtko Ursulin
@ 2015-07-02 9:43 ` Chris Wilson
0 siblings, 0 replies; 36+ messages in thread
From: Chris Wilson @ 2015-07-02 9:43 UTC (permalink / raw)
To: Tvrtko Ursulin
Cc: ankitprasad.r.sharma, intel-gfx, akash.goel, shashidhar.hiremath
On Thu, Jul 02, 2015 at 10:37:56AM +0100, Tvrtko Ursulin wrote:
>
> On 07/01/2015 05:19 PM, Chris Wilson wrote:
> >On Wed, Jul 01, 2015 at 04:06:49PM +0100, Tvrtko Ursulin wrote:
> >>>diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> >>>index c5349fa..6045749 100644
> >>>--- a/drivers/gpu/drm/i915/i915_dma.c
> >>>+++ b/drivers/gpu/drm/i915/i915_dma.c
> >>>@@ -167,6 +167,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
> >>> value = i915.enable_hangcheck &&
> >>> intel_has_gpu_reset(dev);
> >>> break;
> >>>+ case I915_PARAM_CREATE_VERSION:
> >>>+ value = 1;
> >>
> >>Shouldn't it be 2?
> >
> >But 1 is the 2nd number, discounting all those pesky negative versions :)
>
> It would be more obvious I think, even though
> I915_PARAM_CREATE_VERSION which returns 1 would never exist.
I don't disagree. I was just used to always starting my versions at 1, 0
being the unversioned.
> >>>+ ret = i915_gem_exec_clear_object(obj, file->driver_priv);
> >>
> >>I would put a comment here saying why it is important to clear
> >>stolen memory.
> >
> >Userspace ABI (and kernel ABI in general) is that we do not hand back
> >uncleared buffers. Something to do with bank card details I guess.
> >So just:
>
> Yes thats obvious - but where it is done for normal objects? Can't
> find it... is it hidden in shmemfs somewhere? If so reinforces the
> need for a comment here.
Yes, it is deep within shmemfs, shmem_getpage_gfp() will call
clear_highpage() on new pages. It is an unfortunate cost that we want to
avoid for frequently allocated internal objects (such as contexts, pdp)
where we just want a simple allocator rather than shmemfs.
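Roughly (paraphrasing mm/shmem.c from memory, so treat this as a sketch):

	page = shmem_alloc_page(gfp, info, index);
	...
	clear_highpage(page);
	flush_dcache_page(page);
	SetPageUptodate(page);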
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
* Re: [PATCH 1/4] drm/i915: Clearing buffer objects via blitter engine
2015-07-02 9:30 ` Tvrtko Ursulin
@ 2015-07-02 9:50 ` Chris Wilson
2015-07-07 7:42 ` Ankitprasad Sharma
0 siblings, 1 reply; 36+ messages in thread
From: Chris Wilson @ 2015-07-02 9:50 UTC (permalink / raw)
To: Tvrtko Ursulin
Cc: ankitprasad.r.sharma, intel-gfx, akash.goel, shashidhar.hiremath
On Thu, Jul 02, 2015 at 10:30:43AM +0100, Tvrtko Ursulin wrote:
> Well.. I the meantime why duplicate it when
> i915_gem_validate_context does i915_gem_context_get and deferred
> create if needed. I don't see the downside. Snippet from above
> becomes:
>
> ring = &dev_priv->ring[HAS_BLT(dev) ? BCS : RCS];
> ctx = i915_gem_validate_context(dev, file, ring,
> DFAULT_CONTEXT_HANDLE);
> ... handle error...
> /* Allocate a request for this operation nice and early. */
> ret = i915_gem_request_alloc(ring, ctx, &req);
>
> Why would this code have to know about deferred create.
This one is irrelevant. Since the default_context is always allocated
and available via the ring, we don't need to pretend we are inside
userspace and do the idr lookup and validation, we can just use it
directly.
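i.e. just (a sketch, using the request_alloc signature from this series):

	ring = &dev_priv->ring[HAS_BLT(dev) ? BCS : RCS];
	ret = i915_gem_request_alloc(ring, ring->default_context, &req);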
> >>Why is this needed?
> >
> >Because it's a requirement of the op being used on those gen. Later gen
> >can keep the fence.
> >
> >>Could it be called unconditionally and still work?
> >>
> >>At least I would recommend a comment explaining it.
>
> It is ugly to sprinkle platform knowledge to the callers - I think I
> saw a callsites which call i915_gem_object_put_fence unconditionally
> so why would that not work here?
That's access via the GTT though. This is access via the GPU using GPU
instructions, which sometimes use fences and sometimes don't. That
knowledge is already baked into the choice of command.
What I would actually support would be to just use CPU GTT clearing.
That will run at memory speeds, only stall for a small fraction of a
second, and later, if the workloads demand it, we can do GPU clearing.
It's much simpler, and I would say for any real workload the stolen
objects will be cached to avoid reallocations.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 36+ messages in thread
* Re: [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
2015-07-01 9:25 ` [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects ankitprasad.r.sharma
2015-07-01 9:54 ` Chris Wilson
@ 2015-07-02 10:42 ` Tvrtko Ursulin
2015-07-02 11:00 ` Chris Wilson
2015-07-03 5:07 ` shuang.he
2 siblings, 1 reply; 36+ messages in thread
From: Tvrtko Ursulin @ 2015-07-02 10:42 UTC (permalink / raw)
To: ankitprasad.r.sharma, intel-gfx; +Cc: akash.goel, shashidhar.hiremath
On 07/01/2015 10:25 AM, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> This patch adds support for extending the pread/pwrite functionality
> for objects not backed by shmem. The access will be made through
> gtt interface.
> This will cover prime objects as well as stolen memory backed objects
> but for userptr objects it is still forbidden.
>
> v2: drop locks around slow_user_access, prefault the pages before
> access (Chris)
>
> v3: Rebased to the latest drm-intel-nightly (Ankit)
>
> testcase: igt/gem_stolen
>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 137 +++++++++++++++++++++++++++++++++++-----
> 1 file changed, 120 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 4acf331..4be6eb4 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -629,6 +629,102 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
> return ret ? - EFAULT : 0;
> }
>
> +static inline int
> +slow_user_access(struct io_mapping *mapping,
> + loff_t page_base, int page_offset,
> + char __user *user_data,
> + int length, bool write)
> +{
> + void __iomem *vaddr_inatomic;
> + void *vaddr;
> + unsigned long unwritten;
> +
> + vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
> + /* We can use the cpu mem copy function because this is X86. */
> + vaddr = (void __force *)vaddr_inatomic + page_offset;
> + if (write)
> + unwritten = __copy_from_user(vaddr, user_data, length);
> + else
> + unwritten = __copy_to_user(user_data, vaddr, length);
> +
> + io_mapping_unmap(vaddr_inatomic);
> + return unwritten;
> +}
I am not super familiar with the low-level mapping business, but it looks
correct to me. Just one question: are there any downsides to a WC mapping?
In the read case, would there be any advantage to not asking for WC?
> +static int
> +i915_gem_gtt_pread_pwrite(struct drm_device *dev,
> + struct drm_i915_gem_object *obj, uint64_t size,
> + uint64_t data_offset, uint64_t data_ptr, bool write)
> +{
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + char __user *user_data;
> + ssize_t remain;
> + loff_t offset, page_base;
> + int page_offset, page_length, ret = 0;
> +
> + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
> + if (ret)
> + goto out;
> +
> + ret = i915_gem_object_set_to_gtt_domain(obj, write);
> + if (ret)
> + goto out_unpin;
> +
> + ret = i915_gem_object_put_fence(obj);
> + if (ret)
> + goto out_unpin;
> +
> + user_data = to_user_ptr(data_ptr);
> + remain = size;
Strictly speaking uint64_t can overflow ssize_t; does the compiler not
care in this case?
> +
> + offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
> +
> + if (write)
> + intel_fb_obj_invalidate(obj, ORIGIN_GTT);
> +
> + mutex_unlock(&dev->struct_mutex);
> + if (!write && likely(!i915.prefault_disable))
> + ret = fault_in_multipages_writeable(user_data, remain);
A bit confusing read/write inversion. :) But correct. Just wondering if
it would make sense to invert the boolean, at least in slow_user_access.
Or in fact just call it pwrite instead of write, to reflect that pwrite ==
true means _reading_ from user memory.
> + while (remain > 0) {
> + /* Operation in this page
> + *
> + * page_base = page offset within aperture
> + * page_offset = offset within page
> + * page_length = bytes to copy for this page
> + */
> + page_base = offset & PAGE_MASK;
> + page_offset = offset_in_page(offset);
> + page_length = remain;
> + if ((page_offset + remain) > PAGE_SIZE)
> + page_length = PAGE_SIZE - page_offset;
It would save some arithmetic and branching to pull the first
potentially unaligned copy out of the loop; page_offset would then be
zero from the 2nd page onwards.
> + /* This is a slow read/write as it tries to read from
> + * and write to user memory which may result into page
> + * faults
> + */
> + ret = slow_user_access(dev_priv->gtt.mappable, page_base,
> + page_offset, user_data,
> + page_length, write);
> +
> + if (ret) {
> + ret = -EINVAL;
> + break;
> + }
> +
> + remain -= page_length;
> + user_data += page_length;
> + offset += page_length;
> + }
> +
> + mutex_lock(&dev->struct_mutex);
Caller had it interruptible.
> +
> +out_unpin:
> + i915_gem_object_ggtt_unpin(obj);
> +out:
> + return ret;
> +}
> +
> static int
> i915_gem_shmem_pread(struct drm_device *dev,
> struct drm_i915_gem_object *obj,
> @@ -752,17 +848,19 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
> goto out;
> }
>
> - /* prime objects have no backing filp to GEM pread/pwrite
> - * pages from.
> - */
> - if (!obj->base.filp) {
> - ret = -EINVAL;
> - goto out;
> - }
> -
> trace_i915_gem_object_pread(obj, args->offset, args->size);
>
> - ret = i915_gem_shmem_pread(dev, obj, args, file);
> + /* pread for non shmem backed objects */
> + if (!obj->base.filp) {
> + if (obj->tiling_mode == I915_TILING_NONE)
> + ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
> + args->offset,
> + args->data_ptr,
> + false);
> + else
> + ret = -EINVAL;
> + } else
> + ret = i915_gem_shmem_pread(dev, obj, args, file);
Same coding style disagreement on whether or not both blocks should have
braces.
Regards,
Tvrtko
* Re: [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
2015-07-02 10:42 ` Tvrtko Ursulin
@ 2015-07-02 11:00 ` Chris Wilson
2015-07-02 11:27 ` Tvrtko Ursulin
0 siblings, 1 reply; 36+ messages in thread
From: Chris Wilson @ 2015-07-02 11:00 UTC (permalink / raw)
To: Tvrtko Ursulin
Cc: ankitprasad.r.sharma, intel-gfx, akash.goel, shashidhar.hiremath
On Thu, Jul 02, 2015 at 11:42:44AM +0100, Tvrtko Ursulin wrote:
> I am not super familiar with low level mapping business. But it
> looks correct to me. Just one question would be if there are any
> downsides to WC mapping? If in the read case it would be any
> advantage not to ask for WC?
Read-side WC behaves as UC. It's bad, but since we have no other
coherent access we have no choice. We rely on userspace not to do this,
and our powers of persuasion that copying to a coherent buffer first
using the GPU and then using the CPU for readback is about 8x faster
(incl. the sync overhead) than having to use UC.
> >+static int
> >+i915_gem_gtt_pread_pwrite(struct drm_device *dev,
> >+ struct drm_i915_gem_object *obj, uint64_t size,
> >+ uint64_t data_offset, uint64_t data_ptr, bool write)
> >+{
> >+ struct drm_i915_private *dev_priv = dev->dev_private;
> >+ char __user *user_data;
> >+ ssize_t remain;
> >+ loff_t offset, page_base;
> >+ int page_offset, page_length, ret = 0;
> >+
> >+ ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
> >+ if (ret)
> >+ goto out;
> >+
> >+ ret = i915_gem_object_set_to_gtt_domain(obj, write);
> >+ if (ret)
> >+ goto out_unpin;
> >+
> >+ ret = i915_gem_object_put_fence(obj);
> >+ if (ret)
> >+ goto out_unpin;
> >+
> >+ user_data = to_user_ptr(data_ptr);
> >+ remain = size;
>
> Strictly speaking uint64_t can overflow ssize_t, compiler does not
> care in this case?
The compiler is too quiet at times. Killing ssize_t and even loff_t here
makes sense.
> >+ offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
> >+
> >+ if (write)
> >+ intel_fb_obj_invalidate(obj, ORIGIN_GTT);
Smells wrong, better off combining the invalidate with the right flags
in the caller.
> >+ mutex_unlock(&dev->struct_mutex);
> >+ if (!write && likely(!i915.prefault_disable))
> >+ ret = fault_in_multipages_writeable(user_data, remain);
>
> A bit confusing read/write inversion. :) But correct. Just wondering
> if it would make sense to invert the boolean at least in
> slow_user_access. Or in fact just call it pwrite instead of write to
> reflect pwrite == true is _reading_ from user memory.
>
> >+ while (remain > 0) {
> >+ /* Operation in this page
> >+ *
> >+ * page_base = page offset within aperture
> >+ * page_offset = offset within page
> >+ * page_length = bytes to copy for this page
> >+ */
> >+ page_base = offset & PAGE_MASK;
> >+ page_offset = offset_in_page(offset);
> >+ page_length = remain;
> >+ if ((page_offset + remain) > PAGE_SIZE)
> >+ page_length = PAGE_SIZE - page_offset;
>
> It would save some arithmetics and branching to pull out the first
> potentially unaligned copy out of the loop and then page_offset
> would be zero for 2nd page onwards.
Honestly, keeping the loop simple is preferable :) What I do locally is:
page_offset = offset_in_page(offset);
offset &= PAGE_MASK;
while (remain > 0) {
len = min(remain, PAGE_SIZE - page_offset);
....
user_data += len;
remain -= len;
offset += PAGE_SIZE;
page_offset = 0;
}
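where the .... is just the copy plus its error check, i.e. (a sketch,
reusing slow_user_access from the patch; offset is page-aligned here so
it doubles as page_base):

	ret = slow_user_access(dev_priv->gtt.mappable, offset,
			       page_offset, user_data, len, write);
	if (ret)
		break;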
>
> >+ /* This is a slow read/write as it tries to read from
> >+ * and write to user memory which may result into page
> >+ * faults
> >+ */
> >+ ret = slow_user_access(dev_priv->gtt.mappable, page_base,
> >+ page_offset, user_data,
> >+ page_length, write);
> >+
> >+ if (ret) {
> >+ ret = -EINVAL;
Would be better to return the right errno from slow_user_access, or if it
is a bool, treat it as a bool!
> >+ break;
> >+ }
> >+
> >+ remain -= page_length;
> >+ user_data += page_length;
> >+ offset += page_length;
> >+ }
> >+
> >+ mutex_lock(&dev->struct_mutex);
>
> Caller had it interruptible.
On second access, it is preferable not to risk interruption -
especially when catching up with state to return to userspace.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
* Re: [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
2015-07-02 11:00 ` Chris Wilson
@ 2015-07-02 11:27 ` Tvrtko Ursulin
2015-07-02 11:58 ` Chris Wilson
0 siblings, 1 reply; 36+ messages in thread
From: Tvrtko Ursulin @ 2015-07-02 11:27 UTC (permalink / raw)
To: Chris Wilson, ankitprasad.r.sharma, intel-gfx, akash.goel,
shashidhar.hiremath
On 07/02/2015 12:00 PM, Chris Wilson wrote:
>>> + /* This is a slow read/write as it tries to read from
>>> + * and write to user memory which may result into page
>>> + * faults
>>> + */
>>> + ret = slow_user_access(dev_priv->gtt.mappable, page_base,
>>> + page_offset, user_data,
>>> + page_length, write);
>>> +
>>> + if (ret) {
>>> + ret = -EINVAL;
>
> Would be better to return the right errno from slow_user_access or if it
> is a bool, treat is a bool!
It returns a number of unwritten bytes.
Actually, I just spotted that the return type of slow_user_access is also
wrong: it is an int, to which it implicitly casts an unsigned long.
And it is customary to return -EFAULT here rather than -EINVAL, so I
suggest changing that as well.
>>> + break;
>>> + }
>>> +
>>> + remain -= page_length;
>>> + user_data += page_length;
>>> + offset += page_length;
>>> + }
>>> +
>>> + mutex_lock(&dev->struct_mutex);
>>
>> Caller had it interruptible.
>
> On second access, it is preferrable not to risk interruption -
> especially when catching up with state to return to userspace.
Second access is only in the pwrite case and, more so, it makes no sense
to make such decisions on behalf of the caller. If it is a real concern,
the caller should do it.
Yes, I see that there is a problem where this helper doesn't know what
type of mutex the caller had, but it is still bad imho and could be fixed
in a different way.
Regards,
Tvrtko
* Re: [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
2015-07-02 11:27 ` Tvrtko Ursulin
@ 2015-07-02 11:58 ` Chris Wilson
0 siblings, 0 replies; 36+ messages in thread
From: Chris Wilson @ 2015-07-02 11:58 UTC (permalink / raw)
To: Tvrtko Ursulin
Cc: ankitprasad.r.sharma, intel-gfx, akash.goel, shashidhar.hiremath
On Thu, Jul 02, 2015 at 12:27:42PM +0100, Tvrtko Ursulin wrote:
>
> On 07/02/2015 12:00 PM, Chris Wilson wrote:
> >>>+ /* This is a slow read/write as it tries to read from
> >>>+ * and write to user memory which may result into page
> >>>+ * faults
> >>>+ */
> >>>+ ret = slow_user_access(dev_priv->gtt.mappable, page_base,
> >>>+ page_offset, user_data,
> >>>+ page_length, write);
> >>>+
> >>>+ if (ret) {
> >>>+ ret = -EINVAL;
> >
> >Would be better to return the right errno from slow_user_access or if it
> >is a bool, treat is a bool!
>
> It returns a number of unwritten bytes.
>
> Actually just spotted the return type for slow_user_access is also
> wrong, it is an int, to which it implicitly casts an unsigned long.
Ugh, our convention for those has been to do the unwritten check in
the subroutine and convert it back to the errno. Saves all the guessing
about the failure mode in the caller.
> And it is customary to return -EFAULT on rather than -EINVAL so I
> suggest changing that as well.
Yup.
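Combined, slow_user_access() would then do the conversion itself,
something like (sketch):

	...
	io_mapping_unmap(vaddr_inatomic);

	/* convert "bytes not copied" into an errno here, not in the caller */
	return unwritten ? -EFAULT : 0;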
> >>>+ break;
> >>>+ }
> >>>+
> >>>+ remain -= page_length;
> >>>+ user_data += page_length;
> >>>+ offset += page_length;
> >>>+ }
> >>>+
> >>>+ mutex_lock(&dev->struct_mutex);
> >>
> >>Caller had it interruptible.
> >
> >On second access, it is preferrable not to risk interruption -
> >especially when catching up with state to return to userspace.
>
>
> Second access is only in pwrite case and more so, it makes no sense
> to make such decisions on behalf of the caller. If it is a real
> concern, caller should do it.
That's odd. We must have taken it on both pwrite/pread paths to have
prepared the object for reading/writing. We can only drop it under
difficult circumstances (lots of dancing required if we do, to make sure
that the object hasn't been modified in the meantime).
> Yes I see that there is a problem where this helper doesn't know
> what type of mutex caller had, but, it is still bad imho and could
> be fixed in a different way.
Smells fishy indeed.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
* Re: [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
2015-07-01 9:25 ` [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects ankitprasad.r.sharma
2015-07-01 9:54 ` Chris Wilson
2015-07-02 10:42 ` Tvrtko Ursulin
@ 2015-07-03 5:07 ` shuang.he
2 siblings, 0 replies; 36+ messages in thread
From: shuang.he @ 2015-07-03 5:07 UTC (permalink / raw)
To: shuang.he, lei.a.liu, intel-gfx, ankitprasad.r.sharma
Tested-By: Intel Graphics QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 6697
-------------------------------------Summary-------------------------------------
Platform   Delta   drm-intel-nightly   Series Applied
ILK                302/302             302/302
SNB                312/316             312/316
IVB                343/343             343/343
BYT        -1      287/287             286/287
HSW                380/380             380/380
-------------------------------------Detailed-------------------------------------
Platform   Test                                        drm-intel-nightly   Series Applied
*BYT       igt@gem_tiled_partial_pwrite_pread@reads    PASS(1)             FAIL(1)
Note: You need to pay more attention to lines starting with '*'
* Re: [PATCH 1/4] drm/i915: Clearing buffer objects via blitter engine
2015-07-02 9:50 ` Chris Wilson
@ 2015-07-07 7:42 ` Ankitprasad Sharma
2015-07-07 8:46 ` Chris Wilson
0 siblings, 1 reply; 36+ messages in thread
From: Ankitprasad Sharma @ 2015-07-07 7:42 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx, akash.goel, shashidhar.hiremath
On Thu, 2015-07-02 at 10:50 +0100, Chris Wilson wrote:
> On Thu, Jul 02, 2015 at 10:30:43AM +0100, Tvrtko Ursulin wrote:
> > Well.. I the meantime why duplicate it when
> > i915_gem_validate_context does i915_gem_context_get and deferred
> > create if needed. I don't see the downside. Snippet from above
> > becomes:
> >
> > ring = &dev_priv->ring[HAS_BLT(dev) ? BCS : RCS];
> > ctx = i915_gem_validate_context(dev, file, ring,
> > DFAULT_CONTEXT_HANDLE);
> > ... handle error...
> > /* Allocate a request for this operation nice and early. */
> > ret = i915_gem_request_alloc(ring, ctx, &req);
> >
> > Why would this code have to know about deferred create.
>
> This one is irrelevant. Since the default_context is always allocated
> and available via the ring, we don't need to pretend we are inside
> userspace and do the idr lookup and validation, we can just use it
> directly.
>
> > >>Why is this needed?
> > >
> > >Because it's a requirement of the op being used on those gen. Later gen
> > >can keep the fence.
> > >
> > >>Could it be called unconditionally and still work?
> > >>
> > >>At least I would recommend a comment explaining it.
> >
> > It is ugly to sprinkle platform knowledge to the callers - I think I
> > saw a callsites which call i915_gem_object_put_fence unconditionally
> > so why would that not work here?
>
> That's access via the GTT though. This is access via the GPU using GPU
> instructions, which sometimes use fences and sometimes don't. That
> knowledge is already baked into the choice of command.
>
> What I would actually support would be to just use CPU GTT clearing.
But we have verified earlier here that for large objects GPU clearing
is faster (here hoping that the stolen region will be used mainly for
framebuffers). Is it ok to do this conditionally based on the size of
the objects? And GPU clearing is asynchronous.
> That will run at memory speeds, only stall for a small fraction of the
> second, and later if the workloads demand it, we can do GPU clearing.
Also, how do you suggest bringing the workload in as a deciding factor?
Maybe by checking the current running frequency, or based on the number
of pending requests?
Or are you suggesting using CPU GTT clearing completely?
>
> It's much simpler, and I would say for any real workload the stolen
> objects will be cached to avoid reallocations.
> -Chris
>
Thanks,
Ankit
* Re: [PATCH 1/4] drm/i915: Clearing buffer objects via blitter engine
2015-07-07 7:42 ` Ankitprasad Sharma
@ 2015-07-07 8:46 ` Chris Wilson
2015-07-07 8:52 ` Ankitprasad Sharma
0 siblings, 1 reply; 36+ messages in thread
From: Chris Wilson @ 2015-07-07 8:46 UTC (permalink / raw)
To: Ankitprasad Sharma; +Cc: intel-gfx, akash.goel, shashidhar.hiremath
On Tue, Jul 07, 2015 at 01:12:11PM +0530, Ankitprasad Sharma wrote:
> On Thu, 2015-07-02 at 10:50 +0100, Chris Wilson wrote:
> > On Thu, Jul 02, 2015 at 10:30:43AM +0100, Tvrtko Ursulin wrote:
> > > Well.. I the meantime why duplicate it when
> > > i915_gem_validate_context does i915_gem_context_get and deferred
> > > create if needed. I don't see the downside. Snippet from above
> > > becomes:
> > >
> > > ring = &dev_priv->ring[HAS_BLT(dev) ? BCS : RCS];
> > > ctx = i915_gem_validate_context(dev, file, ring,
> > > DFAULT_CONTEXT_HANDLE);
> > > ... handle error...
> > > /* Allocate a request for this operation nice and early. */
> > > ret = i915_gem_request_alloc(ring, ctx, &req);
> > >
> > > Why would this code have to know about deferred create.
> >
> > This one is irrelevant. Since the default_context is always allocated
> > and available via the ring, we don't need to pretend we are inside
> > userspace and do the idr lookup and validation, we can just use it
> > directly.
> >
> > > >>Why is this needed?
> > > >
> > > >Because it's a requirement of the op being used on those gen. Later gen
> > > >can keep the fence.
> > > >
> > > >>Could it be called unconditionally and still work?
> > > >>
> > > >>At least I would recommend a comment explaining it.
> > >
> > > It is ugly to sprinkle platform knowledge to the callers - I think I
> > > saw a callsites which call i915_gem_object_put_fence unconditionally
> > > so why would that not work here?
> >
> > That's access via the GTT though. This is access via the GPU using GPU
> > instructions, which sometimes use fences and sometimes don't. That
> > knowledge is already baked into the choice of command.
> >
> > What I would actually support would be to just use CPU GTT clearing.
> But, we have verified earlier here that for large objects GPU clearing
> is faster (here hoping that stolen region will be used mainly for
> framebuffers). Is it ok to do this conditionally based on the size of
> the objects? and GPU clearing is asynchronous.
Hmm, this will be the GTT overhead which we can't avoid since we are
banned from accessing stolen directly (on byt).
Honestly this is the ugliest patch in the series. If we can do without
it, it would make accepting it much easier. And then you have a nice
performance argument for doing it via the blitter. Though I would be
surprised if the userspace cache was doing such a bad job that frequent
reallocations from stolen were required.
> > That will run at memory speeds, only stall for a small fraction of the
> > second, and later if the workloads demand it, we can do GPU clearing.
> Also how do you suggest to bring the workload in as a deciding factor?
> may be checking the current running frequency or based on the number of
> pending requests?
> Or are you suggesting to use CPU GTT clearing completely?
> >
> > It's much simpler, and I would say for any real workload the stolen
> > objects will be cached to avoid reallocations.
Yes. A quick patch to do an unconditional memset() of a pinned GTT
stolen object should be about 20 lines. For the benefit of getting
create2 upstream with little fuss, I think that is worth it.
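Something like this, perhaps (a rough, uncompiled sketch; the function
name is made up, and it assumes the object has already been pinned into
the mappable aperture):

static void i915_gem_object_clear_stolen(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	u64 offset = i915_gem_obj_ggtt_offset(obj);
	int n, page_count = obj->base.size / PAGE_SIZE;

	for (n = 0; n < page_count; n++) {
		void __iomem *vaddr;

		/* map each stolen page through the GTT aperture and clear it */
		vaddr = io_mapping_map_wc(dev_priv->gtt.mappable,
					  offset + n * PAGE_SIZE);
		memset_io(vaddr, 0, PAGE_SIZE);
		io_mapping_unmap(vaddr);
	}
}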
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
* Re: [PATCH 1/4] drm/i915: Clearing buffer objects via blitter engine
2015-07-07 8:46 ` Chris Wilson
@ 2015-07-07 8:52 ` Ankitprasad Sharma
0 siblings, 0 replies; 36+ messages in thread
From: Ankitprasad Sharma @ 2015-07-07 8:52 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx, akash.goel, shashidhar.hiremath
On Tue, 2015-07-07 at 09:46 +0100, Chris Wilson wrote:
> On Tue, Jul 07, 2015 at 01:12:11PM +0530, Ankitprasad Sharma wrote:
> > On Thu, 2015-07-02 at 10:50 +0100, Chris Wilson wrote:
> > > On Thu, Jul 02, 2015 at 10:30:43AM +0100, Tvrtko Ursulin wrote:
> > > > Well.. In the meantime, why duplicate it when
> > > > i915_gem_validate_context does i915_gem_context_get and deferred
> > > > create if needed. I don't see the downside. Snippet from above
> > > > becomes:
> > > >
> > > > ring = &dev_priv->ring[HAS_BLT(dev) ? BCS : RCS];
> > > > ctx = i915_gem_validate_context(dev, file, ring,
> > > > DEFAULT_CONTEXT_HANDLE);
> > > > ... handle error...
> > > > /* Allocate a request for this operation nice and early. */
> > > > ret = i915_gem_request_alloc(ring, ctx, &req);
> > > >
> > > > Why would this code have to know about deferred create?
> > >
> > > This one is irrelevant. Since the default_context is always allocated
> > > and available via the ring, we don't need to pretend we are inside
> > > userspace and do the idr lookup and validation; we can just use it
> > > directly.
> > >
> > > > >>Why is this needed?
> > > > >
> > > > >Because it's a requirement of the op being used on those gen. Later gen
> > > > >can keep the fence.
> > > > >
> > > > >>Could it be called unconditionally and still work?
> > > > >>
> > > > >>At least I would recommend a comment explaining it.
> > > >
> > > > It is ugly to sprinkle platform knowledge into the callers - I think I
> > > > saw callsites which call i915_gem_object_put_fence unconditionally,
> > > > so why would that not work here?
> > >
> > > That's access via the GTT though. This is access via the GPU using GPU
> > > instructions, which sometimes use fences and sometimes don't. That
> > > knowledge is already baked into the choice of command.
> > >
> > > What I would actually support would be to just use CPU GTT clearing.
> > But we have verified earlier that GPU clearing is faster for large
> > objects (the hope being that the stolen region will be used mainly for
> > framebuffers). Is it OK to do this conditionally based on the size of
> > the objects? GPU clearing is also asynchronous.
>
> Hmm, this will be the GTT overhead which we can't avoid since we are
> banned from accessing stolen directly (on byt).
>
> Honestly this is the ugliest patch in the series. If we can do without
> it, that would make accepting it much easier. And then you have a nice
> performance argument for doing it via the blitter. Though I would be
> surprised if the userspace cache was doing such a bad job that frequent
> reallocations from stolen were required.
>
> > > That will run at memory speeds, only stall for a small fraction of a
> > > second, and later if the workloads demand it, we can do GPU clearing.
> > Also, how do you suggest bringing the workload in as a deciding factor?
> > Maybe by checking the current running frequency, or based on the number
> > of pending requests?
> > Or are you suggesting using CPU GTT clearing completely?
> > >
> > > It's much simpler, and I would say for any real workload the stolen
> > > objects will be cached to avoid reallocations.
>
> Yes. A quick patch to do an unconditional memset() of a pinned GTT
> stolen object should be about 20 lines. For the benefit of getting
> create2 upstream with little fuss, I think that is worth it.
So this ugly patch itself will go away :( and I will add a new function
in i915_gem_stolen.c to do CPU (GTT)-based clearing of objects in
stolen.
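A rough sketch of what such a helper might look like, reusing the
io_mapping calls already used elsewhere in this series; the function
name, the page-at-a-time loop and the error handling here are
assumptions, not the final patch:

static int i915_gem_object_clear_stolen(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
	uint64_t offset, done;
	void __iomem *vaddr;
	int ret;

	ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
	if (ret)
		return ret;

	offset = i915_gem_obj_ggtt_offset(obj);

	/* Clear one GTT page at a time through a WC mapping; GEM object
	 * sizes are page aligned, so there is no partial tail to handle.
	 */
	for (done = 0; done < obj->base.size; done += PAGE_SIZE) {
		vaddr = io_mapping_map_wc(dev_priv->gtt.mappable,
					  offset + done);
		memset_io(vaddr, 0, PAGE_SIZE);
		io_mapping_unmap(vaddr);
	}

	i915_gem_object_ggtt_unpin(obj);
	return 0;
}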
* [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
2015-07-22 13:51 [PATCH v5 0/4] Support for creating/using Stolen memory " ankitprasad.r.sharma
@ 2015-07-22 13:51 ` ankitprasad.r.sharma
2015-07-22 14:39 ` Chris Wilson
2015-07-22 15:46 ` Tvrtko Ursulin
0 siblings, 2 replies; 36+ messages in thread
From: ankitprasad.r.sharma @ 2015-07-22 13:51 UTC (permalink / raw)
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel, shashidhar.hiremath
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
This patch adds support for extending the pread/pwrite functionality
for objects not backed by shmem. The access will be made through
gtt interface.
This will cover prime objects as well as stolen memory backed objects
but for userptr objects it is still forbidden.
v2: Drop locks around slow_user_access, prefault the pages before
access (Chris)
v3: Rebased to the latest drm-intel-nightly (Ankit)
v4: Moved page base & offset calculations outside the copy loop,
corrected data types for size and offset variables, corrected if-else
braces format (Tvrtko/kerneldocs)
Testcase: igt/gem_stolen
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 138 +++++++++++++++++++++++++++++++++++-----
1 file changed, 121 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9e7e182..4321178 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -631,6 +631,102 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
return ret ? - EFAULT : 0;
}
+static inline uint64_t
+slow_user_access(struct io_mapping *mapping,
+ uint64_t page_base, int page_offset,
+ char __user *user_data,
+ int length, bool pwrite)
+{
+ void __iomem *vaddr_inatomic;
+ void *vaddr;
+ uint64_t unwritten;
+
+ vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
+ /* We can use the cpu mem copy function because this is X86. */
+ vaddr = (void __force *)vaddr_inatomic + page_offset;
+ if (pwrite)
+ unwritten = __copy_from_user(vaddr, user_data, length);
+ else
+ unwritten = __copy_to_user(user_data, vaddr, length);
+
+ io_mapping_unmap(vaddr_inatomic);
+ return unwritten;
+}
+
+static int
+i915_gem_gtt_pread_pwrite(struct drm_device *dev,
+ struct drm_i915_gem_object *obj, uint64_t size,
+ uint64_t data_offset, uint64_t data_ptr, bool pwrite)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ char __user *user_data;
+ uint64_t remain;
+ uint64_t offset, page_base;
+ int page_offset, page_length, ret = 0;
+
+ ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
+ if (ret)
+ goto out;
+
+ ret = i915_gem_object_set_to_gtt_domain(obj, pwrite);
+ if (ret)
+ goto out_unpin;
+
+ ret = i915_gem_object_put_fence(obj);
+ if (ret)
+ goto out_unpin;
+
+ user_data = to_user_ptr(data_ptr);
+ remain = size;
+ offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
+
+ if (pwrite)
+ intel_fb_obj_invalidate(obj, ORIGIN_GTT);
+
+ mutex_unlock(&dev->struct_mutex);
+ if (!pwrite && likely(!i915.prefault_disable))
+ ret = fault_in_multipages_writeable(user_data, remain);
+
+ /*
+ * page_offset = offset within page
+ * page_base = page offset within aperture
+ */
+ page_offset = offset_in_page(offset);
+ page_base = offset & PAGE_MASK;
+
+ while (remain > 0) {
+ /* page_length = bytes to copy for this page */
+ page_length = remain;
+ if ((page_offset + remain) > PAGE_SIZE)
+ page_length = PAGE_SIZE - page_offset;
+
+ /* This is a slow read/write as it tries to read from
+ * and write to user memory which may result in page
+ * faults
+ */
+ ret = slow_user_access(dev_priv->gtt.mappable, page_base,
+ page_offset, user_data,
+ page_length, pwrite);
+
+ if (ret) {
+ ret = -EFAULT;
+ break;
+ }
+
+ remain -= page_length;
+ user_data += page_length;
+ page_base += page_length;
+ page_offset = 0;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+
+out_unpin:
+ i915_gem_object_ggtt_unpin(obj);
+out:
+ return ret;
+}
+
static int
i915_gem_shmem_pread(struct drm_device *dev,
struct drm_i915_gem_object *obj,
@@ -754,17 +850,20 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
goto out;
}
- /* prime objects have no backing filp to GEM pread/pwrite
- * pages from.
- */
- if (!obj->base.filp) {
- ret = -EINVAL;
- goto out;
- }
-
trace_i915_gem_object_pread(obj, args->offset, args->size);
- ret = i915_gem_shmem_pread(dev, obj, args, file);
+ /* pread for non shmem backed objects */
+ if (!obj->base.filp) {
+ if (obj->tiling_mode == I915_TILING_NONE)
+ ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
+ args->offset,
+ args->data_ptr,
+ false);
+ else
+ ret = -EINVAL;
+ } else {
+ ret = i915_gem_shmem_pread(dev, obj, args, file);
+ }
out:
drm_gem_object_unreference(&obj->base);
@@ -1105,17 +1204,22 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
goto out;
}
- /* prime objects have no backing filp to GEM pread/pwrite
- * pages from.
- */
- if (!obj->base.filp) {
- ret = -EINVAL;
- goto out;
- }
-
trace_i915_gem_object_pwrite(obj, args->offset, args->size);
ret = -EFAULT;
+
+ /* pwrite for non shmem backed objects */
+ if (!obj->base.filp) {
+ if (obj->tiling_mode == I915_TILING_NONE)
+ ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
+ args->offset,
+ args->data_ptr,
+ true);
+ else
+ ret = -EINVAL;
+
+ goto out;
+ }
/* We can only do the GTT pwrite on untiled buffers, as otherwise
* it would end up going through the fenced access, and we'll get
* different detiling behavior between reading and writing.
--
1.9.1
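To make the userspace side concrete, a hedged sketch of driving this
path through the existing pread ioctl; how the non-shmem handle is
obtained (a prime import, or the stolen-backed create proposed earlier
in this series) is assumed and not shown:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Read 'size' bytes starting at 'offset' of a GEM object into 'dst'.
 * With this patch the kernel routes untiled non-shmem handles (prime,
 * stolen) through the new GTT path instead of returning -EINVAL.
 */
static int gem_pread(int fd, uint32_t handle, uint64_t offset,
		     void *dst, uint64_t size)
{
	struct drm_i915_gem_pread pread;

	memset(&pread, 0, sizeof(pread));
	pread.handle = handle;
	pread.offset = offset;
	pread.size = size;
	pread.data_ptr = (uintptr_t)dst;

	return ioctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
}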
* Re: [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
2015-07-22 13:51 ` [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem " ankitprasad.r.sharma
@ 2015-07-22 14:39 ` Chris Wilson
2015-07-31 13:16 ` Goel, Akash
2015-07-22 15:46 ` Tvrtko Ursulin
1 sibling, 1 reply; 36+ messages in thread
From: Chris Wilson @ 2015-07-22 14:39 UTC (permalink / raw)
To: ankitprasad.r.sharma; +Cc: intel-gfx, akash.goel, shashidhar.hiremath
On Wed, Jul 22, 2015 at 07:21:49PM +0530, ankitprasad.r.sharma@intel.com wrote:
> static int
> i915_gem_shmem_pread(struct drm_device *dev,
> struct drm_i915_gem_object *obj,
> @@ -754,17 +850,20 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
> goto out;
> }
>
> - /* prime objects have no backing filp to GEM pread/pwrite
> - * pages from.
> - */
> - if (!obj->base.filp) {
> - ret = -EINVAL;
> - goto out;
> - }
> -
> trace_i915_gem_object_pread(obj, args->offset, args->size);
>
> - ret = i915_gem_shmem_pread(dev, obj, args, file);
> + /* pread for non shmem backed objects */
> + if (!obj->base.filp) {
> + if (obj->tiling_mode == I915_TILING_NONE)
pread/pwrite is defined as a CPU-linear access; the restriction upon
tiling is a simplification of handling swizzling.
> + ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
> + args->offset,
> + args->data_ptr,
> + false);
> + else
> + ret = -EINVAL;
> + } else {
> + ret = i915_gem_shmem_pread(dev, obj, args, file);
> + }
>
> out:
> drm_gem_object_unreference(&obj->base);
> @@ -1105,17 +1204,22 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> goto out;
> }
>
> - /* prime objects have no backing filp to GEM pread/pwrite
> - * pages from.
> - */
> - if (!obj->base.filp) {
> - ret = -EINVAL;
> - goto out;
> - }
> -
> trace_i915_gem_object_pwrite(obj, args->offset, args->size);
>
> ret = -EFAULT;
> +
> + /* pwrite for non shmem backed objects */
> + if (!obj->base.filp) {
> + if (obj->tiling_mode == I915_TILING_NONE)
> + ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
> + args->offset,
> + args->data_ptr,
> + true);
> + else
> + ret = -EINVAL;
> +
> + goto out;
The fast pwrite code always works for obj->base.filp == NULL. To easily
make it generic and handle the cases where we cannot fall back to shmem,
undo the PIN_NONBLOCK.
Then you just want something like
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d3016f37cd4d..f2284a27dd6d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1114,8 +1114,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
* perspective, requiring manual detiling by the client.
*/
if (obj->tiling_mode == I915_TILING_NONE &&
- obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
- cpu_write_needs_clflush(obj)) {
+ (obj->base.filp == NULL ||
+ (obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
+ cpu_write_needs_clflush(obj)))) {
ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
/* Note that the gtt paths might fail with non-page-backed user
* pointers (e.g. gtt mappings when moving data between
@@ -1125,7 +1126,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
if (ret == -EFAULT || ret == -ENOSPC) {
if (obj->phys_handle)
ret = i915_gem_phys_pwrite(obj, args, file);
- else
+ else if (obj->base.filp)
ret = i915_gem_shmem_pwrite(dev, obj, args, file);
}
to enable pwrite access to stolen.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
* Re: [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
2015-07-22 13:51 ` [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem " ankitprasad.r.sharma
2015-07-22 14:39 ` Chris Wilson
@ 2015-07-22 15:46 ` Tvrtko Ursulin
2015-07-22 16:05 ` Daniel Vetter
1 sibling, 1 reply; 36+ messages in thread
From: Tvrtko Ursulin @ 2015-07-22 15:46 UTC (permalink / raw)
To: ankitprasad.r.sharma, intel-gfx; +Cc: akash.goel, shashidhar.hiremath
Hi,
On 07/22/2015 02:51 PM, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> This patch adds support for extending the pread/pwrite functionality
> for objects not backed by shmem. The access will be made through
> gtt interface.
> This will cover prime objects as well as stolen memory backed objects
> but for userptr objects it is still forbidden.
>
> v2: Drop locks around slow_user_access, prefault the pages before
> access (Chris)
>
> v3: Rebased to the latest drm-intel-nightly (Ankit)
>
> v4: Moved page base & offset calculations outside the copy loop,
> corrected data types for size and offset variables, corrected if-else
> braces format (Tvrtko/kerneldocs)
>
> Testcase: igt/gem_stolen
>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 138 +++++++++++++++++++++++++++++++++++-----
> 1 file changed, 121 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 9e7e182..4321178 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -631,6 +631,102 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
> return ret ? - EFAULT : 0;
> }
>
> +static inline uint64_t
> +slow_user_access(struct io_mapping *mapping,
> + uint64_t page_base, int page_offset,
> + char __user *user_data,
> + int length, bool pwrite)
> +{
> + void __iomem *vaddr_inatomic;
> + void *vaddr;
> + uint64_t unwritten;
> +
> + vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
> + /* We can use the cpu mem copy function because this is X86. */
> + vaddr = (void __force *)vaddr_inatomic + page_offset;
> + if (pwrite)
> + unwritten = __copy_from_user(vaddr, user_data, length);
> + else
> + unwritten = __copy_to_user(user_data, vaddr, length);
> +
> + io_mapping_unmap(vaddr_inatomic);
> + return unwritten;
> +}
> +
> +static int
> +i915_gem_gtt_pread_pwrite(struct drm_device *dev,
> + struct drm_i915_gem_object *obj, uint64_t size,
> + uint64_t data_offset, uint64_t data_ptr, bool pwrite)
> +{
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + char __user *user_data;
> + uint64_t remain;
> + uint64_t offset, page_base;
> + int page_offset, page_length, ret = 0;
> +
> + ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
> + if (ret)
> + goto out;
> +
> + ret = i915_gem_object_set_to_gtt_domain(obj, pwrite);
> + if (ret)
> + goto out_unpin;
> +
> + ret = i915_gem_object_put_fence(obj);
> + if (ret)
> + goto out_unpin;
> +
> + user_data = to_user_ptr(data_ptr);
> + remain = size;
> + offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
> +
> + if (pwrite)
> + intel_fb_obj_invalidate(obj, ORIGIN_GTT);
> +
> + mutex_unlock(&dev->struct_mutex);
> + if (!pwrite && likely(!i915.prefault_disable))
> + ret = fault_in_multipages_writeable(user_data, remain);
> +
> + /*
> + * page_offset = offset within page
> + * page_base = page offset within aperture
> + */
> + page_offset = offset_in_page(offset);
> + page_base = offset & PAGE_MASK;
> +
> + while (remain > 0) {
> + /* page_length = bytes to copy for this page */
> + page_length = remain;
> + if ((page_offset + remain) > PAGE_SIZE)
> + page_length = PAGE_SIZE - page_offset;
> +
> + /* This is a slow read/write as it tries to read from
> + * and write to user memory which may result in page
> + * faults
> + */
> + ret = slow_user_access(dev_priv->gtt.mappable, page_base,
> + page_offset, user_data,
> + page_length, pwrite);
> +
> + if (ret) {
> + ret = -EFAULT;
> + break;
> + }
> +
> + remain -= page_length;
> + user_data += page_length;
> + page_base += page_length;
> + page_offset = 0;
> + }
> +
> + mutex_lock(&dev->struct_mutex);
My objection here was that we are re-acquiring the mutex in
non-interruptible mode, while the caller had it interruptible. I am not
100% sure what the conclusion back then was.
But also, why is it even necessary to drop the mutex here?
Regards,
Tvrtko
* Re: [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
2015-07-22 15:46 ` Tvrtko Ursulin
@ 2015-07-22 16:05 ` Daniel Vetter
2015-07-22 16:17 ` Tvrtko Ursulin
0 siblings, 1 reply; 36+ messages in thread
From: Daniel Vetter @ 2015-07-22 16:05 UTC (permalink / raw)
To: Tvrtko Ursulin
Cc: ankitprasad.r.sharma, intel-gfx, akash.goel, shashidhar.hiremath
On Wed, Jul 22, 2015 at 04:46:00PM +0100, Tvrtko Ursulin wrote:
> But also, why is it even necessary to drop the mutex here?
Locking inversion with our own pagefault handler. Happens since at least
mesa can return gtt mmaps to gl clients, which can then in turn try to
upload that with pwrite somewhere else. Also userspace could be just plain
nasty and try to deadlock the kernel ;-)
Dropping the struct_mutex is SOP for the copy_from/to_user slowpath.
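Schematically, the slowpath convention being described; a sketch of the
pattern, not the exact driver code:

	/* Never run a faulting user copy under struct_mutex: our own
	 * pagefault handler (e.g. for a GTT mmap) may need that mutex,
	 * so holding it across the copy can deadlock.
	 */
	mutex_unlock(&dev->struct_mutex);

	/* May fault; the fault handler is free to take struct_mutex. */
	if (copy_from_user(vaddr, user_data, length))
		ret = -EFAULT;

	/* Reacquire before touching GEM state again. */
	mutex_lock(&dev->struct_mutex);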
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
* Re: [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
2015-07-22 16:05 ` Daniel Vetter
@ 2015-07-22 16:17 ` Tvrtko Ursulin
0 siblings, 0 replies; 36+ messages in thread
From: Tvrtko Ursulin @ 2015-07-22 16:17 UTC (permalink / raw)
To: Daniel Vetter
Cc: ankitprasad.r.sharma, intel-gfx, akash.goel, shashidhar.hiremath
On 07/22/2015 05:05 PM, Daniel Vetter wrote:
> On Wed, Jul 22, 2015 at 04:46:00PM +0100, Tvrtko Ursulin wrote:
>> But also, why is it even necessary to drop the mutex here?
>
> Locking inversion with our own pagefault handler. Happens since at least
> mesa can return gtt mmaps to gl clients, which can then in turn try to
> upload that with pwrite somewhere else. Also userspace could be just plain
> nasty and try to deadlock the kernel ;-)
Yeah blame userspace. ;D
> Dropping the struct_mutex is SOP for the copy_from/to_user slowpath.
I suspected something like that but did not feel like thinking too much.
:) When encountering unusual things like dropping and re-acquiring locks,
I expect to see comments explaining them.
Regards,
Tvrtko
* Re: [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
2015-07-22 14:39 ` Chris Wilson
@ 2015-07-31 13:16 ` Goel, Akash
2015-09-10 17:50 ` Ankitprasad Sharma
2015-09-15 9:58 ` Chris Wilson
0 siblings, 2 replies; 36+ messages in thread
From: Goel, Akash @ 2015-07-31 13:16 UTC (permalink / raw)
To: Chris Wilson, ankitprasad.r.sharma
Cc: intel-gfx@lists.freedesktop.org, shashidhar.hiremath
On 7/22/2015 8:09 PM, Chris Wilson wrote:
> On Wed, Jul 22, 2015 at 07:21:49PM +0530, ankitprasad.r.sharma@intel.com wrote:
>> static int
>> i915_gem_shmem_pread(struct drm_device *dev,
>> struct drm_i915_gem_object *obj,
>> @@ -754,17 +850,20 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
>> goto out;
>> }
>>
>> - /* prime objects have no backing filp to GEM pread/pwrite
>> - * pages from.
>> - */
>> - if (!obj->base.filp) {
>> - ret = -EINVAL;
>> - goto out;
>> - }
>> -
>> trace_i915_gem_object_pread(obj, args->offset, args->size);
>>
>> - ret = i915_gem_shmem_pread(dev, obj, args, file);
>> + /* pread for non shmem backed objects */
>> + if (!obj->base.filp) {
>> + if (obj->tiling_mode == I915_TILING_NONE)
>
> pread/pwrite is defined as a CPU-linear access; the restriction upon
> tiling is a simplification of handling swizzling.
>
>> + ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
>> + args->offset,
>> + args->data_ptr,
>> + false);
>> + else
>> + ret = -EINVAL;
>> + } else {
>> + ret = i915_gem_shmem_pread(dev, obj, args, file);
>> + }
>>
>> out:
>> drm_gem_object_unreference(&obj->base);
>> @@ -1105,17 +1204,22 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
>> goto out;
>> }
>>
>> - /* prime objects have no backing filp to GEM pread/pwrite
>> - * pages from.
>> - */
>> - if (!obj->base.filp) {
>> - ret = -EINVAL;
>> - goto out;
>> - }
>> -
>> trace_i915_gem_object_pwrite(obj, args->offset, args->size);
>>
>> ret = -EFAULT;
>> +
>> + /* pwrite for non shmem backed objects */
>> + if (!obj->base.filp) {
>> + if (obj->tiling_mode == I915_TILING_NONE)
>> + ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
>> + args->offset,
>> + args->data_ptr,
>> + true);
>> + else
>> + ret = -EINVAL;
>> +
>> + goto out;
>
> The fast pwrite code always works for obj->base.filp == NULL. To easily
> make it generic and handle the cases where we cannot fall back to shmem,
> undo the PIN_NONBLOCK.
>
> Then you just want something like
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index d3016f37cd4d..f2284a27dd6d 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1114,8 +1114,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> * perspective, requiring manual detiling by the client.
> */
> if (obj->tiling_mode == I915_TILING_NONE &&
Since the suggestion is to reuse the gtt_pwrite_fast function only for
non-shmem backed objects, can we also relax the tiling constraint here,
apart from removing the PIN_NONBLOCK flag? There is anyway a put_fence
already being done in the gtt_pwrite_fast function.
Also, the gtt_pwrite_fast function is currently intolerant of faults
incurred while accessing the user-space buffer, so can that also be
relaxed (at least for the non-shmem backed objects)?
Best regards
Akash
> - obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
> - cpu_write_needs_clflush(obj)) {
> + (obj->base.filp == NULL ||
> + (obj->base.write_domain != I915_GEM_DOMAIN_CPU &&
> + cpu_write_needs_clflush(obj)))) {
> ret = i915_gem_gtt_pwrite_fast(dev_priv, obj, args, file);
> /* Note that the gtt paths might fail with non-page-backed user
> * pointers (e.g. gtt mappings when moving data between
> @@ -1125,7 +1126,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> if (ret == -EFAULT || ret == -ENOSPC) {
> if (obj->phys_handle)
> ret = i915_gem_phys_pwrite(obj, args, file);
> - else
> + else if (obj->base.filp)
> ret = i915_gem_shmem_pwrite(dev, obj, args, file);
> }
>
>
> to enable pwrite access to stolen.
> -Chris
>
* Re: [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
2015-07-31 13:16 ` Goel, Akash
@ 2015-09-10 17:50 ` Ankitprasad Sharma
2015-09-15 9:58 ` Chris Wilson
1 sibling, 0 replies; 36+ messages in thread
From: Ankitprasad Sharma @ 2015-09-10 17:50 UTC (permalink / raw)
To: Goel, Akash; +Cc: intel-gfx@lists.freedesktop.org, shashidhar.hiremath
On Fri, 2015-07-31 at 18:46 +0530, Goel, Akash wrote:
>
> On 7/22/2015 8:09 PM, Chris Wilson wrote:
> > On Wed, Jul 22, 2015 at 07:21:49PM +0530, ankitprasad.r.sharma@intel.com wrote:
> >> static int
> >> i915_gem_shmem_pread(struct drm_device *dev,
> >> struct drm_i915_gem_object *obj,
> >> @@ -754,17 +850,20 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
> >> goto out;
> >> }
> >>
> >> - /* prime objects have no backing filp to GEM pread/pwrite
> >> - * pages from.
> >> - */
> >> - if (!obj->base.filp) {
> >> - ret = -EINVAL;
> >> - goto out;
> >> - }
> >> -
> >> trace_i915_gem_object_pread(obj, args->offset, args->size);
> >>
> >> - ret = i915_gem_shmem_pread(dev, obj, args, file);
> >> + /* pread for non shmem backed objects */
> >> + if (!obj->base.filp) {
> >> + if (obj->tiling_mode == I915_TILING_NONE)
> >
> > pread/pwrite is defined as a CPU-linear access; the restriction upon
> > tiling is a simplification of handling swizzling.
> >
> >> + ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
> >> + args->offset,
> >> + args->data_ptr,
> >> + false);
> >> + else
> >> + ret = -EINVAL;
> >> + } else {
> >> + ret = i915_gem_shmem_pread(dev, obj, args, file);
> >> + }
> >>
> >> out:
> >> drm_gem_object_unreference(&obj->base);
> >> @@ -1105,17 +1204,22 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> >> goto out;
> >> }
> >>
> >> - /* prime objects have no backing filp to GEM pread/pwrite
> >> - * pages from.
> >> - */
> >> - if (!obj->base.filp) {
> >> - ret = -EINVAL;
> >> - goto out;
> >> - }
> >> -
> >> trace_i915_gem_object_pwrite(obj, args->offset, args->size);
> >>
> >> ret = -EFAULT;
> >> +
> >> + /* pwrite for non shmem backed objects */
> >> + if (!obj->base.filp) {
> >> + if (obj->tiling_mode == I915_TILING_NONE)
> >> + ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
> >> + args->offset,
> >> + args->data_ptr,
> >> + true);
> >> + else
> >> + ret = -EINVAL;
> >> +
> >> + goto out;
> >
> > The fast pwrite code always works for obj->base.filp == NULL. To easily
> > make it generic and handle the cases where we cannot fall back to shmem,
> > undo the PIN_NONBLOCK.
> >
> > Then you just want something like
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index d3016f37cd4d..f2284a27dd6d 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -1114,8 +1114,9 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> > * perspective, requiring manual detiling by the client.
> > */
> > if (obj->tiling_mode == I915_TILING_NONE &&
>
> Since the suggestion is to reuse the gtt_pwrite_fast function only for
> non-shmem backed objects, can we also relax the tiling constraint here,
> apart from removing the PIN_NONBLOCK flag? There is anyway a put_fence
> already being done in the gtt_pwrite_fast function.
>
> Also, the gtt_pwrite_fast function is currently intolerant of faults
> incurred while accessing the user-space buffer, so can that also be
> relaxed (at least for the non-shmem backed objects)?
Even if we reuse gtt_pwrite_fast, we will have to handle page faults
for non-shmem backed objects, which will contradict the purpose of
gtt_pwrite_fast. The gtt_pread_pwrite function will still be around for
the pread case.
Also, I think it should be OK to relax the tiling constraint as well, as
we put_fence every time we try to pread/pwrite from/to a non-shmem-backed
object here.
Thanks,
Ankit
* [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
2015-09-15 8:33 [PATCH v6 0/4] Support for creating/using Stolen memory " ankitprasad.r.sharma
@ 2015-09-15 8:33 ` ankitprasad.r.sharma
2015-09-15 9:54 ` Chris Wilson
0 siblings, 1 reply; 36+ messages in thread
From: ankitprasad.r.sharma @ 2015-09-15 8:33 UTC (permalink / raw)
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel, shashidhar.hiremath
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
This patch adds support for extending the pread/pwrite functionality
for objects not backed by shmem. The access will be made through
gtt interface.
This will cover prime objects as well as stolen memory backed objects
but for userptr objects it is still forbidden.
v2: Drop locks around slow_user_access, prefault the pages before
access (Chris)
v3: Rebased to the latest drm-intel-nightly (Ankit)
v4: Moved page base & offset calculations outside the copy loop,
corrected data types for size and offset variables, corrected if-else
braces format (Tvrtko/kerneldocs)
v5: Enabled pread/pwrite for all non-shmem backed objects, dropping
the tiling restrictions (Ankit)
Testcase: igt/gem_stolen
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
drivers/gpu/drm/i915/i915_gem.c | 128 ++++++++++++++++++++++++++++++++++------
1 file changed, 111 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 85025b1..2df37ac 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -614,6 +614,102 @@ shmem_pread_slow(struct page *page, int shmem_page_offset, int page_length,
return ret ? - EFAULT : 0;
}
+static inline uint64_t
+slow_user_access(struct io_mapping *mapping,
+ uint64_t page_base, int page_offset,
+ char __user *user_data,
+ int length, bool pwrite)
+{
+ void __iomem *vaddr_inatomic;
+ void *vaddr;
+ uint64_t unwritten;
+
+ vaddr_inatomic = io_mapping_map_wc(mapping, page_base);
+ /* We can use the cpu mem copy function because this is X86. */
+ vaddr = (void __force *)vaddr_inatomic + page_offset;
+ if (pwrite)
+ unwritten = __copy_from_user(vaddr, user_data, length);
+ else
+ unwritten = __copy_to_user(user_data, vaddr, length);
+
+ io_mapping_unmap(vaddr_inatomic);
+ return unwritten;
+}
+
+static int
+i915_gem_gtt_pread_pwrite(struct drm_device *dev,
+ struct drm_i915_gem_object *obj, uint64_t size,
+ uint64_t data_offset, uint64_t data_ptr, bool pwrite)
+{
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ char __user *user_data;
+ uint64_t remain;
+ uint64_t offset, page_base;
+ int page_offset, page_length, ret = 0;
+
+ ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
+ if (ret)
+ goto out;
+
+ ret = i915_gem_object_set_to_gtt_domain(obj, pwrite);
+ if (ret)
+ goto out_unpin;
+
+ ret = i915_gem_object_put_fence(obj);
+ if (ret)
+ goto out_unpin;
+
+ user_data = to_user_ptr(data_ptr);
+ remain = size;
+ offset = i915_gem_obj_ggtt_offset(obj) + data_offset;
+
+ if (pwrite)
+ intel_fb_obj_invalidate(obj, ORIGIN_GTT);
+
+ mutex_unlock(&dev->struct_mutex);
+ if (!pwrite && likely(!i915.prefault_disable))
+ ret = fault_in_multipages_writeable(user_data, remain);
+
+ /*
+ * page_offset = offset within page
+ * page_base = page offset within aperture
+ */
+ page_offset = offset_in_page(offset);
+ page_base = offset & PAGE_MASK;
+
+ while (remain > 0) {
+ /* page_length = bytes to copy for this page */
+ page_length = remain;
+ if ((page_offset + remain) > PAGE_SIZE)
+ page_length = PAGE_SIZE - page_offset;
+
+ /* This is a slow read/write as it tries to read from
+ * and write to user memory which may result in page
+ * faults
+ */
+ ret = slow_user_access(dev_priv->gtt.mappable, page_base,
+ page_offset, user_data,
+ page_length, pwrite);
+
+ if (ret) {
+ ret = -EFAULT;
+ break;
+ }
+
+ remain -= page_length;
+ user_data += page_length;
+ page_base += page_length;
+ page_offset = 0;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+
+out_unpin:
+ i915_gem_object_ggtt_unpin(obj);
+out:
+ return ret;
+}
+
static int
i915_gem_shmem_pread(struct drm_device *dev,
struct drm_i915_gem_object *obj,
@@ -737,17 +833,15 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
goto out;
}
- /* prime objects have no backing filp to GEM pread/pwrite
- * pages from.
- */
- if (!obj->base.filp) {
- ret = -EINVAL;
- goto out;
- }
-
trace_i915_gem_object_pread(obj, args->offset, args->size);
- ret = i915_gem_shmem_pread(dev, obj, args, file);
+ /* pread for non shmem backed objects */
+ if (!obj->base.filp)
+ ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
+ args->offset, args->data_ptr,
+ false);
+ else
+ ret = i915_gem_shmem_pread(dev, obj, args, file);
out:
drm_gem_object_unreference(&obj->base);
@@ -1090,17 +1184,17 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
goto out;
}
- /* prime objects have no backing filp to GEM pread/pwrite
- * pages from.
- */
- if (!obj->base.filp) {
- ret = -EINVAL;
- goto out;
- }
-
trace_i915_gem_object_pwrite(obj, args->offset, args->size);
ret = -EFAULT;
+
+ /* pwrite for non shmem backed objects */
+ if (!obj->base.filp) {
+ ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
+ args->offset, args->data_ptr,
+ true);
+ goto out;
+ }
/* We can only do the GTT pwrite on untiled buffers, as otherwise
* it would end up going through the fenced access, and we'll get
* different detiling behavior between reading and writing.
--
1.9.1
* Re: [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
2015-09-15 8:33 ` [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem " ankitprasad.r.sharma
@ 2015-09-15 9:54 ` Chris Wilson
2015-09-15 10:35 ` Ankitprasad Sharma
0 siblings, 1 reply; 36+ messages in thread
From: Chris Wilson @ 2015-09-15 9:54 UTC (permalink / raw)
To: ankitprasad.r.sharma; +Cc: intel-gfx, akash.goel, shashidhar.hiremath
On Tue, Sep 15, 2015 at 02:03:27PM +0530, ankitprasad.r.sharma@intel.com wrote:
> @@ -1090,17 +1184,17 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> goto out;
> }
>
> - /* prime objects have no backing filp to GEM pread/pwrite
> - * pages from.
> - */
> - if (!obj->base.filp) {
> - ret = -EINVAL;
> - goto out;
> - }
> -
> trace_i915_gem_object_pwrite(obj, args->offset, args->size);
>
> ret = -EFAULT;
> +
> + /* pwrite for non shmem backed objects */
> + if (!obj->base.filp) {
> + ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
> + args->offset, args->data_ptr,
> + true);
> + goto out;
> + }
There already exists a GTT write path, along with a more correct
description of its limitations.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
* Re: [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
2015-07-31 13:16 ` Goel, Akash
2015-09-10 17:50 ` Ankitprasad Sharma
@ 2015-09-15 9:58 ` Chris Wilson
1 sibling, 0 replies; 36+ messages in thread
From: Chris Wilson @ 2015-09-15 9:58 UTC (permalink / raw)
To: Goel, Akash
Cc: ankitprasad.r.sharma, intel-gfx@lists.freedesktop.org,
shashidhar.hiremath
On Fri, Jul 31, 2015 at 06:46:20PM +0530, Goel, Akash wrote:
> Since the suggestion is to reuse the gtt_pwrite_fast function only
> for non-shmem backed objects, can we also relax the tiling
> constraint here, apart from removing the PIN_NONBLOCK flag? There is
> anyway a put_fence already being done in the gtt_pwrite_fast
> function.
The tiling constraint is due to a hardware limitation (the effect of
swizzling); you cannot simply drop it.
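As a rough illustration of that limitation (assuming the bit-9/bit-10
XOR scheme, one of several swizzle modes, purely for illustration):

	/* With bit-6 swizzling enabled, the address the hardware actually
	 * uses differs from the CPU-linear address, so an unfenced
	 * CPU-linear copy of a tiled object sees scrambled data.
	 */
	static unsigned long swizzle_addr_9_10(unsigned long addr)
	{
		/* bit 6 is XORed with bits 9 and 10 of the address */
		return addr ^ ((((addr >> 9) ^ (addr >> 10)) & 1) << 6);
	}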
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
* Re: [PATCH 4/4] drm/i915: Support for pread/pwrite from/to non shmem backed objects
2015-09-15 9:54 ` Chris Wilson
@ 2015-09-15 10:35 ` Ankitprasad Sharma
0 siblings, 0 replies; 36+ messages in thread
From: Ankitprasad Sharma @ 2015-09-15 10:35 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx, akash.goel, shashidhar.hiremath
On Tue, 2015-09-15 at 10:54 +0100, Chris Wilson wrote:
> On Tue, Sep 15, 2015 at 02:03:27PM +0530, ankitprasad.r.sharma@intel.com wrote:
> > @@ -1090,17 +1184,17 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
> > goto out;
> > }
> >
> > - /* prime objects have no backing filp to GEM pread/pwrite
> > - * pages from.
> > - */
> > - if (!obj->base.filp) {
> > - ret = -EINVAL;
> > - goto out;
> > - }
> > -
> > trace_i915_gem_object_pwrite(obj, args->offset, args->size);
> >
> > ret = -EFAULT;
> > +
> > + /* pwrite for non shmem backed objects */
> > + if (!obj->base.filp) {
> > + ret = i915_gem_gtt_pread_pwrite(dev, obj, args->size,
> > + args->offset, args->data_ptr,
> > + true);
> > + goto out;
> > + }
>
> There already exists a GTT write path, along with a more correct
> description of its limitations.
Then it would look something like this, making i915_gem_gtt_pwrite_fast
handle page faults for non-shmem backed objects:
@@ -831,10 +925,16 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
* retry in the slow path.
*/
- if (fast_user_write(dev_priv->gtt.mappable, page_base,
+ if (obj->base.filp &&
+ fast_user_write(dev_priv->gtt.mappable, page_base,
page_offset, user_data, page_length)) {
ret = -EFAULT;
goto out_flush;
+ } else if (!obj->base.filp &&
+ slow_user_access(dev_priv->gtt.mappable,
+ page_base, page_offset,
+ user_data, page_length, true)) {
+ ret = -EFAULT;
+ goto out_flush;
}
Thanks
-Ankit