* [PATCH 1/4] drm/i915: Take the handle idr spinlock once for looking up the exec objects
@ 2012-11-10 15:15 Chris Wilson
2012-11-10 15:15 ` [PATCH 2/4] drm/i915: Move the execbuffer objects list from the stack into the tracker Chris Wilson
` (3 more replies)
0 siblings, 4 replies; 10+ messages in thread
From: Chris Wilson @ 2012-11-10 15:15 UTC (permalink / raw)
To: intel-gfx
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 86 +++++++++++++++-------------
1 file changed, 46 insertions(+), 40 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index b33d525..c491ab1 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -73,6 +73,46 @@ eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
&eb->buckets[obj->exec_handle & eb->and]);
}
+static int
+eb_lookup_objects(struct eb_objects *eb,
+ struct drm_i915_gem_exec_object2 *exec,
+ int count,
+ struct drm_file *file,
+ struct list_head *objects)
+{
+ int i;
+
+ spin_lock(&file->table_lock);
+ for (i = 0; i < count; i++) {
+ struct drm_i915_gem_object *obj;
+
+ obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
+ if (obj == NULL) {
+ spin_unlock(&file->table_lock);
+ DRM_DEBUG("Invalid object handle %d at index %d\n",
+ exec[i].handle, i);
+ return -ENOENT;
+ }
+
+ if (!list_empty(&obj->exec_list)) {
+ spin_unlock(&file->table_lock);
+ DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
+ obj, exec[i].handle, i);
+ return -EINVAL;
+ }
+
+ drm_gem_object_reference(&obj->base);
+ list_add_tail(&obj->exec_list, objects);
+
+ obj->exec_handle = exec[i].handle;
+ obj->exec_entry = &exec[i];
+ eb_add_object(eb, obj);
+ }
+ spin_unlock(&file->table_lock);
+
+ return 0;
+}
+
static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
@@ -583,21 +623,9 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
/* reacquire the objects */
eb_reset(eb);
- for (i = 0; i < count; i++) {
- obj = to_intel_bo(drm_gem_object_lookup(dev, file,
- exec[i].handle));
- if (&obj->base == NULL) {
- DRM_DEBUG("Invalid object handle %d at index %d\n",
- exec[i].handle, i);
- ret = -ENOENT;
- goto err;
- }
-
- list_add_tail(&obj->exec_list, objects);
- obj->exec_handle = exec[i].handle;
- obj->exec_entry = &exec[i];
- eb_add_object(eb, obj);
- }
+ ret = eb_lookup_objects(eb, exec, count, file, objects);
+ if (ret)
+ goto err;
ret = i915_gem_execbuffer_reserve(ring, file, objects);
if (ret)
@@ -949,31 +977,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/* Look up object handles */
INIT_LIST_HEAD(&objects);
- for (i = 0; i < args->buffer_count; i++) {
- struct drm_i915_gem_object *obj;
-
- obj = to_intel_bo(drm_gem_object_lookup(dev, file,
- exec[i].handle));
- if (&obj->base == NULL) {
- DRM_DEBUG("Invalid object handle %d at index %d\n",
- exec[i].handle, i);
- /* prevent error path from reading uninitialized data */
- ret = -ENOENT;
- goto err;
- }
-
- if (!list_empty(&obj->exec_list)) {
- DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
- obj, exec[i].handle, i);
- ret = -EINVAL;
- goto err;
- }
-
- list_add_tail(&obj->exec_list, &objects);
- obj->exec_handle = exec[i].handle;
- obj->exec_entry = &exec[i];
- eb_add_object(eb, obj);
- }
+ ret = eb_lookup_objects(eb, exec, args->buffer_count, file, &objects);
+ if (ret)
+ goto err;
/* take note of the batch buffer before we might reorder the lists */
batch_obj = list_entry(objects.prev,
--
1.7.10.4
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 2/4] drm/i915: Move the execbuffer objects list from the stack into the tracker
2012-11-10 15:15 [PATCH 1/4] drm/i915: Take the handle idr spinlock once for looking up the exec objects Chris Wilson
@ 2012-11-10 15:15 ` Chris Wilson
2012-11-10 15:15 ` [PATCH 3/4] drm/i915: Allow userspace to hint that the relocations were known Chris Wilson
` (2 subsequent siblings)
3 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2012-11-10 15:15 UTC (permalink / raw)
To: intel-gfx
Instead of passing around the eb-objects hashtable and a separate object
list, we can include the object list into the eb-objects structure for
convenience.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 58 +++++++++++++---------------
1 file changed, 27 insertions(+), 31 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c491ab1..1738ebd 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -38,6 +38,7 @@
#define __EXEC_NEEDS_CHIPSET_FLUSH (1<<29)
struct eb_objects {
+ struct list_head objects;
int and;
struct hlist_head buckets[0];
};
@@ -57,6 +58,7 @@ eb_create(int size)
return eb;
eb->and = count - 1;
+ INIT_LIST_HEAD(&eb->objects);
return eb;
}
@@ -77,8 +79,7 @@ static int
eb_lookup_objects(struct eb_objects *eb,
struct drm_i915_gem_exec_object2 *exec,
int count,
- struct drm_file *file,
- struct list_head *objects)
+ struct drm_file *file)
{
int i;
@@ -102,7 +103,7 @@ eb_lookup_objects(struct eb_objects *eb,
}
drm_gem_object_reference(&obj->base);
- list_add_tail(&obj->exec_list, objects);
+ list_add_tail(&obj->exec_list, &eb->objects);
obj->exec_handle = exec[i].handle;
obj->exec_entry = &exec[i];
@@ -133,6 +134,15 @@ eb_get_object(struct eb_objects *eb, unsigned long handle)
static void
eb_destroy(struct eb_objects *eb)
{
+ while (!list_empty(&eb->objects)) {
+ struct drm_i915_gem_object *obj;
+
+ obj = list_first_entry(&eb->objects,
+ struct drm_i915_gem_object,
+ exec_list);
+ list_del_init(&obj->exec_list);
+ drm_gem_object_unreference(&obj->base);
+ }
kfree(eb);
}
@@ -377,8 +387,7 @@ i915_gem_execbuffer_relocate_object_slow(struct drm_i915_gem_object *obj,
static int
i915_gem_execbuffer_relocate(struct drm_device *dev,
- struct eb_objects *eb,
- struct list_head *objects)
+ struct eb_objects *eb)
{
struct drm_i915_gem_object *obj;
int ret = 0;
@@ -391,7 +400,7 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
* lockdep complains vehemently.
*/
pagefault_disable();
- list_for_each_entry(obj, objects, exec_list) {
+ list_for_each_entry(obj, &eb->objects, exec_list) {
ret = i915_gem_execbuffer_relocate_object(obj, eb);
if (ret)
break;
@@ -564,7 +573,6 @@ static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
struct drm_file *file,
struct intel_ring_buffer *ring,
- struct list_head *objects,
struct eb_objects *eb,
struct drm_i915_gem_exec_object2 *exec,
int count)
@@ -575,8 +583,8 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
int i, total, ret;
/* We may process another execbuffer during the unlock... */
- while (!list_empty(objects)) {
- obj = list_first_entry(objects,
+ while (!list_empty(&eb->objects)) {
+ obj = list_first_entry(&eb->objects,
struct drm_i915_gem_object,
exec_list);
list_del_init(&obj->exec_list);
@@ -623,15 +631,15 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
/* reacquire the objects */
eb_reset(eb);
- ret = eb_lookup_objects(eb, exec, count, file, objects);
+ ret = eb_lookup_objects(eb, exec, count, file);
if (ret)
goto err;
- ret = i915_gem_execbuffer_reserve(ring, file, objects);
+ ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects);
if (ret)
goto err;
- list_for_each_entry(obj, objects, exec_list) {
+ list_for_each_entry(obj, &eb->objects, exec_list) {
int offset = obj->exec_entry - exec;
ret = i915_gem_execbuffer_relocate_object_slow(obj, eb,
reloc + reloc_offset[offset]);
@@ -834,7 +842,6 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct drm_i915_gem_exec_object2 *exec)
{
drm_i915_private_t *dev_priv = dev->dev_private;
- struct list_head objects;
struct eb_objects *eb;
struct drm_i915_gem_object *batch_obj;
struct drm_clip_rect *cliprects = NULL;
@@ -976,28 +983,26 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
}
/* Look up object handles */
- INIT_LIST_HEAD(&objects);
- ret = eb_lookup_objects(eb, exec, args->buffer_count, file, &objects);
+ ret = eb_lookup_objects(eb, exec, args->buffer_count, file);
if (ret)
goto err;
/* take note of the batch buffer before we might reorder the lists */
- batch_obj = list_entry(objects.prev,
+ batch_obj = list_entry(eb->objects.prev,
struct drm_i915_gem_object,
exec_list);
/* Move the objects en-masse into the GTT, evicting if necessary. */
- ret = i915_gem_execbuffer_reserve(ring, file, &objects);
+ ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects);
if (ret)
goto err;
/* The objects are in their final locations, apply the relocations. */
- ret = i915_gem_execbuffer_relocate(dev, eb, &objects);
+ ret = i915_gem_execbuffer_relocate(dev, eb);
if (ret) {
if (ret == -EFAULT) {
ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
- &objects, eb,
- exec,
+ eb, exec,
args->buffer_count);
BUG_ON(!mutex_is_locked(&dev->struct_mutex));
}
@@ -1020,7 +1025,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);
- ret = i915_gem_execbuffer_move_to_gpu(ring, &objects);
+ ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->objects);
if (ret)
goto err;
@@ -1090,20 +1095,11 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto err;
}
- i915_gem_execbuffer_move_to_active(&objects, ring, seqno);
+ i915_gem_execbuffer_move_to_active(&eb->objects, ring, seqno);
i915_gem_execbuffer_retire_commands(dev, file, ring);
err:
eb_destroy(eb);
- while (!list_empty(&objects)) {
- struct drm_i915_gem_object *obj;
-
- obj = list_first_entry(&objects,
- struct drm_i915_gem_object,
- exec_list);
- list_del_init(&obj->exec_list);
- drm_gem_object_unreference(&obj->base);
- }
mutex_unlock(&dev->struct_mutex);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 3/4] drm/i915: Allow userspace to hint that the relocations were known
2012-11-10 15:15 [PATCH 1/4] drm/i915: Take the handle idr spinlock once for looking up the exec objects Chris Wilson
2012-11-10 15:15 ` [PATCH 2/4] drm/i915: Move the execbuffer objects list from the stack into the tracker Chris Wilson
@ 2012-11-10 15:15 ` Chris Wilson
2012-11-10 17:16 ` Daniel Vetter
2012-11-10 15:15 ` [PATCH 4/4] drm/i915: Use the reloc.handle as an index into the execbuffer array Chris Wilson
2012-11-10 20:24 ` [PATCH 1/4] drm/i915: Take the handle idr spinlock once for looking up the exec objects Eric Anholt
3 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2012-11-10 15:15 UTC (permalink / raw)
To: intel-gfx
Userspace is able to hint to the kernel that its command stream and
auxiliary state buffers already hold the correct presumed addresses and
so the relocation process may be skipped if the kernel does not need to
move any buffers in preparation for the execbuffer. Thus for the common
case where the allotment of buffers is static between batches, we can
avoid the overhead of individually checking the relocation entries.
Note that this requires userspace to supply the domain tracking and
requests for workarounds itself that would otherwise be computed based
upon the relocation entries.
Using copywinwin10 as an example that is dependent upon emitting a lot
of relocations (2 per operation), we see improvements of:
c2d/gm45: 618000.0/sec to 632000.0/sec.
i3-330m: 748000.0/sec to 830000.0/sec.
(measured relative to a baseline with neither optimisations applied).
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_dma.c | 3 ++
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 50 ++++++++++++++++++----------
include/uapi/drm/i915_drm.h | 11 ++++++
3 files changed, 46 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 4beb7bc..a0c4b4f 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1017,6 +1017,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_SECURE_BATCHES:
value = capable(CAP_SYS_ADMIN);
break;
+ case I915_PARAM_HAS_EXEC_NO_RELOC:
+ value = 1;
+ break;
default:
DRM_DEBUG_DRIVER("Unknown parameter %d\n",
param->param);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 1738ebd..30beea6 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -412,7 +412,8 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
static int
i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
- struct intel_ring_buffer *ring)
+ struct intel_ring_buffer *ring,
+ bool *need_reloc)
{
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
@@ -452,7 +453,18 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
obj->has_aliasing_ppgtt_mapping = 1;
}
- entry->offset = obj->gtt_offset;
+ if (entry->offset != obj->gtt_offset) {
+ entry->offset = obj->gtt_offset;
+ *need_reloc = true;
+ }
+
+ obj->base.pending_read_domains = entry->rsvd1 & 0xffffffff;
+ obj->base.pending_write_domain = (entry->rsvd1 >> 32) & 0xffffffff;
+
+ if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
+ !obj->has_global_gtt_mapping)
+ i915_gem_gtt_bind_object(obj, obj->cache_level);
+
return 0;
}
@@ -478,7 +490,8 @@ i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
struct drm_file *file,
- struct list_head *objects)
+ struct list_head *objects,
+ bool *need_relocs)
{
struct drm_i915_gem_object *obj;
struct list_head ordered_objects;
@@ -502,8 +515,6 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
else
list_move_tail(&obj->exec_list, &ordered_objects);
- obj->base.pending_read_domains = 0;
- obj->base.pending_write_domain = 0;
obj->pending_fenced_gpu_access = false;
}
list_splice(&ordered_objects, objects);
@@ -541,7 +552,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
(need_fence && !obj->map_and_fenceable))
ret = i915_gem_object_unbind(obj);
else
- ret = i915_gem_execbuffer_reserve_object(obj, ring);
+ ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
if (ret)
goto err;
}
@@ -551,7 +562,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
if (obj->gtt_space)
continue;
- ret = i915_gem_execbuffer_reserve_object(obj, ring);
+ ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
if (ret)
goto err;
}
@@ -571,16 +582,18 @@ err: /* Decrement pin count for bound objects */
static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
+ struct drm_i915_gem_execbuffer2 *args,
struct drm_file *file,
struct intel_ring_buffer *ring,
struct eb_objects *eb,
- struct drm_i915_gem_exec_object2 *exec,
- int count)
+ struct drm_i915_gem_exec_object2 *exec)
{
struct drm_i915_gem_relocation_entry *reloc;
struct drm_i915_gem_object *obj;
+ bool need_relocs;
int *reloc_offset;
int i, total, ret;
+ int count = args->buffer_count;
/* We may process another execbuffer during the unlock... */
while (!list_empty(&eb->objects)) {
@@ -635,7 +648,8 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
if (ret)
goto err;
- ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects);
+ need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
+ ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects, &need_relocs);
if (ret)
goto err;
@@ -848,10 +862,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
struct intel_ring_buffer *ring;
u32 ctx_id = i915_execbuffer2_get_context_id(*args);
u32 exec_start, exec_len;
- u32 seqno;
- u32 mask;
- u32 flags;
+ u32 seqno, mask, flags;
int ret, mode, i;
+ bool need_relocs;
if (!i915_gem_check_execbuffer(args)) {
DRM_DEBUG("execbuf with invalid offset/length\n");
@@ -993,17 +1006,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
exec_list);
/* Move the objects en-masse into the GTT, evicting if necessary. */
- ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects);
+ need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
+ ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects, &need_relocs);
if (ret)
goto err;
/* The objects are in their final locations, apply the relocations. */
- ret = i915_gem_execbuffer_relocate(dev, eb);
+ if (need_relocs)
+ ret = i915_gem_execbuffer_relocate(dev, eb);
if (ret) {
if (ret == -EFAULT) {
- ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
- eb, exec,
- args->buffer_count);
+ ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
+ eb, exec);
BUG_ON(!mutex_is_locked(&dev->struct_mutex));
}
if (ret)
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 067b98e..7657d3e 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -309,6 +309,7 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_HAS_PRIME_VMAP_FLUSH 21
#define I915_PARAM_RSVD_FOR_FUTURE_USE 22
#define I915_PARAM_HAS_SECURE_BATCHES 23
+#define I915_PARAM_HAS_EXEC_NO_RELOC 24
typedef struct drm_i915_getparam {
int param;
@@ -629,7 +630,10 @@ struct drm_i915_gem_exec_object2 {
__u64 offset;
#define EXEC_OBJECT_NEEDS_FENCE (1<<0)
+#define EXEC_OBJECT_NEEDS_GTT (1<<1)
__u64 flags;
+
+ /** Low 32-bits for read-domains, high 32-bits for write-domains. */
__u64 rsvd1;
__u64 rsvd2;
};
@@ -679,6 +683,13 @@ struct drm_i915_gem_execbuffer2 {
*/
#define I915_EXEC_SECURE (1<<9)
+/** Provide a hint to the kernel that the command stream and auxilliary
+ * state buffers already holds the correct presumed addresses and so the
+ * relocation process may be skipped if no buffers need to be moved in
+ * preparation for the execbuffer.
+ */
+#define I915_EXEC_NO_RELOC (1<<10)
+
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
(eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK
--
1.7.10.4
^ permalink raw reply related [flat|nested] 10+ messages in thread
* [PATCH 4/4] drm/i915: Use the reloc.handle as an index into the execbuffer array
2012-11-10 15:15 [PATCH 1/4] drm/i915: Take the handle idr spinlock once for looking up the exec objects Chris Wilson
2012-11-10 15:15 ` [PATCH 2/4] drm/i915: Move the execbuffer objects list from the stack into the tracker Chris Wilson
2012-11-10 15:15 ` [PATCH 3/4] drm/i915: Allow userspace to hint that the relocations were known Chris Wilson
@ 2012-11-10 15:15 ` Chris Wilson
2012-11-10 20:24 ` [PATCH 1/4] drm/i915: Take the handle idr spinlock once for looking up the exec objects Eric Anholt
3 siblings, 0 replies; 10+ messages in thread
From: Chris Wilson @ 2012-11-10 15:15 UTC (permalink / raw)
To: intel-gfx
Using copywinwin10 as an example that is dependent upon emitting a lot
of relocations (2 per operation), we see improvements of:
c2d/gm45: 618000.0/sec to 623000.0/sec.
i3-330m: 748000.0/sec to 789000.0/sec.
(measured relative to a baseline with neither optimisations applied).
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_dma.c | 3 +
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 90 +++++++++++++++++-----------
include/uapi/drm/i915_drm.h | 6 ++
3 files changed, 63 insertions(+), 36 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index a0c4b4f..a35217d 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -1020,6 +1020,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_EXEC_NO_RELOC:
value = 1;
break;
+ case I915_PARAM_HAS_EXEC_HANDLE_LUT:
+ value = 1;
+ break;
default:
DRM_DEBUG_DRIVER("Unknown parameter %d\n",
param->param);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 30beea6..9ccd860 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -39,25 +39,40 @@
struct eb_objects {
struct list_head objects;
- int and;
- struct hlist_head buckets[0];
+ unsigned int and;
+ union {
+ struct drm_i915_gem_object *lut[0];
+ struct hlist_head buckets[0];
+ };
};
static struct eb_objects *
-eb_create(int size)
+eb_create(struct drm_i915_gem_execbuffer2 *args)
{
- struct eb_objects *eb;
- int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
- BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
- while (count > size)
- count >>= 1;
- eb = kzalloc(count*sizeof(struct hlist_head) +
- sizeof(struct eb_objects),
- GFP_KERNEL);
- if (eb == NULL)
- return eb;
-
- eb->and = count - 1;
+ struct eb_objects *eb = NULL;
+
+ if (args->flags & I915_EXEC_HANDLE_LUT) {
+ int size = args->buffer_count;
+ size *= sizeof(struct drm_i915_gem_object);
+ size += sizeof(struct eb_objects);
+ eb = kzalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
+ }
+
+ if (eb == NULL) {
+ int size = args->buffer_count;
+ int count = (PAGE_SIZE - sizeof(struct eb_objects)) / sizeof(struct hlist_head);
+ BUILD_BUG_ON(!is_power_of_2(PAGE_SIZE / sizeof(struct hlist_head)));
+ while (count > 2*size)
+ count >>= 1;
+ eb = kzalloc(count*sizeof(struct hlist_head) +
+ sizeof(struct eb_objects),
+ GFP_TEMPORARY);
+ if (eb == NULL)
+ return eb;
+
+ eb->and = count - 1;
+ }
+
INIT_LIST_HEAD(&eb->objects);
return eb;
}
@@ -65,14 +80,8 @@ eb_create(int size)
static void
eb_reset(struct eb_objects *eb)
{
- memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
-}
-
-static void
-eb_add_object(struct eb_objects *eb, struct drm_i915_gem_object *obj)
-{
- hlist_add_head(&obj->exec_node,
- &eb->buckets[obj->exec_handle & eb->and]);
+ if (eb->and)
+ memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
}
static int
@@ -105,9 +114,14 @@ eb_lookup_objects(struct eb_objects *eb,
drm_gem_object_reference(&obj->base);
list_add_tail(&obj->exec_list, &eb->objects);
- obj->exec_handle = exec[i].handle;
obj->exec_entry = &exec[i];
- eb_add_object(eb, obj);
+ if (eb->and == 0) {
+ eb->lut[i] = obj;
+ } else {
+ obj->exec_handle = exec[i].handle;
+ hlist_add_head(&obj->exec_node,
+ &eb->buckets[exec[i].handle & eb->and]);
+ }
}
spin_unlock(&file->table_lock);
@@ -117,18 +131,22 @@ eb_lookup_objects(struct eb_objects *eb,
static struct drm_i915_gem_object *
eb_get_object(struct eb_objects *eb, unsigned long handle)
{
- struct hlist_head *head;
- struct hlist_node *node;
- struct drm_i915_gem_object *obj;
+ if (eb->and == 0) {
+ return eb->lut[handle];
+ } else {
+ struct hlist_head *head;
+ struct hlist_node *node;
- head = &eb->buckets[handle & eb->and];
- hlist_for_each(node, head) {
- obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
- if (obj->exec_handle == handle)
- return obj;
- }
+ head = &eb->buckets[handle & eb->and];
+ hlist_for_each(node, head) {
+ struct drm_i915_gem_object *obj;
- return NULL;
+ obj = hlist_entry(node, struct drm_i915_gem_object, exec_node);
+ if (obj->exec_handle == handle)
+ return obj;
+ }
+ return NULL;
+ }
}
static void
@@ -988,7 +1006,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
goto pre_mutex_err;
}
- eb = eb_create(args->buffer_count);
+ eb = eb_create(args);
if (eb == NULL) {
mutex_unlock(&dev->struct_mutex);
ret = -ENOMEM;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7657d3e..82c1088 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -310,6 +310,7 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_RSVD_FOR_FUTURE_USE 22
#define I915_PARAM_HAS_SECURE_BATCHES 23
#define I915_PARAM_HAS_EXEC_NO_RELOC 24
+#define I915_PARAM_HAS_EXEC_HANDLE_LUT 25
typedef struct drm_i915_getparam {
int param;
@@ -690,6 +691,11 @@ struct drm_i915_gem_execbuffer2 {
*/
#define I915_EXEC_NO_RELOC (1<<10)
+/** Use the reloc.handle as an index into the exec object array rather
+ * than as the per-file handle.
+ */
+#define I915_EXEC_HANDLE_LUT (1<<11)
+
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
(eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK
--
1.7.10.4
^ permalink raw reply related [flat|nested] 10+ messages in thread
* Re: [PATCH 3/4] drm/i915: Allow userspace to hint that the relocations were known
2012-11-10 15:15 ` [PATCH 3/4] drm/i915: Allow userspace to hint that the relocations were known Chris Wilson
@ 2012-11-10 17:16 ` Daniel Vetter
2012-11-10 18:08 ` Chris Wilson
0 siblings, 1 reply; 10+ messages in thread
From: Daniel Vetter @ 2012-11-10 17:16 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Sat, Nov 10, 2012 at 03:15:46PM +0000, Chris Wilson wrote:
> Userspace is able to hint to the kernel that its command stream and
> auxiliary state buffers already hold the correct presumed addresses and
> so the relocation process may be skipped if the kernel does not need to
> move any buffers in preparation for the execbuffer. Thus for the common
> case where the allotment of buffers is static between batches, we can
> avoid the overhead of individually checking the relocation entries.
>
> Note that this requires userspace to supply the domain tracking and
> requests for workarounds itself that would otherwise be computed based
> upon the relocation entries.
>
> Using copywinwin10 as an example that is dependent upon emitting a lot
> of relocations (2 per operation), we see improvements of:
>
> c2d/gm45: 618000.0/sec to 632000.0/sec.
> i3-330m: 748000.0/sec to 830000.0/sec.
>
> (measured relative to a baseline with neither optimisations applied).
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Nice stuff! A few requests from you bastard maintainer though for this
patch series:
- An i-g-t testcase for the reloc.handle checking, both for the
out-of-bounds case for the LUT and the "no such object" for the old
case. At least I couldn't find anything like that on a quick check.
- Also an i-g-t test to check whether the gen6 "global gtt mapping for
pipe control writes" w/a works would be great, since we've fumbled that.
Again for both the old and new ways of doing things.
- I think we should start checking for not yet defined flag values
everywhere in execbuf and return -EINVAL - both to make the ioctl more
robust against broken userspace (we've already had such an OOPS-causing
issue with cpu domains in relocations). And to ensure that we can keep
on extending things. Obviously needs to come with an i-g-t check. I know
that every extension will then break old i-g-t versions of that tests,
but imo the more rigid abi checking is worth that tradeoff.
The above i-g-t stuff could be a nice exercise for reviewers ... </hint>
On the patch itself:
- Some small comment on EXEC_OBJECT_NEEDS_GTT explaing that we need this
for a gen6 w/a and that it's valid only with the NO_RELOC mode.
- Can we abolish the pending_read/write_domains and just go with a
per-object GPU_WRITE flag? Afaik that's all we need with the
flushing_list gone. To avoid a massive rewrite of the code I'm thinking
of just keeping around a pending_gpu_write bool (since reads are
implicit) and then using that to fill out generic gpu domains in
i915_gem_execbuffer_move_to_active. E.g. set all gpu read domains if
there is no write, otherwise just set the render domain in both.
While reading through the code I've also noticed that we could replace the
obj->write_domain check in there (for updating the write_seqno) with a
pending_gpu_write check - the former was required due to the delayed
flushing caused by the flushing_list.
-Daniel
> ---
> drivers/gpu/drm/i915/i915_dma.c | 3 ++
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 50 ++++++++++++++++++----------
> include/uapi/drm/i915_drm.h | 11 ++++++
> 3 files changed, 46 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 4beb7bc..a0c4b4f 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -1017,6 +1017,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
> case I915_PARAM_HAS_SECURE_BATCHES:
> value = capable(CAP_SYS_ADMIN);
> break;
> + case I915_PARAM_HAS_EXEC_NO_RELOC:
> + value = 1;
> + break;
> default:
> DRM_DEBUG_DRIVER("Unknown parameter %d\n",
> param->param);
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 1738ebd..30beea6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -412,7 +412,8 @@ i915_gem_execbuffer_relocate(struct drm_device *dev,
>
> static int
> i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
> - struct intel_ring_buffer *ring)
> + struct intel_ring_buffer *ring,
> + bool *need_reloc)
> {
> struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> struct drm_i915_gem_exec_object2 *entry = obj->exec_entry;
> @@ -452,7 +453,18 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj,
> obj->has_aliasing_ppgtt_mapping = 1;
> }
>
> - entry->offset = obj->gtt_offset;
> + if (entry->offset != obj->gtt_offset) {
> + entry->offset = obj->gtt_offset;
> + *need_reloc = true;
> + }
> +
> + obj->base.pending_read_domains = entry->rsvd1 & 0xffffffff;
> + obj->base.pending_write_domain = (entry->rsvd1 >> 32) & 0xffffffff;
> +
> + if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
> + !obj->has_global_gtt_mapping)
> + i915_gem_gtt_bind_object(obj, obj->cache_level);
> +
> return 0;
> }
>
> @@ -478,7 +490,8 @@ i915_gem_execbuffer_unreserve_object(struct drm_i915_gem_object *obj)
> static int
> i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
> struct drm_file *file,
> - struct list_head *objects)
> + struct list_head *objects,
> + bool *need_relocs)
> {
> struct drm_i915_gem_object *obj;
> struct list_head ordered_objects;
> @@ -502,8 +515,6 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
> else
> list_move_tail(&obj->exec_list, &ordered_objects);
>
> - obj->base.pending_read_domains = 0;
> - obj->base.pending_write_domain = 0;
> obj->pending_fenced_gpu_access = false;
> }
> list_splice(&ordered_objects, objects);
> @@ -541,7 +552,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
> (need_fence && !obj->map_and_fenceable))
> ret = i915_gem_object_unbind(obj);
> else
> - ret = i915_gem_execbuffer_reserve_object(obj, ring);
> + ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
> if (ret)
> goto err;
> }
> @@ -551,7 +562,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
> if (obj->gtt_space)
> continue;
>
> - ret = i915_gem_execbuffer_reserve_object(obj, ring);
> + ret = i915_gem_execbuffer_reserve_object(obj, ring, need_relocs);
> if (ret)
> goto err;
> }
> @@ -571,16 +582,18 @@ err: /* Decrement pin count for bound objects */
>
> static int
> i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
> + struct drm_i915_gem_execbuffer2 *args,
> struct drm_file *file,
> struct intel_ring_buffer *ring,
> struct eb_objects *eb,
> - struct drm_i915_gem_exec_object2 *exec,
> - int count)
> + struct drm_i915_gem_exec_object2 *exec)
> {
> struct drm_i915_gem_relocation_entry *reloc;
> struct drm_i915_gem_object *obj;
> + bool need_relocs;
> int *reloc_offset;
> int i, total, ret;
> + int count = args->buffer_count;
>
> /* We may process another execbuffer during the unlock... */
> while (!list_empty(&eb->objects)) {
> @@ -635,7 +648,8 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
> if (ret)
> goto err;
>
> - ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects);
> + need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
> + ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects, &need_relocs);
> if (ret)
> goto err;
>
> @@ -848,10 +862,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> struct intel_ring_buffer *ring;
> u32 ctx_id = i915_execbuffer2_get_context_id(*args);
> u32 exec_start, exec_len;
> - u32 seqno;
> - u32 mask;
> - u32 flags;
> + u32 seqno, mask, flags;
> int ret, mode, i;
> + bool need_relocs;
>
> if (!i915_gem_check_execbuffer(args)) {
> DRM_DEBUG("execbuf with invalid offset/length\n");
> @@ -993,17 +1006,18 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> exec_list);
>
> /* Move the objects en-masse into the GTT, evicting if necessary. */
> - ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects);
> + need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
> + ret = i915_gem_execbuffer_reserve(ring, file, &eb->objects, &need_relocs);
> if (ret)
> goto err;
>
> /* The objects are in their final locations, apply the relocations. */
> - ret = i915_gem_execbuffer_relocate(dev, eb);
> + if (need_relocs)
> + ret = i915_gem_execbuffer_relocate(dev, eb);
> if (ret) {
> if (ret == -EFAULT) {
> - ret = i915_gem_execbuffer_relocate_slow(dev, file, ring,
> - eb, exec,
> - args->buffer_count);
> + ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
> + eb, exec);
> BUG_ON(!mutex_is_locked(&dev->struct_mutex));
> }
> if (ret)
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 067b98e..7657d3e 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -309,6 +309,7 @@ typedef struct drm_i915_irq_wait {
> #define I915_PARAM_HAS_PRIME_VMAP_FLUSH 21
> #define I915_PARAM_RSVD_FOR_FUTURE_USE 22
> #define I915_PARAM_HAS_SECURE_BATCHES 23
> +#define I915_PARAM_HAS_EXEC_NO_RELOC 24
>
> typedef struct drm_i915_getparam {
> int param;
> @@ -629,7 +630,10 @@ struct drm_i915_gem_exec_object2 {
> __u64 offset;
>
> #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
> +#define EXEC_OBJECT_NEEDS_GTT (1<<1)
> __u64 flags;
> +
> + /** Low 32-bits for read-domains, high 32-bits for write-domains. */
> __u64 rsvd1;
> __u64 rsvd2;
> };
> @@ -679,6 +683,13 @@ struct drm_i915_gem_execbuffer2 {
> */
> #define I915_EXEC_SECURE (1<<9)
>
> +/** Provide a hint to the kernel that the command stream and auxilliary
> + * state buffers already holds the correct presumed addresses and so the
> + * relocation process may be skipped if no buffers need to be moved in
> + * preparation for the execbuffer.
> + */
> +#define I915_EXEC_NO_RELOC (1<<10)
> +
> #define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
> #define i915_execbuffer2_set_context_id(eb2, context) \
> (eb2).rsvd1 = context & I915_EXEC_CONTEXT_ID_MASK
> --
> 1.7.10.4
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 3/4] drm/i915: Allow userspace to hint that the relocations were known
2012-11-10 17:16 ` Daniel Vetter
@ 2012-11-10 18:08 ` Chris Wilson
2012-11-10 20:17 ` Daniel Vetter
0 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2012-11-10 18:08 UTC (permalink / raw)
To: Daniel Vetter; +Cc: intel-gfx
On Sat, 10 Nov 2012 18:16:16 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
> - Can we abolish the pending_read/write_domains and just go with a
> per-object GPU_WRITE flag? Afaik that's all we need with the
> flushing_list gone. To avoid a massive rewrite of the code I'm thinking
> of just keeping around a pending_gpu_write bool (since reads are
> implicit) and then using that to fill out generic gpu domains in
> i915_gem_execbuffer_move_to_active. E.g. set all gpu read domains if
> there is no write, otherwise just set the render domain in both.
Just a quick comment. I started with just a WRITE flag, then realised I
needed to mark the read domains as well (for GPU activity tracking). So
I concluded that just passing along the domains was easy enough for
userspace and better for future-proofing. That is definitely one of the
places where we need to be careful with designing the API - more ideas
are welcome.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 3/4] drm/i915: Allow userspace to hint that the relocations were known
2012-11-10 18:08 ` Chris Wilson
@ 2012-11-10 20:17 ` Daniel Vetter
2012-11-11 12:10 ` Chris Wilson
0 siblings, 1 reply; 10+ messages in thread
From: Daniel Vetter @ 2012-11-10 20:17 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Sat, Nov 10, 2012 at 7:08 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Sat, 10 Nov 2012 18:16:16 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
>> - Can we abolish the pending_read/write_domains and just go with a
>> per-object GPU_WRITE flag? Afaik that's all we need with the
>> flushing_list gone. To avoid a massive rewrite of the code I'm thinking
>> of just keeping around a pending_gpu_write bool (since reads are
>> implicit) and then using that to fill out generic gpu domains in
>> i915_gem_execbuffer_move_to_active. E.g. set all gpu read domains if
>> there is no write, otherwise just set the render domain in both.
>
> Just a quick comment. I started with just a WRITE flag, then realised I
> needed to mark the read domains as well (for GPU activity tracking). So
> I concluded that just passing along the domains was easy enough for
> userspace and better for future-proofing. That is definitely one of the
> places where we need to be careful with designing the API - more ideas
> are welcome.
Hm, I've thought we could get away for the gpu activity tracking by
unconditionally assuming that any passed-in object is in is in all
read domains. For cache flushing it doesn't matter what we set anyway
since we invalidate/flush unconditionally all caches before/after each
batch. And for activity tracking we unconditionally put all objects in
a given execbuf to the front of the active list (with updated
ring/seqno). The only important thing is to keep the write tracking to
not break our various read/read optimizations. Since we allow in-batch
write domain changes already (and hence more than one write domain
set) we could even do a simple
obj->base.read_domains = GPU_DOMAINS;
if (obj->pending_gpu_write) obj->base.write_domain = GPU_DOMAINS;
For ring-sync/semaphores we already do an unconditional call to
i915_gem_object_sync. So what do I miss?
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 1/4] drm/i915: Take the handle idr spinlock once for looking up the exec objects
2012-11-10 15:15 [PATCH 1/4] drm/i915: Take the handle idr spinlock once for looking up the exec objects Chris Wilson
` (2 preceding siblings ...)
2012-11-10 15:15 ` [PATCH 4/4] drm/i915: Use the reloc.handle as an index into the execbuffer array Chris Wilson
@ 2012-11-10 20:24 ` Eric Anholt
3 siblings, 0 replies; 10+ messages in thread
From: Eric Anholt @ 2012-11-10 20:24 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
[-- Attachment #1.1: Type: text/plain, Size: 354 bytes --]
Chris Wilson <chris@chris-wilson.co.uk> writes:
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Wouldn't we be able to get rid of the whole eb hash table if we just
made the idr be protected by a proper mutex that we could hold across
the relocation process? Seems like that would be way better from an
execution cost and cache perspective.
[-- Attachment #1.2: Type: application/pgp-signature, Size: 197 bytes --]
[-- Attachment #2: Type: text/plain, Size: 159 bytes --]
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 3/4] drm/i915: Allow userspace to hint that the relocations were known
2012-11-10 20:17 ` Daniel Vetter
@ 2012-11-11 12:10 ` Chris Wilson
2012-11-11 13:09 ` Daniel Vetter
0 siblings, 1 reply; 10+ messages in thread
From: Chris Wilson @ 2012-11-11 12:10 UTC (permalink / raw)
To: Daniel Vetter; +Cc: intel-gfx
On Sat, 10 Nov 2012 21:17:05 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
> Hm, I've thought we could get away for the gpu activity tracking by
> unconditionally assuming that any passed-in object is in is in all
> read domains. For cache flushing it doesn't matter what we set anyway
> since we invalidate/flush unconditionally all caches before/after each
> batch. And for activity tracking we unconditionally put all objects in
> a given execbuf to the front of the active list (with updated
> ring/seqno). The only important thing is to keep the write tracking to
> not break our various read/read optimizations.
One overlooked aspect is that the domain tracking helps with debugging
and identifying what the buffers are used for. It has paid dividends
many times when reading error-states, and certainly will prove to be
useful again in future.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH 3/4] drm/i915: Allow userspace to hint that the relocations were known
2012-11-11 12:10 ` Chris Wilson
@ 2012-11-11 13:09 ` Daniel Vetter
0 siblings, 0 replies; 10+ messages in thread
From: Daniel Vetter @ 2012-11-11 13:09 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Sun, Nov 11, 2012 at 1:10 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Sat, 10 Nov 2012 21:17:05 +0100, Daniel Vetter <daniel@ffwll.ch> wrote:
>> Hm, I've thought we could get away for the gpu activity tracking by
>> unconditionally assuming that any passed-in object is in is in all
>> read domains. For cache flushing it doesn't matter what we set anyway
>> since we invalidate/flush unconditionally all caches before/after each
>> batch. And for activity tracking we unconditionally put all objects in
>> a given execbuf to the front of the active list (with updated
>> ring/seqno). The only important thing is to keep the write tracking to
>> not break our various read/read optimizations.
>
> One overlooked aspect is that the domain tracking helps with debugging
> and identifying what the buffers are used for. It has paid dividends
> many times when reading error-states, and certainly will prove to be
> useful again in future.
Hm, good point. Otoh why don't we allow userspace to simply pass in an
arbitrary debug tag in that u64, carry the latest one around in the bo
and dump that everywhere (in addition to the domains)?
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2012-11-11 13:09 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-11-10 15:15 [PATCH 1/4] drm/i915: Take the handle idr spinlock once for looking up the exec objects Chris Wilson
2012-11-10 15:15 ` [PATCH 2/4] drm/i915: Move the execbuffer objects list from the stack into the tracker Chris Wilson
2012-11-10 15:15 ` [PATCH 3/4] drm/i915: Allow userspace to hint that the relocations were known Chris Wilson
2012-11-10 17:16 ` Daniel Vetter
2012-11-10 18:08 ` Chris Wilson
2012-11-10 20:17 ` Daniel Vetter
2012-11-11 12:10 ` Chris Wilson
2012-11-11 13:09 ` Daniel Vetter
2012-11-10 15:15 ` [PATCH 4/4] drm/i915: Use the reloc.handle as an index into the execbuffer array Chris Wilson
2012-11-10 20:24 ` [PATCH 1/4] drm/i915: Take the handle idr spinlock once for looking up the exec objects Eric Anholt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox