* [PATCH 1/3] drm: Track drm_mm nodes with an interval tree
@ 2015-10-06 10:53 Chris Wilson
2015-10-06 10:53 ` [PATCH 2/3] drm/i915: Allow the user to pass a context to any ring Chris Wilson
` (3 more replies)
0 siblings, 4 replies; 27+ messages in thread
From: Chris Wilson @ 2015-10-06 10:53 UTC (permalink / raw)
To: intel-gfx; +Cc: dri-devel
In addition to the last-in/first-out stack for accessing drm_mm nodes,
we occasionally and in the future often want to find a drm_mm_node by an
address. To do so efficiently we need to track the nodes in an interval
tree - lookups for a particular address will then be O(lg(N)), where N
is the number of nodes in the range manager as opposed to O(N).
Insertion however gains an extra O(lg(N)) step for all nodes
irrespective of whether the interval tree is in use. For future i915
patches, eliminating the linear walk is a significant improvement.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: dri-devel@lists.freedesktop.org
---
drivers/gpu/drm/Kconfig | 1 +
drivers/gpu/drm/drm_mm.c | 71 ++++++++++++++++++++++++++++++++----------------
include/drm/drm_mm.h | 5 ++++
3 files changed, 54 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 06ae5008c5ed..e25050a5a843 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -12,6 +12,7 @@ menuconfig DRM
select I2C
select I2C_ALGOBIT
select DMA_SHARED_BUFFER
+ select INTERVAL_TREE
help
Kernel-level support for the Direct Rendering Infrastructure (DRI)
introduced in XFree86 4.0. If you say Y here, you need to select
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index 04de6fd88f8c..e3acd860f738 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -153,6 +153,10 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
INIT_LIST_HEAD(&node->hole_stack);
list_add(&node->node_list, &hole_node->node_list);
+ node->it.start = node->start;
+ node->it.last = node->start + size - 1;
+ interval_tree_insert(&node->it, &mm->interval_tree);
+
BUG_ON(node->start + node->size > adj_end);
node->hole_follows = 0;
@@ -178,39 +182,53 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
*/
int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
{
- struct drm_mm_node *hole;
u64 end = node->start + node->size;
- u64 hole_start;
- u64 hole_end;
-
- BUG_ON(node == NULL);
+ struct interval_tree_node *it;
+ struct drm_mm_node *hole;
+ u64 hole_start, hole_end;
/* Find the relevant hole to add our node to */
- drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
- if (hole_start > node->start || hole_end < end)
- continue;
+ it = interval_tree_iter_first(&mm->interval_tree,
+ node->start, (u64)-1);
+ if (it == NULL) {
+ hole = list_last_entry(&mm->head_node.node_list,
+ struct drm_mm_node, node_list);
+ } else {
+ hole = container_of(it, typeof(*hole), it);
+ if (hole->start <= node->start)
+ return -ENOSPC;
+
+ hole = list_last_entry(&hole->node_list,
+ struct drm_mm_node, node_list);
+ }
- node->mm = mm;
- node->allocated = 1;
+ hole_start = drm_mm_hole_node_start(hole);
+ hole_end = drm_mm_hole_node_end(hole);
+ if (hole_start > node->start || hole_end < end)
+ return -ENOSPC;
- INIT_LIST_HEAD(&node->hole_stack);
- list_add(&node->node_list, &hole->node_list);
+ node->mm = mm;
+ node->allocated = 1;
- if (node->start == hole_start) {
- hole->hole_follows = 0;
- list_del_init(&hole->hole_stack);
- }
+ INIT_LIST_HEAD(&node->hole_stack);
+ list_add(&node->node_list, &hole->node_list);
- node->hole_follows = 0;
- if (end != hole_end) {
- list_add(&node->hole_stack, &mm->hole_stack);
- node->hole_follows = 1;
- }
+ node->it.start = node->start;
+ node->it.last = node->start + node->size - 1;
+ interval_tree_insert(&node->it, &mm->interval_tree);
- return 0;
+ if (node->start == hole_start) {
+ hole->hole_follows = 0;
+ list_del_init(&hole->hole_stack);
}
- return -ENOSPC;
+ node->hole_follows = 0;
+ if (end != hole_end) {
+ list_add(&node->hole_stack, &mm->hole_stack);
+ node->hole_follows = 1;
+ }
+
+ return 0;
}
EXPORT_SYMBOL(drm_mm_reserve_node);
@@ -300,6 +318,10 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
INIT_LIST_HEAD(&node->hole_stack);
list_add(&node->node_list, &hole_node->node_list);
+ node->it.start = node->start;
+ node->it.last = node->start + node->size - 1;
+ interval_tree_insert(&node->it, &mm->interval_tree);
+
BUG_ON(node->start < start);
BUG_ON(node->start < adj_start);
BUG_ON(node->start + node->size > adj_end);
@@ -388,6 +410,7 @@ void drm_mm_remove_node(struct drm_mm_node *node)
} else
list_move(&prev_node->hole_stack, &mm->hole_stack);
+ interval_tree_remove(&node->it, &mm->interval_tree);
list_del(&node->node_list);
node->allocated = 0;
}
@@ -756,6 +779,8 @@ void drm_mm_init(struct drm_mm * mm, u64 start, u64 size)
mm->head_node.size = start - mm->head_node.start;
list_add_tail(&mm->head_node.hole_stack, &mm->hole_stack);
+ mm->interval_tree = RB_ROOT;
+
mm->color_adjust = NULL;
}
EXPORT_SYMBOL(drm_mm_init);
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 0de6290df4da..6d531fe529bf 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -37,6 +37,7 @@
* Generic range manager structs
*/
#include <linux/bug.h>
+#include <linux/interval_tree.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/spinlock.h>
@@ -61,6 +62,7 @@ enum drm_mm_allocator_flags {
struct drm_mm_node {
struct list_head node_list;
struct list_head hole_stack;
+ struct interval_tree_node it;
unsigned hole_follows : 1;
unsigned scanned_block : 1;
unsigned scanned_prev_free : 1;
@@ -79,6 +81,9 @@ struct drm_mm {
/* head_node.node_list is the list of all memory nodes, ordered
* according to the (increasing) start address of the memory node. */
struct drm_mm_node head_node;
+ /* Keep an interval_tree for fast lookup of drm_mm_nodes by address. */
+ struct rb_root interval_tree;
+
unsigned int scan_check_range : 1;
unsigned scan_alignment;
unsigned long scan_color;
--
2.6.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 27+ messages in thread* [PATCH 2/3] drm/i915: Allow the user to pass a context to any ring
2015-10-06 10:53 [PATCH 1/3] drm: Track drm_mm nodes with an interval tree Chris Wilson
@ 2015-10-06 10:53 ` Chris Wilson
2015-10-06 13:57 ` Daniel, Thomas
2015-10-06 10:53 ` [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer Chris Wilson
` (2 subsequent siblings)
3 siblings, 1 reply; 27+ messages in thread
From: Chris Wilson @ 2015-10-06 10:53 UTC (permalink / raw)
To: intel-gfx
With full-ppgtt, we want the user to have full control over their memory
layout, with a separate instance per context. Forcing them to use a
shared memory layout for !RCS not only duplicates the amount of work we
have to do, but also defeats the memory segregation on offer.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index a01c1ebe47ca..19dd6b05ee1d 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1033,18 +1033,13 @@ static struct intel_context *
i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
struct intel_engine_cs *ring, const u32 ctx_id)
{
- struct intel_context *ctx = NULL;
- struct i915_ctx_hang_stats *hs;
-
- if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
- return ERR_PTR(-EINVAL);
+ struct intel_context *ctx;
ctx = i915_gem_context_get(file->driver_priv, ctx_id);
if (IS_ERR(ctx))
return ctx;
- hs = &ctx->hang_stats;
- if (hs->banned) {
+ if (ctx->hang_stats.banned) {
DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
return ERR_PTR(-EIO);
}
--
2.6.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 27+ messages in thread* Re: [PATCH 2/3] drm/i915: Allow the user to pass a context to any ring
2015-10-06 10:53 ` [PATCH 2/3] drm/i915: Allow the user to pass a context to any ring Chris Wilson
@ 2015-10-06 13:57 ` Daniel, Thomas
2015-12-02 13:42 ` Tvrtko Ursulin
0 siblings, 1 reply; 27+ messages in thread
From: Daniel, Thomas @ 2015-10-06 13:57 UTC (permalink / raw)
To: Chris Wilson, intel-gfx@lists.freedesktop.org
> -----Original Message-----
> From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf Of
> Chris Wilson
> Sent: Tuesday, October 6, 2015 11:53 AM
> To: intel-gfx@lists.freedesktop.org
> Subject: [Intel-gfx] [PATCH 2/3] drm/i915: Allow the user to pass a context to
> any ring
>
> With full-ppgtt, we want the user to have full control over their memory
> layout, with a separate instance per context. Forcing them to use a
> shared memory layout for !RCS not only duplicates the amount of work we
> have to do, but also defeats the memory segregation on offer.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Reviewed-by: Thomas Daniel <thomas.daniel@intel.com>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 2/3] drm/i915: Allow the user to pass a context to any ring
2015-10-06 13:57 ` Daniel, Thomas
@ 2015-12-02 13:42 ` Tvrtko Ursulin
0 siblings, 0 replies; 27+ messages in thread
From: Tvrtko Ursulin @ 2015-12-02 13:42 UTC (permalink / raw)
To: Daniel, Thomas, Chris Wilson, intel-gfx@lists.freedesktop.org
Hi,
On 06/10/15 14:57, Daniel, Thomas wrote:
>> -----Original Message-----
>> From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf Of
>> Chris Wilson
>> Sent: Tuesday, October 6, 2015 11:53 AM
>> To: intel-gfx@lists.freedesktop.org
>> Subject: [Intel-gfx] [PATCH 2/3] drm/i915: Allow the user to pass a context to
>> any ring
>>
>> With full-ppgtt, we want the user to have full control over their memory
>> layout, with a separate instance per context. Forcing them to use a
>> shared memory layout for !RCS not only duplicates the amount of work we
>> have to do, but also defeats the memory segregation on offer.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>
> Reviewed-by: Thomas Daniel <thomas.daniel@intel.com>
There is no IGT coverage for this and it is quite a big change so I
think we really need some.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
2015-10-06 10:53 [PATCH 1/3] drm: Track drm_mm nodes with an interval tree Chris Wilson
2015-10-06 10:53 ` [PATCH 2/3] drm/i915: Allow the user to pass a context to any ring Chris Wilson
@ 2015-10-06 10:53 ` Chris Wilson
2015-10-06 13:59 ` Daniel, Thomas
` (2 more replies)
2015-10-06 11:11 ` [Intel-gfx] [PATCH 1/3] drm: Track drm_mm nodes with an interval tree Daniel Vetter
2015-10-21 15:11 ` Daniel Vetter
3 siblings, 3 replies; 27+ messages in thread
From: Chris Wilson @ 2015-10-06 10:53 UTC (permalink / raw)
To: intel-gfx
Userspace can pass in an offset that it presumes the object is located
at. The kernel will then do its utmost to fit the object into that
location. The assumption is that userspace is handling its own object
locations (for example along with full-ppgtt) and that the kernel will
rarely have to make space for the user's requests.
v2: Fix i915_gem_evict_range() (now evict_for_vma) to handle ordinary
and fixed objects within the same batch
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: "Daniel, Thomas" <thomas.daniel@intel.com>
---
drivers/gpu/drm/i915/i915_dma.c | 3 ++
drivers/gpu/drm/i915/i915_drv.h | 10 +++--
drivers/gpu/drm/i915/i915_gem.c | 68 +++++++++++++++++++++---------
drivers/gpu/drm/i915/i915_gem_evict.c | 61 +++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +++-
drivers/gpu/drm/i915/i915_trace.h | 23 ++++++++++
include/uapi/drm/i915_drm.h | 4 +-
7 files changed, 151 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index ab37d1121be8..cd79ef114b8e 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_HAS_RESOURCE_STREAMER:
value = HAS_RESOURCE_STREAMER(dev);
break;
+ case I915_PARAM_HAS_EXEC_SOFTPIN:
+ value = 1;
+ break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a0ce011a5dc0..7d351d991022 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2778,10 +2778,11 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
#define PIN_NONBLOCK (1<<1)
#define PIN_GLOBAL (1<<2)
#define PIN_OFFSET_BIAS (1<<3)
-#define PIN_USER (1<<4)
-#define PIN_UPDATE (1<<5)
-#define PIN_ZONE_4G (1<<6)
-#define PIN_HIGH (1<<7)
+#define PIN_OFFSET_FIXED (1<<4)
+#define PIN_USER (1<<5)
+#define PIN_UPDATE (1<<6)
+#define PIN_ZONE_4G (1<<7)
+#define PIN_HIGH (1<<8)
#define PIN_OFFSET_MASK (~4095)
int __must_check
i915_gem_object_pin(struct drm_i915_gem_object *obj,
@@ -3127,6 +3128,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
unsigned long start,
unsigned long end,
unsigned flags);
+int __must_check i915_gem_evict_for_vma(struct i915_vma *vma, unsigned flags);
int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
/* belongs in i915_gem_gtt.h */
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 8fe3df0cdcb8..82efd6a6dee0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3334,7 +3334,6 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
u64 start, end;
- u32 search_flag, alloc_flag;
struct i915_vma *vma;
int ret;
@@ -3409,30 +3408,53 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
if (IS_ERR(vma))
goto err_unpin;
- if (flags & PIN_HIGH) {
- search_flag = DRM_MM_SEARCH_BELOW;
- alloc_flag = DRM_MM_CREATE_TOP;
+ if (flags & PIN_OFFSET_FIXED) {
+ uint64_t offset = flags & PIN_OFFSET_MASK;
+ if (offset & (alignment - 1) || offset + size > end) {
+ vma = ERR_PTR(-EINVAL);
+ goto err_free_vma;
+ }
+ vma->node.start = offset;
+ vma->node.size = size;
+ vma->node.color = obj->cache_level;
+ ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+ if (ret) {
+ ret = i915_gem_evict_for_vma(vma, flags);
+ if (ret == 0)
+ ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+ }
+ if (ret) {
+ vma = ERR_PTR(ret);
+ goto err_free_vma;
+ }
} else {
- search_flag = DRM_MM_SEARCH_DEFAULT;
- alloc_flag = DRM_MM_CREATE_DEFAULT;
- }
+ u32 search_flag, alloc_flag;
+
+ if (flags & PIN_HIGH) {
+ search_flag = DRM_MM_SEARCH_BELOW;
+ alloc_flag = DRM_MM_CREATE_TOP;
+ } else {
+ search_flag = DRM_MM_SEARCH_DEFAULT;
+ alloc_flag = DRM_MM_CREATE_DEFAULT;
+ }
search_free:
- ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
- size, alignment,
- obj->cache_level,
- start, end,
- search_flag,
- alloc_flag);
- if (ret) {
- ret = i915_gem_evict_something(dev, vm, size, alignment,
- obj->cache_level,
- start, end,
- flags);
- if (ret == 0)
- goto search_free;
+ ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+ size, alignment,
+ obj->cache_level,
+ start, end,
+ search_flag,
+ alloc_flag);
+ if (ret) {
+ ret = i915_gem_evict_something(dev, vm, size, alignment,
+ obj->cache_level,
+ start, end,
+ flags);
+ if (ret == 0)
+ goto search_free;
- goto err_free_vma;
+ goto err_free_vma;
+ }
}
if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
ret = -EINVAL;
@@ -3970,6 +3992,10 @@ i915_vma_misplaced(struct i915_vma *vma,
vma->node.start < (flags & PIN_OFFSET_MASK))
return true;
+ if (flags & PIN_OFFSET_FIXED &&
+ vma->node.start != (flags & PIN_OFFSET_MASK))
+ return true;
+
return false;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index d71a133ceff5..60450a95a491 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -199,6 +199,67 @@ found:
return ret;
}
+int
+i915_gem_evict_for_vma(struct i915_vma *target, unsigned flags)
+{
+ struct list_head eviction_list;
+ struct interval_tree_node *it;
+ u64 end = target->node.start + target->node.size;
+ struct drm_mm_node *node;
+ struct i915_vma *vma, *next;
+ int ret;
+
+ trace_i915_gem_evict_vma(target, flags);
+
+ it = interval_tree_iter_first(&target->vm->mm.interval_tree,
+ target->node.start, end - 1);
+ if (it == NULL)
+ return 0;
+
+ INIT_LIST_HEAD(&eviction_list);
+ node = container_of(it, typeof(*node), it);
+ list_for_each_entry_from(node,
+ &target->vm->mm.head_node.node_list,
+ node_list) {
+ if (node->start >= end)
+ break;
+
+ vma = container_of(node, typeof(*vma), node);
+ if (flags & PIN_NONBLOCK &&
+ (vma->pin_count || vma->obj->active)) {
+ ret = -ENOSPC;
+ break;
+ }
+
+ if (vma->exec_entry &&
+ vma->exec_entry->flags & EXEC_OBJECT_PINNED) {
+ /* Overlapping pinned objects in the same batch */
+ ret = -EINVAL;
+ break;
+ }
+
+ if (vma->pin_count) {
+ /* We may need to evict a buffer in the same batch */
+ ret = vma->exec_entry ? -ENOSPC : -EBUSY;
+ break;
+ }
+
+ list_add(&vma->exec_list, &eviction_list);
+ drm_gem_object_reference(&vma->obj->base);
+ }
+
+ ret = 0;
+ list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
+ struct drm_i915_gem_object *obj = vma->obj;
+ list_del_init(&vma->exec_list);
+ if (ret == 0)
+ ret = i915_vma_unbind(vma);
+ drm_gem_object_unreference(&obj->base);
+ }
+
+ return ret;
+}
+
/**
* i915_gem_evict_vm - Evict all idle vmas from a vm
* @vm: Address space to cleanse
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 19dd6b05ee1d..c35c9dc526e7 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -603,6 +603,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
if ((flags & PIN_MAPPABLE) == 0)
flags |= PIN_HIGH;
+ if (entry->flags & EXEC_OBJECT_PINNED)
+ flags |= entry->offset | PIN_OFFSET_FIXED;
}
ret = i915_gem_object_pin(obj, vma->vm,
@@ -679,6 +681,10 @@ eb_vma_misplaced(struct i915_vma *vma)
if (vma->node.size < entry->pad_to_size)
return true;
+ if (entry->flags & EXEC_OBJECT_PINNED &&
+ vma->node.start != entry->offset)
+ return true;
+
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
vma->node.start < BATCH_OFFSET_BIAS)
return true;
@@ -1328,7 +1334,8 @@ eb_get_batch(struct eb_vmas *eb)
* Note that actual hangs have only been observed on gen7, but for
* paranoia do it everywhere.
*/
- vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+ if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+ vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
return vma->obj;
}
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index b1dcee718640..ef2387c01fa9 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -459,6 +459,29 @@ TRACE_EVENT(i915_gem_evict_vm,
TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
);
+TRACE_EVENT(i915_gem_evict_vma,
+ TP_PROTO(struct i915_vma *vma, unsigned flags),
+ TP_ARGS(vma, flags),
+
+ TP_STRUCT__entry(
+ __field(u32, dev)
+ __field(struct i915_address_space *, vm)
+ __field(u64, start)
+ __field(u64, size)
+ __field(unsigned, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->dev = vma->vm->dev->primary->index;
+ __entry->vm = vma->vm;
+ __entry->start = vma->node.start;
+ __entry->size = vma->node.size;
+ __entry->flags = flags;
+ ),
+
+ TP_printk("dev=%d, vm=%p, start=%llx size=%llx, flags=%x", __entry->dev, __entry->vm, (long long)__entry->start, (long long)__entry->size, __entry->flags)
+);
+
TRACE_EVENT(i915_gem_ring_sync_to,
TP_PROTO(struct drm_i915_gem_request *to_req,
struct intel_engine_cs *from,
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 5e2fc61e7d88..766aa4fb8264 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_EU_TOTAL 34
#define I915_PARAM_HAS_GPU_RESET 35
#define I915_PARAM_HAS_RESOURCE_STREAMER 36
+#define I915_PARAM_HAS_EXEC_SOFTPIN 37
typedef struct drm_i915_getparam {
s32 param;
@@ -692,7 +693,8 @@ struct drm_i915_gem_exec_object2 {
#define EXEC_OBJECT_WRITE (1<<2)
#define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
#define EXEC_OBJECT_PAD_TO_SIZE (1<<4)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
+#define EXEC_OBJECT_PINNED (1<<5)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
__u64 flags;
__u64 pad_to_size;
--
2.6.0
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 27+ messages in thread* Re: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
2015-10-06 10:53 ` [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer Chris Wilson
@ 2015-10-06 13:59 ` Daniel, Thomas
2015-10-21 15:07 ` Daniel Vetter
2015-10-27 11:51 ` akash goel
2015-11-05 17:51 ` Kristian Høgsberg
2 siblings, 1 reply; 27+ messages in thread
From: Daniel, Thomas @ 2015-10-06 13:59 UTC (permalink / raw)
To: Chris Wilson, intel-gfx@lists.freedesktop.org
> -----Original Message-----
> From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> Sent: Tuesday, October 6, 2015 11:53 AM
> To: intel-gfx@lists.freedesktop.org
> Cc: Chris Wilson; Daniel, Thomas
> Subject: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
>
> Userspace can pass in an offset that it presumes the object is located
> at. The kernel will then do its utmost to fit the object into that
> location. The assumption is that userspace is handling its own object
> locations (for example along with full-ppgtt) and that the kernel will
> rarely have to make space for the user's requests.
>
> v2: Fix i915_gem_evict_range() (now evict_for_vma) to handle ordinary
> and fixed objects within the same batch
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: "Daniel, Thomas" <thomas.daniel@intel.com>
This didn't apply cleanly to my tree pulled today (after patches 1 and 2 of this series).
Are you going to post a rebase?
Cheers,
Thomas.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
2015-10-06 13:59 ` Daniel, Thomas
@ 2015-10-21 15:07 ` Daniel Vetter
2015-10-21 15:11 ` Daniel, Thomas
0 siblings, 1 reply; 27+ messages in thread
From: Daniel Vetter @ 2015-10-21 15:07 UTC (permalink / raw)
To: Daniel, Thomas; +Cc: intel-gfx@lists.freedesktop.org
On Tue, Oct 06, 2015 at 01:59:12PM +0000, Daniel, Thomas wrote:
> > -----Original Message-----
> > From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> > Sent: Tuesday, October 6, 2015 11:53 AM
> > To: intel-gfx@lists.freedesktop.org
> > Cc: Chris Wilson; Daniel, Thomas
> > Subject: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
> >
> > Userspace can pass in an offset that it presumes the object is located
> > at. The kernel will then do its utmost to fit the object into that
> > location. The assumption is that userspace is handling its own object
> > locations (for example along with full-ppgtt) and that the kernel will
> > rarely have to make space for the user's requests.
> >
> > v2: Fix i915_gem_evict_range() (now evict_for_vma) to handle ordinary
> > and fixed objects within the same batch
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: "Daniel, Thomas" <thomas.daniel@intel.com>
>
> This didn't apply cleanly to my tree pulled today (after patches 1 and 2 of this series).
> Are you going to post a rebase?
It's a really trivial conflict in the uapi flag allocation. Happens all
the time with interface extensions.
What I'm looking for here is the userspace for this new interface. And the
testcases.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
2015-10-21 15:07 ` Daniel Vetter
@ 2015-10-21 15:11 ` Daniel, Thomas
2015-10-23 2:31 ` Yang, Rong R
0 siblings, 1 reply; 27+ messages in thread
From: Daniel, Thomas @ 2015-10-21 15:11 UTC (permalink / raw)
To: Daniel Vetter; +Cc: Belgaumkar, Vinay, intel-gfx@lists.freedesktop.org
> -----Original Message-----
> From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of Daniel Vetter
> Sent: Wednesday, October 21, 2015 4:08 PM
> To: Daniel, Thomas
> Cc: Chris Wilson; intel-gfx@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH 3/3] drm/i915: Add soft-pinning API for
> execbuffer
>
> On Tue, Oct 06, 2015 at 01:59:12PM +0000, Daniel, Thomas wrote:
> > > -----Original Message-----
> > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> > > Sent: Tuesday, October 6, 2015 11:53 AM
> > > To: intel-gfx@lists.freedesktop.org
> > > Cc: Chris Wilson; Daniel, Thomas
> > > Subject: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
> > >
> > > Userspace can pass in an offset that it presumes the object is located
> > > at. The kernel will then do its utmost to fit the object into that
> > > location. The assumption is that userspace is handling its own object
> > > locations (for example along with full-ppgtt) and that the kernel will
> > > rarely have to make space for the user's requests.
> > >
> > > v2: Fix i915_gem_evict_range() (now evict_for_vma) to handle ordinary
> > > and fixed objects within the same batch
> > >
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: "Daniel, Thomas" <thomas.daniel@intel.com>
> >
> > This didn't apply cleanly to my tree pulled today (after patches 1 and 2 of this
> series).
> > Are you going to post a rebase?
>
> It's a really trivial conflict in the uapi flag allocation. Happens all
> the time with interface extensions.
>
> What I'm looking for here is the userspace for this new interface. And the
> testcases.
Hm I thought the beignet guys had already posted.
Vinay has written i-g-t for this
> -Daniel
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
2015-10-21 15:11 ` Daniel, Thomas
@ 2015-10-23 2:31 ` Yang, Rong R
0 siblings, 0 replies; 27+ messages in thread
From: Yang, Rong R @ 2015-10-23 2:31 UTC (permalink / raw)
To: Daniel, Thomas, Daniel Vetter
Cc: Belgaumkar, Vinay, intel-gfx@lists.freedesktop.org
> -----Original Message-----
> From: Daniel, Thomas
> Sent: Wednesday, October 21, 2015 23:11
> To: Daniel Vetter
> Cc: Chris Wilson; intel-gfx@lists.freedesktop.org; Belgaumkar, Vinay; Yang,
> Rong R
> Subject: RE: [Intel-gfx] [PATCH 3/3] drm/i915: Add soft-pinning API for
> execbuffer
>
> > -----Original Message-----
> > From: Daniel Vetter [mailto:daniel.vetter@ffwll.ch] On Behalf Of
> > Daniel Vetter
> > Sent: Wednesday, October 21, 2015 4:08 PM
> > To: Daniel, Thomas
> > Cc: Chris Wilson; intel-gfx@lists.freedesktop.org
> > Subject: Re: [Intel-gfx] [PATCH 3/3] drm/i915: Add soft-pinning API
> > for execbuffer
> >
> > On Tue, Oct 06, 2015 at 01:59:12PM +0000, Daniel, Thomas wrote:
> > > > -----Original Message-----
> > > > From: Chris Wilson [mailto:chris@chris-wilson.co.uk]
> > > > Sent: Tuesday, October 6, 2015 11:53 AM
> > > > To: intel-gfx@lists.freedesktop.org
> > > > Cc: Chris Wilson; Daniel, Thomas
> > > > Subject: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
> > > >
> > > > Userspace can pass in an offset that it presumes the object is
> > > > located at. The kernel will then do its utmost to fit the object
> > > > into that location. The assumption is that userspace is handling
> > > > its own object locations (for example along with full-ppgtt) and
> > > > that the kernel will rarely have to make space for the user's requests.
> > > >
> > > > v2: Fix i915_gem_evict_range() (now evict_for_vma) to handle
> > > > ordinary and fixed objects within the same batch
> > > >
> > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > > Cc: "Daniel, Thomas" <thomas.daniel@intel.com>
> > >
> > > This didn't apply cleanly to my tree pulled today (after patches 1
> > > and 2 of this
> > series).
> > > Are you going to post a rebase?
> >
> > It's a really trivial conflict in the uapi flag allocation. Happens
> > all the time with interface extensions.
> >
> > What I'm looking for here is the userspace for this new interface. And
> > the testcases.
> Hm I thought the beignet guys had already posted.
> Vinay has written i-g-t for this
The Beignet SVM patch hasn't been posted yet, because Beignet's SVM support currently only works on i386 Linux; x86_64 SVM depends on 48-bit pointer support in the Beignet compiler's backend.
If the i386 SVM patch is useful for validating this patch, I will send it out.
>
> > -Daniel
> > --
> > Daniel Vetter
> > Software Engineer, Intel Corporation
> > http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
2015-10-06 10:53 ` [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer Chris Wilson
2015-10-06 13:59 ` Daniel, Thomas
@ 2015-10-27 11:51 ` akash goel
2015-11-05 10:57 ` Daniel, Thomas
2015-12-02 13:28 ` Chris Wilson
2015-11-05 17:51 ` Kristian Høgsberg
2 siblings, 2 replies; 27+ messages in thread
From: akash goel @ 2015-10-27 11:51 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx, Goel, Akash
On Tue, Oct 6, 2015 at 4:23 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Userspace can pass in an offset that it presumes the object is located
> at. The kernel will then do its utmost to fit the object into that
> location. The assumption is that userspace is handling its own object
> locations (for example along with full-ppgtt) and that the kernel will
> rarely have to make space for the user's requests.
>
> v2: Fix i915_gem_evict_range() (now evict_for_vma) to handle ordinary
> and fixed objects within the same batch
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: "Daniel, Thomas" <thomas.daniel@intel.com>
> ---
> drivers/gpu/drm/i915/i915_dma.c | 3 ++
> drivers/gpu/drm/i915/i915_drv.h | 10 +++--
> drivers/gpu/drm/i915/i915_gem.c | 68 +++++++++++++++++++++---------
> drivers/gpu/drm/i915/i915_gem_evict.c | 61 +++++++++++++++++++++++++++
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +++-
> drivers/gpu/drm/i915/i915_trace.h | 23 ++++++++++
> include/uapi/drm/i915_drm.h | 4 +-
> 7 files changed, 151 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index ab37d1121be8..cd79ef114b8e 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
> case I915_PARAM_HAS_RESOURCE_STREAMER:
> value = HAS_RESOURCE_STREAMER(dev);
> break;
> + case I915_PARAM_HAS_EXEC_SOFTPIN:
> + value = 1;
> + break;
> default:
> DRM_DEBUG("Unknown parameter %d\n", param->param);
> return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a0ce011a5dc0..7d351d991022 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2778,10 +2778,11 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
> #define PIN_NONBLOCK (1<<1)
> #define PIN_GLOBAL (1<<2)
> #define PIN_OFFSET_BIAS (1<<3)
> -#define PIN_USER (1<<4)
> -#define PIN_UPDATE (1<<5)
> -#define PIN_ZONE_4G (1<<6)
> -#define PIN_HIGH (1<<7)
> +#define PIN_OFFSET_FIXED (1<<4)
> +#define PIN_USER (1<<5)
> +#define PIN_UPDATE (1<<6)
> +#define PIN_ZONE_4G (1<<7)
> +#define PIN_HIGH (1<<8)
> #define PIN_OFFSET_MASK (~4095)
> int __must_check
> i915_gem_object_pin(struct drm_i915_gem_object *obj,
> @@ -3127,6 +3128,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
> unsigned long start,
> unsigned long end,
> unsigned flags);
> +int __must_check i915_gem_evict_for_vma(struct i915_vma *vma, unsigned flags);
> int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>
> /* belongs in i915_gem_gtt.h */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8fe3df0cdcb8..82efd6a6dee0 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3334,7 +3334,6 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> struct drm_device *dev = obj->base.dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
> u64 start, end;
> - u32 search_flag, alloc_flag;
> struct i915_vma *vma;
> int ret;
>
> @@ -3409,30 +3408,53 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> if (IS_ERR(vma))
> goto err_unpin;
>
> - if (flags & PIN_HIGH) {
> - search_flag = DRM_MM_SEARCH_BELOW;
> - alloc_flag = DRM_MM_CREATE_TOP;
> + if (flags & PIN_OFFSET_FIXED) {
> + uint64_t offset = flags & PIN_OFFSET_MASK;
> + if (offset & (alignment - 1) || offset + size > end) {
> + vma = ERR_PTR(-EINVAL);
> + goto err_free_vma;
> + }
> + vma->node.start = offset;
> + vma->node.size = size;
> + vma->node.color = obj->cache_level;
> + ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> + if (ret) {
> + ret = i915_gem_evict_for_vma(vma, flags);
> + if (ret == 0)
> + ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> + }
> + if (ret) {
> + vma = ERR_PTR(ret);
> + goto err_free_vma;
> + }
> } else {
> - search_flag = DRM_MM_SEARCH_DEFAULT;
> - alloc_flag = DRM_MM_CREATE_DEFAULT;
> - }
> + u32 search_flag, alloc_flag;
> +
> + if (flags & PIN_HIGH) {
> + search_flag = DRM_MM_SEARCH_BELOW;
> + alloc_flag = DRM_MM_CREATE_TOP;
> + } else {
> + search_flag = DRM_MM_SEARCH_DEFAULT;
> + alloc_flag = DRM_MM_CREATE_DEFAULT;
> + }
>
> search_free:
> - ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> - size, alignment,
> - obj->cache_level,
> - start, end,
> - search_flag,
> - alloc_flag);
> - if (ret) {
> - ret = i915_gem_evict_something(dev, vm, size, alignment,
> - obj->cache_level,
> - start, end,
> - flags);
> - if (ret == 0)
> - goto search_free;
> + ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> + size, alignment,
> + obj->cache_level,
> + start, end,
> + search_flag,
> + alloc_flag);
> + if (ret) {
> + ret = i915_gem_evict_something(dev, vm, size, alignment,
> + obj->cache_level,
> + start, end,
> + flags);
> + if (ret == 0)
> + goto search_free;
>
> - goto err_free_vma;
> + goto err_free_vma;
> + }
> }
> if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
> ret = -EINVAL;
> @@ -3970,6 +3992,10 @@ i915_vma_misplaced(struct i915_vma *vma,
> vma->node.start < (flags & PIN_OFFSET_MASK))
> return true;
>
> + if (flags & PIN_OFFSET_FIXED &&
> + vma->node.start != (flags & PIN_OFFSET_MASK))
> + return true;
> +
> return false;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index d71a133ceff5..60450a95a491 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -199,6 +199,67 @@ found:
> return ret;
> }
>
> +int
> +i915_gem_evict_for_vma(struct i915_vma *target, unsigned flags)
> +{
> + struct list_head eviction_list;
> + struct interval_tree_node *it;
> + u64 end = target->node.start + target->node.size;
> + struct drm_mm_node *node;
> + struct i915_vma *vma, *next;
> + int ret;
> +
> + trace_i915_gem_evict_vma(target, flags);
> +
> + it = interval_tree_iter_first(&target->vm->mm.interval_tree,
> + target->node.start, end -1);
> + if (it == NULL)
> + return 0;
> +
> + INIT_LIST_HEAD(&eviction_list);
> + node = container_of(it, typeof(*node), it);
> + list_for_each_entry_from(node,
> + &target->vm->mm.head_node.node_list,
> + node_list) {
> + if (node->start >= end)
> + break;
> +
> + vma = container_of(node, typeof(*vma), node);
> + if (flags & PIN_NONBLOCK &&
> + (vma->pin_count || vma->obj->active)) {
> + ret = -ENOSPC;
> + break;
> + }
> +
> + if (vma->exec_entry &&
> + vma->exec_entry->flags & EXEC_OBJECT_PINNED) {
> + /* Overlapping pinned objects in the same batch */
> + ret = -EINVAL;
> + break;
> + }
> +
> + if (vma->pin_count) {
> > + /* We may need to evict a buffer in the same batch */
> + ret = vma->exec_entry ? -ENOSPC : -EBUSY;
> + break;
> + }
> +
> + list_add(&vma->exec_list, &eviction_list);
> + drm_gem_object_reference(&vma->obj->base);
> + }
> +
> + ret = 0;
> + list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
> + struct drm_i915_gem_object *obj = vma->obj;
> + list_del_init(&vma->exec_list);
> + if (ret == 0)
> + ret = i915_vma_unbind(vma);
> + drm_gem_object_unreference(&obj->base);
> + }
> +
> + return ret;
> +}
> +
> /**
> * i915_gem_evict_vm - Evict all idle vmas from a vm
> * @vm: Address space to cleanse
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 19dd6b05ee1d..c35c9dc526e7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -603,6 +603,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
> flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
> if ((flags & PIN_MAPPABLE) == 0)
> flags |= PIN_HIGH;
> + if (entry->flags & EXEC_OBJECT_PINNED)
> + flags |= entry->offset | PIN_OFFSET_FIXED;
> }
>
> ret = i915_gem_object_pin(obj, vma->vm,
> @@ -679,6 +681,10 @@ eb_vma_misplaced(struct i915_vma *vma)
> if (vma->node.size < entry->pad_to_size)
> return true;
>
> + if (entry->flags & EXEC_OBJECT_PINNED &&
> + vma->node.start != entry->offset)
> + return true;
> +
> if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
> vma->node.start < BATCH_OFFSET_BIAS)
> return true;
I think it would be better to add the following change here.
- if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
+ if (!(entry->flags &
+ (EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED)) &&
(vma->node.start + vma->node.size - 1) >> 32)
return true;
This way, the user will not necessarily have to pass the 48B_ADDRESS flag
along with the OBJECT_PINNED flag, if the offset is > 4 GB.
The OBJECT_PINNED flag will take precedence over 48B_ADDRESS flag.
Best regards
Akash
> @@ -1328,7 +1334,8 @@ eb_get_batch(struct eb_vmas *eb)
> * Note that actual hangs have only been observed on gen7, but for
> * paranoia do it everywhere.
> */
> - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
> + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
> + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
>
> return vma->obj;
> }
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index b1dcee718640..ef2387c01fa9 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -459,6 +459,29 @@ TRACE_EVENT(i915_gem_evict_vm,
> TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
> );
>
> +TRACE_EVENT(i915_gem_evict_vma,
> + TP_PROTO(struct i915_vma *vma, unsigned flags),
> + TP_ARGS(vma, flags),
> +
> + TP_STRUCT__entry(
> + __field(u32, dev)
> + __field(struct i915_address_space *, vm)
> + __field(u64, start)
> + __field(u64, size)
> + __field(unsigned, flags)
> + ),
> +
> + TP_fast_assign(
> + __entry->dev = vma->vm->dev->primary->index;
> + __entry->vm = vma->vm;
> + __entry->start = vma->node.start;
> + __entry->size = vma->node.size;
> + __entry->flags = flags;
> + ),
> +
> + TP_printk("dev=%d, vm=%p, start=%llx size=%llx, flags=%x", __entry->dev, __entry->vm, (long long)__entry->start, (long long)__entry->size, __entry->flags)
> +);
> +
> TRACE_EVENT(i915_gem_ring_sync_to,
> TP_PROTO(struct drm_i915_gem_request *to_req,
> struct intel_engine_cs *from,
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 5e2fc61e7d88..766aa4fb8264 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait {
> #define I915_PARAM_EU_TOTAL 34
> #define I915_PARAM_HAS_GPU_RESET 35
> #define I915_PARAM_HAS_RESOURCE_STREAMER 36
> +#define I915_PARAM_HAS_EXEC_SOFTPIN 37
>
> typedef struct drm_i915_getparam {
> s32 param;
> @@ -692,7 +693,8 @@ struct drm_i915_gem_exec_object2 {
> #define EXEC_OBJECT_WRITE (1<<2)
> #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> #define EXEC_OBJECT_PAD_TO_SIZE (1<<4)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
> +#define EXEC_OBJECT_PINNED (1<<5)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
> __u64 flags;
>
> __u64 pad_to_size;
> --
> 2.6.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
2015-10-27 11:51 ` akash goel
@ 2015-11-05 10:57 ` Daniel, Thomas
2015-12-02 13:28 ` Chris Wilson
1 sibling, 0 replies; 27+ messages in thread
From: Daniel, Thomas @ 2015-11-05 10:57 UTC (permalink / raw)
To: akash goel, Chris Wilson; +Cc: intel-gfx@lists.freedesktop.org, Goel, Akash
> -----Original Message-----
> From: akash goel [mailto:akash.goels@gmail.com]
> Sent: Tuesday, October 27, 2015 11:52 AM
> To: Chris Wilson
> Cc: intel-gfx@lists.freedesktop.org; Goel, Akash; Daniel, Thomas
> Subject: Re: [Intel-gfx] [PATCH 3/3] drm/i915: Add soft-pinning API for
> execbuffer
>
> On Tue, Oct 6, 2015 at 4:23 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > Userspace can pass in an offset that it presumes the object is located
> > at. The kernel will then do its utmost to fit the object into that
> > location. The assumption is that userspace is handling its own object
> > locations (for example along with full-ppgtt) and that the kernel will
> > rarely have to make space for the user's requests.
> >
> > v2: Fix i915_gem_evict_range() (now evict_for_vma) to handle ordinary
> > and fixed objects within the same batch
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: "Daniel, Thomas" <thomas.daniel@intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_dma.c | 3 ++
> > drivers/gpu/drm/i915/i915_drv.h | 10 +++--
> > drivers/gpu/drm/i915/i915_gem.c | 68 +++++++++++++++++++++--------
> -
> > drivers/gpu/drm/i915/i915_gem_evict.c | 61
> +++++++++++++++++++++++++++
> > drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +++-
> > drivers/gpu/drm/i915/i915_trace.h | 23 ++++++++++
> > include/uapi/drm/i915_drm.h | 4 +-
> > 7 files changed, 151 insertions(+), 27 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_dma.c
> b/drivers/gpu/drm/i915/i915_dma.c
> > index ab37d1121be8..cd79ef114b8e 100644
> > --- a/drivers/gpu/drm/i915/i915_dma.c
> > +++ b/drivers/gpu/drm/i915/i915_dma.c
> > @@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void
> *data,
> > case I915_PARAM_HAS_RESOURCE_STREAMER:
> > value = HAS_RESOURCE_STREAMER(dev);
> > break;
> > + case I915_PARAM_HAS_EXEC_SOFTPIN:
> > + value = 1;
> > + break;
> > default:
> > DRM_DEBUG("Unknown parameter %d\n", param->param);
> > return -EINVAL;
> > diff --git a/drivers/gpu/drm/i915/i915_drv.h
> b/drivers/gpu/drm/i915/i915_drv.h
> > index a0ce011a5dc0..7d351d991022 100644
> > --- a/drivers/gpu/drm/i915/i915_drv.h
> > +++ b/drivers/gpu/drm/i915/i915_drv.h
> > @@ -2778,10 +2778,11 @@ void i915_gem_vma_destroy(struct i915_vma
> *vma);
> > #define PIN_NONBLOCK (1<<1)
> > #define PIN_GLOBAL (1<<2)
> > #define PIN_OFFSET_BIAS (1<<3)
> > -#define PIN_USER (1<<4)
> > -#define PIN_UPDATE (1<<5)
> > -#define PIN_ZONE_4G (1<<6)
> > -#define PIN_HIGH (1<<7)
> > +#define PIN_OFFSET_FIXED (1<<4)
> > +#define PIN_USER (1<<5)
> > +#define PIN_UPDATE (1<<6)
> > +#define PIN_ZONE_4G (1<<7)
> > +#define PIN_HIGH (1<<8)
> > #define PIN_OFFSET_MASK (~4095)
> > int __must_check
> > i915_gem_object_pin(struct drm_i915_gem_object *obj,
> > @@ -3127,6 +3128,7 @@ int __must_check
> i915_gem_evict_something(struct drm_device *dev,
> > unsigned long start,
> > unsigned long end,
> > unsigned flags);
> > +int __must_check i915_gem_evict_for_vma(struct i915_vma *vma, unsigned
> flags);
> > int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
> >
> > /* belongs in i915_gem_gtt.h */
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c
> b/drivers/gpu/drm/i915/i915_gem.c
> > index 8fe3df0cdcb8..82efd6a6dee0 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -3334,7 +3334,6 @@ i915_gem_object_bind_to_vm(struct
> drm_i915_gem_object *obj,
> > struct drm_device *dev = obj->base.dev;
> > struct drm_i915_private *dev_priv = dev->dev_private;
> > u64 start, end;
> > - u32 search_flag, alloc_flag;
> > struct i915_vma *vma;
> > int ret;
> >
> > @@ -3409,30 +3408,53 @@ i915_gem_object_bind_to_vm(struct
> drm_i915_gem_object *obj,
> > if (IS_ERR(vma))
> > goto err_unpin;
> >
> > - if (flags & PIN_HIGH) {
> > - search_flag = DRM_MM_SEARCH_BELOW;
> > - alloc_flag = DRM_MM_CREATE_TOP;
> > + if (flags & PIN_OFFSET_FIXED) {
> > + uint64_t offset = flags & PIN_OFFSET_MASK;
> > + if (offset & (alignment - 1) || offset + size > end) {
> > + vma = ERR_PTR(-EINVAL);
This causes a crash, since the err_free_vma path will get an invalid address in vma.
Should be ret = -EINVAL; goto err_free_vma;
> > + goto err_free_vma;
> > + }
> > + vma->node.start = offset;
> > + vma->node.size = size;
> > + vma->node.color = obj->cache_level;
> > + ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> > + if (ret) {
> > + ret = i915_gem_evict_for_vma(vma, flags);
> > + if (ret == 0)
> > + ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> > + }
> > + if (ret) {
> > + vma = ERR_PTR(ret);
Same again.
Thomas.
> > + goto err_free_vma;
> > + }
> > } else {
> > - search_flag = DRM_MM_SEARCH_DEFAULT;
> > - alloc_flag = DRM_MM_CREATE_DEFAULT;
> > - }
> > + u32 search_flag, alloc_flag;
> > +
> > + if (flags & PIN_HIGH) {
> > + search_flag = DRM_MM_SEARCH_BELOW;
> > + alloc_flag = DRM_MM_CREATE_TOP;
> > + } else {
> > + search_flag = DRM_MM_SEARCH_DEFAULT;
> > + alloc_flag = DRM_MM_CREATE_DEFAULT;
> > + }
> >
> > search_free:
> > - ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> > - size, alignment,
> > - obj->cache_level,
> > - start, end,
> > - search_flag,
> > - alloc_flag);
> > - if (ret) {
> > - ret = i915_gem_evict_something(dev, vm, size, alignment,
> > - obj->cache_level,
> > - start, end,
> > - flags);
> > - if (ret == 0)
> > - goto search_free;
> > + ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma-
> >node,
> > + size, alignment,
> > + obj->cache_level,
> > + start, end,
> > + search_flag,
> > + alloc_flag);
> > + if (ret) {
> > + ret = i915_gem_evict_something(dev, vm, size, alignment,
> > + obj->cache_level,
> > + start, end,
> > + flags);
> > + if (ret == 0)
> > + goto search_free;
> >
> > - goto err_free_vma;
> > + goto err_free_vma;
> > + }
> > }
> > if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
> > ret = -EINVAL;
> > @@ -3970,6 +3992,10 @@ i915_vma_misplaced(struct i915_vma *vma,
> > vma->node.start < (flags & PIN_OFFSET_MASK))
> > return true;
> >
> > + if (flags & PIN_OFFSET_FIXED &&
> > + vma->node.start != (flags & PIN_OFFSET_MASK))
> > + return true;
> > +
> > return false;
> > }
> >
> > diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c
> b/drivers/gpu/drm/i915/i915_gem_evict.c
> > index d71a133ceff5..60450a95a491 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> > @@ -199,6 +199,67 @@ found:
> > return ret;
> > }
> >
> > +int
> > +i915_gem_evict_for_vma(struct i915_vma *target, unsigned flags)
> > +{
> > + struct list_head eviction_list;
> > + struct interval_tree_node *it;
> > + u64 end = target->node.start + target->node.size;
> > + struct drm_mm_node *node;
> > + struct i915_vma *vma, *next;
> > + int ret;
> > +
> > + trace_i915_gem_evict_vma(target, flags);
> > +
> > + it = interval_tree_iter_first(&target->vm->mm.interval_tree,
> > + target->node.start, end -1);
> > + if (it == NULL)
> > + return 0;
> > +
> > + INIT_LIST_HEAD(&eviction_list);
> > + node = container_of(it, typeof(*node), it);
> > + list_for_each_entry_from(node,
> > + &target->vm->mm.head_node.node_list,
> > + node_list) {
> > + if (node->start >= end)
> > + break;
> > +
> > + vma = container_of(node, typeof(*vma), node);
> > + if (flags & PIN_NONBLOCK &&
> > + (vma->pin_count || vma->obj->active)) {
> > + ret = -ENOSPC;
> > + break;
> > + }
> > +
> > + if (vma->exec_entry &&
> > + vma->exec_entry->flags & EXEC_OBJECT_PINNED) {
> > + /* Overlapping pinned objects in the same batch */
> > + ret = -EINVAL;
> > + break;
> > + }
> > +
> > + if (vma->pin_count) {
> > > + /* We may need to evict a buffer in the same batch */
> > + ret = vma->exec_entry ? -ENOSPC : -EBUSY;
> > + break;
> > + }
> > +
> > + list_add(&vma->exec_list, &eviction_list);
> > + drm_gem_object_reference(&vma->obj->base);
> > + }
> > +
> > + ret = 0;
> > + list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
> > + struct drm_i915_gem_object *obj = vma->obj;
> > + list_del_init(&vma->exec_list);
> > + if (ret == 0)
> > + ret = i915_vma_unbind(vma);
> > + drm_gem_object_unreference(&obj->base);
> > + }
> > +
> > + return ret;
> > +}
> > +
> > /**
> > * i915_gem_evict_vm - Evict all idle vmas from a vm
> > * @vm: Address space to cleanse
> > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > index 19dd6b05ee1d..c35c9dc526e7 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > @@ -603,6 +603,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma
> *vma,
> > flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
> > if ((flags & PIN_MAPPABLE) == 0)
> > flags |= PIN_HIGH;
> > + if (entry->flags & EXEC_OBJECT_PINNED)
> > + flags |= entry->offset | PIN_OFFSET_FIXED;
> > }
> >
> > ret = i915_gem_object_pin(obj, vma->vm,
> > @@ -679,6 +681,10 @@ eb_vma_misplaced(struct i915_vma *vma)
> > if (vma->node.size < entry->pad_to_size)
> > return true;
> >
> > + if (entry->flags & EXEC_OBJECT_PINNED &&
> > + vma->node.start != entry->offset)
> > + return true;
> > +
> > if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
> > vma->node.start < BATCH_OFFSET_BIAS)
> > return true;
>
>
> I think would be better to add the following change here.
>
> - if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
> + if (!(entry->flags &
> + (EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED)) &&
> (vma->node.start + vma->node.size - 1) >> 32)
> return true;
>
> This way, User will not have to necessarily pass the 48B_ADDRESS flag
> also along with the OBJECT_PINNED flag, if the offset is > 4 GB.
> The OBJECT_PINNED flag will take precedence over 48B_ADDRESS flag.
>
> Best regards
> Akash
>
>
> > @@ -1328,7 +1334,8 @@ eb_get_batch(struct eb_vmas *eb)
> > * Note that actual hangs have only been observed on gen7, but for
> > * paranoia do it everywhere.
> > */
> > - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
> > + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
> > + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
> >
> > return vma->obj;
> > }
> > diff --git a/drivers/gpu/drm/i915/i915_trace.h
> b/drivers/gpu/drm/i915/i915_trace.h
> > index b1dcee718640..ef2387c01fa9 100644
> > --- a/drivers/gpu/drm/i915/i915_trace.h
> > +++ b/drivers/gpu/drm/i915/i915_trace.h
> > @@ -459,6 +459,29 @@ TRACE_EVENT(i915_gem_evict_vm,
> > TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
> > );
> >
> > +TRACE_EVENT(i915_gem_evict_vma,
> > + TP_PROTO(struct i915_vma *vma, unsigned flags),
> > + TP_ARGS(vma, flags),
> > +
> > + TP_STRUCT__entry(
> > + __field(u32, dev)
> > + __field(struct i915_address_space *, vm)
> > + __field(u64, start)
> > + __field(u64, size)
> > + __field(unsigned, flags)
> > + ),
> > +
> > + TP_fast_assign(
> > + __entry->dev = vma->vm->dev->primary->index;
> > + __entry->vm = vma->vm;
> > + __entry->start = vma->node.start;
> > + __entry->size = vma->node.size;
> > + __entry->flags = flags;
> > + ),
> > +
> > + TP_printk("dev=%d, vm=%p, start=%llx size=%llx, flags=%x", __entry-
> >dev, __entry->vm, (long long)__entry->start, (long long)__entry->size, __entry-
> >flags)
> > +);
> > +
> > TRACE_EVENT(i915_gem_ring_sync_to,
> > TP_PROTO(struct drm_i915_gem_request *to_req,
> > struct intel_engine_cs *from,
> > diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> > index 5e2fc61e7d88..766aa4fb8264 100644
> > --- a/include/uapi/drm/i915_drm.h
> > +++ b/include/uapi/drm/i915_drm.h
> > @@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait {
> > #define I915_PARAM_EU_TOTAL 34
> > #define I915_PARAM_HAS_GPU_RESET 35
> > #define I915_PARAM_HAS_RESOURCE_STREAMER 36
> > +#define I915_PARAM_HAS_EXEC_SOFTPIN 37
> >
> > typedef struct drm_i915_getparam {
> > s32 param;
> > @@ -692,7 +693,8 @@ struct drm_i915_gem_exec_object2 {
> > #define EXEC_OBJECT_WRITE (1<<2)
> > #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> > #define EXEC_OBJECT_PAD_TO_SIZE (1<<4)
> > -#define __EXEC_OBJECT_UNKNOWN_FLAGS -
> (EXEC_OBJECT_PAD_TO_SIZE<<1)
> > +#define EXEC_OBJECT_PINNED (1<<5)
> > +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
> > __u64 flags;
> >
> > __u64 pad_to_size;
> > --
> > 2.6.0
> >
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
2015-10-27 11:51 ` akash goel
2015-11-05 10:57 ` Daniel, Thomas
@ 2015-12-02 13:28 ` Chris Wilson
1 sibling, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2015-12-02 13:28 UTC (permalink / raw)
To: akash goel; +Cc: intel-gfx, Goel, Akash
On Tue, Oct 27, 2015 at 05:21:37PM +0530, akash goel wrote:
> On Tue, Oct 6, 2015 at 4:23 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > index 19dd6b05ee1d..c35c9dc526e7 100644
> > --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> > @@ -603,6 +603,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
> > flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
> > if ((flags & PIN_MAPPABLE) == 0)
> > flags |= PIN_HIGH;
> > + if (entry->flags & EXEC_OBJECT_PINNED)
> > + flags |= entry->offset | PIN_OFFSET_FIXED;
> > }
> >
> > ret = i915_gem_object_pin(obj, vma->vm,
> > @@ -679,6 +681,10 @@ eb_vma_misplaced(struct i915_vma *vma)
> > if (vma->node.size < entry->pad_to_size)
> > return true;
> >
> > + if (entry->flags & EXEC_OBJECT_PINNED &&
> > + vma->node.start != entry->offset)
> > + return true;
> > +
> > if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
> > vma->node.start < BATCH_OFFSET_BIAS)
> > return true;
>
>
> I think would be better to add the following change here.
>
> - if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
> + if (!(entry->flags &
> + (EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED)) &&
> (vma->node.start + vma->node.size - 1) >> 32)
> return true;
>
> This way, User will not have to necessarily pass the 48B_ADDRESS flag
> also along with the OBJECT_PINNED flag, if the offset is > 4 GB.
> The OBJECT_PINNED flag will take precedence over 48B_ADDRESS flag.
No, let's not start having flags mean multiple things if we can avoid
it as that makes the ABI harder to extend in future.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
2015-10-06 10:53 ` [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer Chris Wilson
2015-10-06 13:59 ` Daniel, Thomas
2015-10-27 11:51 ` akash goel
@ 2015-11-05 17:51 ` Kristian Høgsberg
2015-11-05 18:17 ` Jesse Barnes
2 siblings, 1 reply; 27+ messages in thread
From: Kristian Høgsberg @ 2015-11-05 17:51 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx@lists.freedesktop.org
On Tue, Oct 6, 2015 at 3:53 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> Userspace can pass in an offset that it presumes the object is located
> at. The kernel will then do its utmost to fit the object into that
> location. The assumption is that userspace is handling its own object
> locations (for example along with full-ppgtt) and that the kernel will
> rarely have to make space for the user's requests.
I know the commit message isn't documentation, but the phrase "do its
utmost" makes me uncomfortable. I'd like to be explicit about what
might make it fail (should only be pinned fbs in case of aliased ppgtt
or userspace errors such as overlapping placements), or conversely,
spell out when the flag can be expected to work (full ppgtt).
Kristian
> v2: Fix i915_gem_evict_range() (now evict_for_vma) to handle ordinary
> and fixed objects within the same batch
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: "Daniel, Thomas" <thomas.daniel@intel.com>
> ---
> drivers/gpu/drm/i915/i915_dma.c | 3 ++
> drivers/gpu/drm/i915/i915_drv.h | 10 +++--
> drivers/gpu/drm/i915/i915_gem.c | 68 +++++++++++++++++++++---------
> drivers/gpu/drm/i915/i915_gem_evict.c | 61 +++++++++++++++++++++++++++
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +++-
> drivers/gpu/drm/i915/i915_trace.h | 23 ++++++++++
> include/uapi/drm/i915_drm.h | 4 +-
> 7 files changed, 151 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index ab37d1121be8..cd79ef114b8e 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -170,6 +170,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
> case I915_PARAM_HAS_RESOURCE_STREAMER:
> value = HAS_RESOURCE_STREAMER(dev);
> break;
> + case I915_PARAM_HAS_EXEC_SOFTPIN:
> + value = 1;
> + break;
> default:
> DRM_DEBUG("Unknown parameter %d\n", param->param);
> return -EINVAL;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index a0ce011a5dc0..7d351d991022 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2778,10 +2778,11 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
> #define PIN_NONBLOCK (1<<1)
> #define PIN_GLOBAL (1<<2)
> #define PIN_OFFSET_BIAS (1<<3)
> -#define PIN_USER (1<<4)
> -#define PIN_UPDATE (1<<5)
> -#define PIN_ZONE_4G (1<<6)
> -#define PIN_HIGH (1<<7)
> +#define PIN_OFFSET_FIXED (1<<4)
> +#define PIN_USER (1<<5)
> +#define PIN_UPDATE (1<<6)
> +#define PIN_ZONE_4G (1<<7)
> +#define PIN_HIGH (1<<8)
> #define PIN_OFFSET_MASK (~4095)
> int __must_check
> i915_gem_object_pin(struct drm_i915_gem_object *obj,
> @@ -3127,6 +3128,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
> unsigned long start,
> unsigned long end,
> unsigned flags);
> +int __must_check i915_gem_evict_for_vma(struct i915_vma *vma, unsigned flags);
> int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
>
> /* belongs in i915_gem_gtt.h */
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 8fe3df0cdcb8..82efd6a6dee0 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3334,7 +3334,6 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> struct drm_device *dev = obj->base.dev;
> struct drm_i915_private *dev_priv = dev->dev_private;
> u64 start, end;
> - u32 search_flag, alloc_flag;
> struct i915_vma *vma;
> int ret;
>
> @@ -3409,30 +3408,53 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
> if (IS_ERR(vma))
> goto err_unpin;
>
> - if (flags & PIN_HIGH) {
> - search_flag = DRM_MM_SEARCH_BELOW;
> - alloc_flag = DRM_MM_CREATE_TOP;
> + if (flags & PIN_OFFSET_FIXED) {
> + uint64_t offset = flags & PIN_OFFSET_MASK;
> + if (offset & (alignment - 1) || offset + size > end) {
> + vma = ERR_PTR(-EINVAL);
> + goto err_free_vma;
> + }
> + vma->node.start = offset;
> + vma->node.size = size;
> + vma->node.color = obj->cache_level;
> + ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> + if (ret) {
> + ret = i915_gem_evict_for_vma(vma, flags);
> + if (ret == 0)
> + ret = drm_mm_reserve_node(&vm->mm, &vma->node);
> + }
> + if (ret) {
> + vma = ERR_PTR(ret);
> + goto err_free_vma;
> + }
> } else {
> - search_flag = DRM_MM_SEARCH_DEFAULT;
> - alloc_flag = DRM_MM_CREATE_DEFAULT;
> - }
> + u32 search_flag, alloc_flag;
> +
> + if (flags & PIN_HIGH) {
> + search_flag = DRM_MM_SEARCH_BELOW;
> + alloc_flag = DRM_MM_CREATE_TOP;
> + } else {
> + search_flag = DRM_MM_SEARCH_DEFAULT;
> + alloc_flag = DRM_MM_CREATE_DEFAULT;
> + }
>
> search_free:
> - ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> - size, alignment,
> - obj->cache_level,
> - start, end,
> - search_flag,
> - alloc_flag);
> - if (ret) {
> - ret = i915_gem_evict_something(dev, vm, size, alignment,
> - obj->cache_level,
> - start, end,
> - flags);
> - if (ret == 0)
> - goto search_free;
> + ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
> + size, alignment,
> + obj->cache_level,
> + start, end,
> + search_flag,
> + alloc_flag);
> + if (ret) {
> + ret = i915_gem_evict_something(dev, vm, size, alignment,
> + obj->cache_level,
> + start, end,
> + flags);
> + if (ret == 0)
> + goto search_free;
>
> - goto err_free_vma;
> + goto err_free_vma;
> + }
> }
> if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
> ret = -EINVAL;
> @@ -3970,6 +3992,10 @@ i915_vma_misplaced(struct i915_vma *vma,
> vma->node.start < (flags & PIN_OFFSET_MASK))
> return true;
>
> + if (flags & PIN_OFFSET_FIXED &&
> + vma->node.start != (flags & PIN_OFFSET_MASK))
> + return true;
> +
> return false;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
> index d71a133ceff5..60450a95a491 100644
> --- a/drivers/gpu/drm/i915/i915_gem_evict.c
> +++ b/drivers/gpu/drm/i915/i915_gem_evict.c
> @@ -199,6 +199,67 @@ found:
> return ret;
> }
>
> +int
> +i915_gem_evict_for_vma(struct i915_vma *target, unsigned flags)
> +{
> + struct list_head eviction_list;
> + struct interval_tree_node *it;
> + u64 end = target->node.start + target->node.size;
> + struct drm_mm_node *node;
> + struct i915_vma *vma, *next;
> + int ret;
> +
> + trace_i915_gem_evict_vma(target, flags);
> +
> + it = interval_tree_iter_first(&target->vm->mm.interval_tree,
> + target->node.start, end -1);
> + if (it == NULL)
> + return 0;
> +
> + INIT_LIST_HEAD(&eviction_list);
> + node = container_of(it, typeof(*node), it);
> + list_for_each_entry_from(node,
> + &target->vm->mm.head_node.node_list,
> + node_list) {
> + if (node->start >= end)
> + break;
> +
> + vma = container_of(node, typeof(*vma), node);
> + if (flags & PIN_NONBLOCK &&
> + (vma->pin_count || vma->obj->active)) {
> + ret = -ENOSPC;
> + break;
> + }
> +
> + if (vma->exec_entry &&
> + vma->exec_entry->flags & EXEC_OBJECT_PINNED) {
> + /* Overlapping pinned objects in the same batch */
> + ret = -EINVAL;
> + break;
> + }
> +
> + if (vma->pin_count) {
> > + /* We may need to evict a buffer in the same batch */
> + ret = vma->exec_entry ? -ENOSPC : -EBUSY;
> + break;
> + }
> +
> + list_add(&vma->exec_list, &eviction_list);
> + drm_gem_object_reference(&vma->obj->base);
> + }
> +
> + ret = 0;
> + list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
> + struct drm_i915_gem_object *obj = vma->obj;
> + list_del_init(&vma->exec_list);
> + if (ret == 0)
> + ret = i915_vma_unbind(vma);
> + drm_gem_object_unreference(&obj->base);
> + }
> +
> + return ret;
> +}
> +
> /**
> * i915_gem_evict_vm - Evict all idle vmas from a vm
> * @vm: Address space to cleanse
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 19dd6b05ee1d..c35c9dc526e7 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -603,6 +603,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
> flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
> if ((flags & PIN_MAPPABLE) == 0)
> flags |= PIN_HIGH;
> + if (entry->flags & EXEC_OBJECT_PINNED)
> + flags |= entry->offset | PIN_OFFSET_FIXED;
> }
>
> ret = i915_gem_object_pin(obj, vma->vm,
> @@ -679,6 +681,10 @@ eb_vma_misplaced(struct i915_vma *vma)
> if (vma->node.size < entry->pad_to_size)
> return true;
>
> + if (entry->flags & EXEC_OBJECT_PINNED &&
> + vma->node.start != entry->offset)
> + return true;
> +
> if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
> vma->node.start < BATCH_OFFSET_BIAS)
> return true;
> @@ -1328,7 +1334,8 @@ eb_get_batch(struct eb_vmas *eb)
> * Note that actual hangs have only been observed on gen7, but for
> * paranoia do it everywhere.
> */
> - vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
> + if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
> + vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
>
> return vma->obj;
> }
> diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
> index b1dcee718640..ef2387c01fa9 100644
> --- a/drivers/gpu/drm/i915/i915_trace.h
> +++ b/drivers/gpu/drm/i915/i915_trace.h
> @@ -459,6 +459,29 @@ TRACE_EVENT(i915_gem_evict_vm,
> TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
> );
>
> +TRACE_EVENT(i915_gem_evict_vma,
> + TP_PROTO(struct i915_vma *vma, unsigned flags),
> + TP_ARGS(vma, flags),
> +
> + TP_STRUCT__entry(
> + __field(u32, dev)
> + __field(struct i915_address_space *, vm)
> + __field(u64, start)
> + __field(u64, size)
> + __field(unsigned, flags)
> + ),
> +
> + TP_fast_assign(
> + __entry->dev = vma->vm->dev->primary->index;
> + __entry->vm = vma->vm;
> + __entry->start = vma->node.start;
> + __entry->size = vma->node.size;
> + __entry->flags = flags;
> + ),
> +
> + TP_printk("dev=%d, vm=%p, start=%llx size=%llx, flags=%x", __entry->dev, __entry->vm, (long long)__entry->start, (long long)__entry->size, __entry->flags)
> +);
> +
> TRACE_EVENT(i915_gem_ring_sync_to,
> TP_PROTO(struct drm_i915_gem_request *to_req,
> struct intel_engine_cs *from,
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 5e2fc61e7d88..766aa4fb8264 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -356,6 +356,7 @@ typedef struct drm_i915_irq_wait {
> #define I915_PARAM_EU_TOTAL 34
> #define I915_PARAM_HAS_GPU_RESET 35
> #define I915_PARAM_HAS_RESOURCE_STREAMER 36
> +#define I915_PARAM_HAS_EXEC_SOFTPIN 37
>
> typedef struct drm_i915_getparam {
> s32 param;
> @@ -692,7 +693,8 @@ struct drm_i915_gem_exec_object2 {
> #define EXEC_OBJECT_WRITE (1<<2)
> #define EXEC_OBJECT_SUPPORTS_48B_ADDRESS (1<<3)
> #define EXEC_OBJECT_PAD_TO_SIZE (1<<4)
> -#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
> +#define EXEC_OBJECT_PINNED (1<<5)
> +#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
> __u64 flags;
>
> __u64 pad_to_size;
> --
> 2.6.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
2015-11-05 17:51 ` Kristian Høgsberg
@ 2015-11-05 18:17 ` Jesse Barnes
2015-11-06 13:38 ` Chris Wilson
0 siblings, 1 reply; 27+ messages in thread
From: Jesse Barnes @ 2015-11-05 18:17 UTC (permalink / raw)
To: Kristian Høgsberg, Chris Wilson; +Cc: intel-gfx@lists.freedesktop.org
On 11/05/2015 09:51 AM, Kristian Høgsberg wrote:
> On Tue, Oct 6, 2015 at 3:53 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> Userspace can pass in an offset that it presumes the object is located
>> at. The kernel will then do its utmost to fit the object into that
>> location. The assumption is that userspace is handling its own object
>> locations (for example along with full-ppgtt) and that the kernel will
>> rarely have to make space for the user's requests.
>
> I know the commit message isn't documentation, but the phrase "do its
> utmost" makes me uncomfortable. I'd like to be explicit about what
> might make it fail (should only be pinned fbs in case of aliased ppgtt
> or userspace errors such as overlapping placements), or conversely,
> spell out when the flag can be expected to work (full ppgtt).
Ooh yeah that would be good to add to the execbuf man page with the
softpin additions. Oh wait, we don't have a man page for execbuf?
Someone should write one!
Jesse
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
2015-11-05 18:17 ` Jesse Barnes
@ 2015-11-06 13:38 ` Chris Wilson
2015-11-06 17:01 ` Jesse Barnes
2015-11-06 23:58 ` Kristian Høgsberg
0 siblings, 2 replies; 27+ messages in thread
From: Chris Wilson @ 2015-11-06 13:38 UTC (permalink / raw)
To: Jesse Barnes; +Cc: intel-gfx@lists.freedesktop.org
On Thu, Nov 05, 2015 at 10:17:56AM -0800, Jesse Barnes wrote:
> On 11/05/2015 09:51 AM, Kristian Høgsberg wrote:
> > On Tue, Oct 6, 2015 at 3:53 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> >> Userspace can pass in an offset that it presumes the object is located
> >> at. The kernel will then do its utmost to fit the object into that
> >> location. The assumption is that userspace is handling its own object
> >> locations (for example along with full-ppgtt) and that the kernel will
> >> rarely have to make space for the user's requests.
> >
> > I know the commit message isn't documentation, but the phrase "do its
> > utmost" makes me uncomfortable. I'd like to be explicit about what
> > might make it fail (should only be pinned fbs in case of aliased ppgtt
> > or userspace errors such as overlapping placements), or conversely,
> > spell out when the flag can be expected to work (full ppgtt).
>
> Ooh yeah that would be good to add to the execbuf man page with the
> softpin additions. Oh wait, we don't have a man page for execbuf?
> Someone should write one!
How about:
This extends the DRM_I915_GEM_EXECBUFFER2 ioctl to do the following:
* if the user supplies a virtual address via the execobject->offset
*and* sets the EXEC_OBJECT_PINNED flag in execobject->flags, then
that object is placed at that offset in the address space selected
by the context specifier in execbuffer.
* the location must be aligned to the GTT page size, 4096 bytes
* as the object is placed exactly as specified, it may be used in this batch
without relocations pointing to it
It may fail to do so if:
* EINVAL is returned if the object does not have a 4096 byte aligned
address
* the object conflicts with another pinned object (either pinned by
hardware in that address space, e.g. scanouts in the aliasing ppgtt)
or within the same batch.
EBUSY is returned if the location is pinned by hardware
EINVAL is returned if the location is already in use by the batch
* EINVAL is returned if the object conflicts with its own alignment (as meets
the hardware requirements) or if the placement of the object does not fit
within the address space
All other execbuffer errors apply.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
2015-11-06 13:38 ` Chris Wilson
@ 2015-11-06 17:01 ` Jesse Barnes
2015-11-06 23:58 ` Kristian Høgsberg
1 sibling, 0 replies; 27+ messages in thread
From: Jesse Barnes @ 2015-11-06 17:01 UTC (permalink / raw)
To: Chris Wilson, Kristian Høgsberg,
intel-gfx@lists.freedesktop.org
On 11/06/2015 05:38 AM, Chris Wilson wrote:
> On Thu, Nov 05, 2015 at 10:17:56AM -0800, Jesse Barnes wrote:
>> On 11/05/2015 09:51 AM, Kristian Høgsberg wrote:
>>> On Tue, Oct 6, 2015 at 3:53 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>>>> Userspace can pass in an offset that it presumes the object is located
>>>> at. The kernel will then do its utmost to fit the object into that
>>>> location. The assumption is that userspace is handling its own object
>>>> locations (for example along with full-ppgtt) and that the kernel will
>>>> rarely have to make space for the user's requests.
>>>
>>> I know the commit message isn't documentation, but the phrase "do its
>>> utmost" makes me uncomfortable. I'd like to be explicit about what
>>> might make it fail (should only be pinned fbs in case of aliased ppgtt
>>> or userspace errors such as overlapping placements), or conversely,
>>> spell out when the flag can be expected to work (full ppgtt).
>>
>> Ooh yeah that would be good to add to the execbuf man page with the
>> softpin additions. Oh wait, we don't have a man page for execbuf?
>> Someone should write one!
>
> How about:
>
> This extends the DRM_I915_GEM_EXECBUFFER2 ioctl to do the following:
> * if the user supplies a virtual address via the execobject->offset
> *and* sets the EXEC_OBJECT_PINNED flag in execobject->flags, then
> that object is placed at that offset in the address space selected
> by the context specifier in execbuffer.
> * the location must be aligned to the GTT page size, 4096 bytes
> * as the object is placed exactly as specified, it may be used in this batch
> without relocations pointing to it
>
> It may fail to do so if:
> * EINVAL is returned if the object does not have a 4096 byte aligned
> address
> * the object conflicts with another pinned object (either pinned by
> hardware in that address space, e.g. scanouts in the aliasing ppgtt)
> or within the same batch.
> EBUSY is returned if the location is pinned by hardware
> EINVAL is returned if the location is already in use by the batch
> * EINVAL is returned if the object conflicts with its own alignment (as meets
> the hardware requirements) or if the placement of the object does not fit
> within the address space
>
> All other execbuffer errors apply.
Looks great, now we just need an existing man page in which to integrate
this additional text.
Jesse
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
2015-11-06 13:38 ` Chris Wilson
2015-11-06 17:01 ` Jesse Barnes
@ 2015-11-06 23:58 ` Kristian Høgsberg
1 sibling, 0 replies; 27+ messages in thread
From: Kristian Høgsberg @ 2015-11-06 23:58 UTC (permalink / raw)
To: Chris Wilson, Jesse Barnes, Kristian Høgsberg,
intel-gfx@lists.freedesktop.org
On Fri, Nov 6, 2015 at 5:38 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Thu, Nov 05, 2015 at 10:17:56AM -0800, Jesse Barnes wrote:
>> On 11/05/2015 09:51 AM, Kristian Høgsberg wrote:
>> > On Tue, Oct 6, 2015 at 3:53 AM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
>> >> Userspace can pass in an offset that it presumes the object is located
>> >> at. The kernel will then do its utmost to fit the object into that
>> >> location. The assumption is that userspace is handling its own object
>> >> locations (for example along with full-ppgtt) and that the kernel will
>> >> rarely have to make space for the user's requests.
>> >
>> > I know the commit message isn't documentation, but the phrase "do its
>> > utmost" makes me uncomfortable. I'd like to be explicit about what
>> > might make it fail (should only be pinned fbs in case of aliased ppgtt
>> > or userspace errors such as overlapping placements), or conversely,
>> > spell out when the flag can be expected to work (full ppgtt).
>>
>> Ooh yeah that would be good to add to the execbuf man page with the
>> softpin additions. Oh wait, we don't have a man page for execbuf?
>> Someone should write one!
>
> How about:
>
> This extends the DRM_I915_GEM_EXECBUFFER2 ioctl to do the following:
> * if the user supplies a virtual address via the execobject->offset
> *and* sets the EXEC_OBJECT_PINNED flag in execobject->flags, then
> that object is placed at that offset in the address space selected
> by the context specifier in execbuffer.
> * the location must be aligned to the GTT page size, 4096 bytes
> * as the object is placed exactly as specified, it may be used in this batch
> without relocations pointing to it
>
> It may fail to do so if:
> * EINVAL is returned if the object does not have a 4096 byte aligned
> address
> * the object conflicts with another pinned object (either pinned by
> hardware in that address space, e.g. scanouts in the aliasing ppgtt)
> or within the same batch.
> EBUSY is returned if the location is pinned by hardware
> EINVAL is returned if the location is already in use by the batch
> * EINVAL is returned if the object conflicts with its own alignment (as meets
> the hardware requirements) or if the placement of the object does not fit
> within the address space
>
> All other execbuffer errors apply.
Yes, that looks better.
Thanks,
Kristian
> -Chris
>
> --
> Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [Intel-gfx] [PATCH 1/3] drm: Track drm_mm nodes with an interval tree
2015-10-06 10:53 [PATCH 1/3] drm: Track drm_mm nodes with an interval tree Chris Wilson
2015-10-06 10:53 ` [PATCH 2/3] drm/i915: Allow the user to pass a context to any ring Chris Wilson
2015-10-06 10:53 ` [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer Chris Wilson
@ 2015-10-06 11:11 ` Daniel Vetter
2015-10-06 11:19 ` Chris Wilson
2015-10-21 15:11 ` Daniel Vetter
3 siblings, 1 reply; 27+ messages in thread
From: Daniel Vetter @ 2015-10-06 11:11 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx, dri-devel
On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
> In addition to the last-in/first-out stack for accessing drm_mm nodes,
> we occasionally and in the future often want to find a drm_mm_node by an
> address. To do so efficiently we need to track the nodes in an interval
> tree - lookups for a particular address will then be O(lg(N)), where N
> is the number of nodes in the range manager as opposed to O(N).
> Insertion however gains an extra O(lg(N)) step for all nodes
> irrespective of whether the interval tree is in use. For future i915
> patches, eliminating the linear walk is a significant improvement.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: dri-devel@lists.freedesktop.org
For the vma manager David Herrman put the interval tree outside of drm_mm.
Whichever way we pick, but I think we should be consistent about this.
-Daniel
> ---
> drivers/gpu/drm/Kconfig | 1 +
> drivers/gpu/drm/drm_mm.c | 71 ++++++++++++++++++++++++++++++++----------------
> include/drm/drm_mm.h | 5 ++++
> 3 files changed, 54 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 06ae5008c5ed..e25050a5a843 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -12,6 +12,7 @@ menuconfig DRM
> select I2C
> select I2C_ALGOBIT
> select DMA_SHARED_BUFFER
> + select INTERVAL_TREE
> help
> Kernel-level support for the Direct Rendering Infrastructure (DRI)
> introduced in XFree86 4.0. If you say Y here, you need to select
> diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
> index 04de6fd88f8c..e3acd860f738 100644
> --- a/drivers/gpu/drm/drm_mm.c
> +++ b/drivers/gpu/drm/drm_mm.c
> @@ -153,6 +153,10 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
> INIT_LIST_HEAD(&node->hole_stack);
> list_add(&node->node_list, &hole_node->node_list);
>
> + node->it.start = node->start;
> + node->it.last = node->start + size - 1;
> + interval_tree_insert(&node->it, &mm->interval_tree);
> +
> BUG_ON(node->start + node->size > adj_end);
>
> node->hole_follows = 0;
> @@ -178,39 +182,53 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
> */
> int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
> {
> - struct drm_mm_node *hole;
> u64 end = node->start + node->size;
> - u64 hole_start;
> - u64 hole_end;
> -
> - BUG_ON(node == NULL);
> + struct interval_tree_node *it;
> + struct drm_mm_node *hole;
> + u64 hole_start, hole_end;
>
> /* Find the relevant hole to add our node to */
> - drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
> - if (hole_start > node->start || hole_end < end)
> - continue;
> + it = interval_tree_iter_first(&mm->interval_tree,
> + node->start, (u64)-1);
> + if (it == NULL) {
> + hole = list_last_entry(&mm->head_node.node_list,
> + struct drm_mm_node, node_list);
> + } else {
> + hole = container_of(it, typeof(*hole), it);
> + if (hole->start <= node->start)
> + return -ENOSPC;
> +
> + hole = list_last_entry(&hole->node_list,
> + struct drm_mm_node, node_list);
> + }
>
> - node->mm = mm;
> - node->allocated = 1;
> + hole_start = drm_mm_hole_node_start(hole);
> + hole_end = drm_mm_hole_node_end(hole);
> + if (hole_start > node->start || hole_end < end)
> + return -ENOSPC;
>
> - INIT_LIST_HEAD(&node->hole_stack);
> - list_add(&node->node_list, &hole->node_list);
> + node->mm = mm;
> + node->allocated = 1;
>
> - if (node->start == hole_start) {
> - hole->hole_follows = 0;
> - list_del_init(&hole->hole_stack);
> - }
> + INIT_LIST_HEAD(&node->hole_stack);
> + list_add(&node->node_list, &hole->node_list);
>
> - node->hole_follows = 0;
> - if (end != hole_end) {
> - list_add(&node->hole_stack, &mm->hole_stack);
> - node->hole_follows = 1;
> - }
> + node->it.start = node->start;
> + node->it.last = node->start + node->size - 1;
> + interval_tree_insert(&node->it, &mm->interval_tree);
>
> - return 0;
> + if (node->start == hole_start) {
> + hole->hole_follows = 0;
> + list_del_init(&hole->hole_stack);
> }
>
> - return -ENOSPC;
> + node->hole_follows = 0;
> + if (end != hole_end) {
> + list_add(&node->hole_stack, &mm->hole_stack);
> + node->hole_follows = 1;
> + }
> +
> + return 0;
> }
> EXPORT_SYMBOL(drm_mm_reserve_node);
>
> @@ -300,6 +318,10 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
> INIT_LIST_HEAD(&node->hole_stack);
> list_add(&node->node_list, &hole_node->node_list);
>
> + node->it.start = node->start;
> + node->it.last = node->start + node->size - 1;
> + interval_tree_insert(&node->it, &mm->interval_tree);
> +
> BUG_ON(node->start < start);
> BUG_ON(node->start < adj_start);
> BUG_ON(node->start + node->size > adj_end);
> @@ -388,6 +410,7 @@ void drm_mm_remove_node(struct drm_mm_node *node)
> } else
> list_move(&prev_node->hole_stack, &mm->hole_stack);
>
> + interval_tree_remove(&node->it, &mm->interval_tree);
> list_del(&node->node_list);
> node->allocated = 0;
> }
> @@ -756,6 +779,8 @@ void drm_mm_init(struct drm_mm * mm, u64 start, u64 size)
> mm->head_node.size = start - mm->head_node.start;
> list_add_tail(&mm->head_node.hole_stack, &mm->hole_stack);
>
> + mm->interval_tree = RB_ROOT;
> +
> mm->color_adjust = NULL;
> }
> EXPORT_SYMBOL(drm_mm_init);
> diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
> index 0de6290df4da..6d531fe529bf 100644
> --- a/include/drm/drm_mm.h
> +++ b/include/drm/drm_mm.h
> @@ -37,6 +37,7 @@
> * Generic range manager structs
> */
> #include <linux/bug.h>
> +#include <linux/interval_tree.h>
> #include <linux/kernel.h>
> #include <linux/list.h>
> #include <linux/spinlock.h>
> @@ -61,6 +62,7 @@ enum drm_mm_allocator_flags {
> struct drm_mm_node {
> struct list_head node_list;
> struct list_head hole_stack;
> + struct interval_tree_node it;
> unsigned hole_follows : 1;
> unsigned scanned_block : 1;
> unsigned scanned_prev_free : 1;
> @@ -79,6 +81,9 @@ struct drm_mm {
> /* head_node.node_list is the list of all memory nodes, ordered
> * according to the (increasing) start address of the memory node. */
> struct drm_mm_node head_node;
> + /* Keep an interval_tree for fast lookup of drm_mm_nodes by address. */
> + struct rb_root interval_tree;
> +
> unsigned int scan_check_range : 1;
> unsigned scan_alignment;
> unsigned long scan_color;
> --
> 2.6.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [Intel-gfx] [PATCH 1/3] drm: Track drm_mm nodes with an interval tree
2015-10-06 11:11 ` [Intel-gfx] [PATCH 1/3] drm: Track drm_mm nodes with an interval tree Daniel Vetter
@ 2015-10-06 11:19 ` Chris Wilson
2015-10-06 12:01 ` Daniel Vetter
2015-10-07 10:22 ` David Herrmann
0 siblings, 2 replies; 27+ messages in thread
From: Chris Wilson @ 2015-10-06 11:19 UTC (permalink / raw)
To: Daniel Vetter; +Cc: intel-gfx, dri-devel
On Tue, Oct 06, 2015 at 01:11:56PM +0200, Daniel Vetter wrote:
> On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
> > In addition to the last-in/first-out stack for accessing drm_mm nodes,
> > we occasionally and in the future often want to find a drm_mm_node by an
> > address. To do so efficiently we need to track the nodes in an interval
> > tree - lookups for a particular address will then be O(lg(N)), where N
> > is the number of nodes in the range manager as opposed to O(N).
> > Insertion however gains an extra O(lg(N)) step for all nodes
> > irrespective of whether the interval tree is in use. For future i915
> > patches, eliminating the linear walk is a significant improvement.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: dri-devel@lists.freedesktop.org
>
> For the vma manager David Herrman put the interval tree outside of drm_mm.
> Whichever way we pick, but I think we should be consistent about this.
Given that the basis of this patch is that functionality exposed by
drm_mm (i.e. drm_mm_reserve_node) is too slow for our use case (i.e.
there is a measurable perf degradation if we switch over from the mru
stack to using fixed addresses) it makes sense to improve that
functionality. The question is then why the drm_vma_manager didn't use
and improve the existing functionality...
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [Intel-gfx] [PATCH 1/3] drm: Track drm_mm nodes with an interval tree
2015-10-06 11:19 ` Chris Wilson
@ 2015-10-06 12:01 ` Daniel Vetter
2015-10-07 10:22 ` David Herrmann
1 sibling, 0 replies; 27+ messages in thread
From: Daniel Vetter @ 2015-10-06 12:01 UTC (permalink / raw)
To: Chris Wilson, Daniel Vetter, intel-gfx, dri-devel
On Tue, Oct 06, 2015 at 12:19:43PM +0100, Chris Wilson wrote:
> On Tue, Oct 06, 2015 at 01:11:56PM +0200, Daniel Vetter wrote:
> > On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
> > > In addition to the last-in/first-out stack for accessing drm_mm nodes,
> > > we occasionally and in the future often want to find a drm_mm_node by an
> > > address. To do so efficiently we need to track the nodes in an interval
> > > tree - lookups for a particular address will then be O(lg(N)), where N
> > > is the number of nodes in the range manager as opposed to O(N).
> > > Insertion however gains an extra O(lg(N)) step for all nodes
> > > irrespective of whether the interval tree is in use. For future i915
> > > patches, eliminating the linear walk is a significant improvement.
> > >
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > Cc: dri-devel@lists.freedesktop.org
> >
> > For the vma manager David Herrman put the interval tree outside of drm_mm.
> > Whichever way we pick, but I think we should be consistent about this.
>
> Given that the basis of this patch is that functionality exposed by
> drm_mm (i.e. drm_mm_reserve_node) is too slow for our use case (i.e.
> there is a measurable perf degradation if we switch over from the mru
> stack to using fixed addresses) it makes sense to improve that
> functionality. The question is then why the drm_vma_manager didn't use
> and improve the existing functionality...
Yeah I'm trying to volunteer you to add a lookup-function and rework the
vma-manager ;-)
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 1/3] drm: Track drm_mm nodes with an interval tree
2015-10-06 11:19 ` Chris Wilson
2015-10-06 12:01 ` Daniel Vetter
@ 2015-10-07 10:22 ` David Herrmann
2015-10-16 8:54 ` Daniel, Thomas
1 sibling, 1 reply; 27+ messages in thread
From: David Herrmann @ 2015-10-07 10:22 UTC (permalink / raw)
To: Chris Wilson, Daniel Vetter, Intel Graphics Development,
dri-devel@lists.freedesktop.org
Hi
On Tue, Oct 6, 2015 at 1:19 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> On Tue, Oct 06, 2015 at 01:11:56PM +0200, Daniel Vetter wrote:
>> On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
>> > In addition to the last-in/first-out stack for accessing drm_mm nodes,
>> > we occasionally and in the future often want to find a drm_mm_node by an
>> > address. To do so efficiently we need to track the nodes in an interval
>> > tree - lookups for a particular address will then be O(lg(N)), where N
>> > is the number of nodes in the range manager as opposed to O(N).
>> > Insertion however gains an extra O(lg(N)) step for all nodes
>> > irrespective of whether the interval tree is in use. For future i915
>> > patches, eliminating the linear walk is a significant improvement.
>> >
>> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> > Cc: dri-devel@lists.freedesktop.org
>>
>> For the vma manager David Herrman put the interval tree outside of drm_mm.
>> Whichever way we pick, but I think we should be consistent about this.
>
> Given that the basis of this patch is that functionality exposed by
> drm_mm (i.e. drm_mm_reserve_node) is too slow for our use case (i.e.
> there is a measurable perf degradation if we switch over from the mru
> stack to using fixed addresses) it makes sense to improve that
> functionality. The question is then why the drm_vma_manager didn't use
> and improve the existing functionality...
I didn't want to slow down drm_mm operations, so I kept it separate. I
don't mind if it is merged into drm_mm. It'd be trivial to make the
vma-manager use it (only on the top-level, though).
Thanks
David
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 1/3] drm: Track drm_mm nodes with an interval tree
2015-10-07 10:22 ` David Herrmann
@ 2015-10-16 8:54 ` Daniel, Thomas
2015-10-16 14:26 ` [Intel-gfx] " Daniel Vetter
0 siblings, 1 reply; 27+ messages in thread
From: Daniel, Thomas @ 2015-10-16 8:54 UTC (permalink / raw)
To: David Herrmann, Chris Wilson, Daniel Vetter,
Intel Graphics Development, dri-devel@lists.freedesktop.org
Hi,
> -----Original Message-----
> From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf Of
> David Herrmann
> Sent: Wednesday, October 7, 2015 11:23 AM
> To: Chris Wilson; Daniel Vetter; Intel Graphics Development; dri-
> devel@lists.freedesktop.org
> Subject: Re: [Intel-gfx] [PATCH 1/3] drm: Track drm_mm nodes with an interval
> tree
>
> Hi
>
> On Tue, Oct 6, 2015 at 1:19 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > On Tue, Oct 06, 2015 at 01:11:56PM +0200, Daniel Vetter wrote:
> >> On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
> >> > In addition to the last-in/first-out stack for accessing drm_mm nodes,
> >> > we occasionally and in the future often want to find a drm_mm_node by an
> >> > address. To do so efficiently we need to track the nodes in an interval
> >> > tree - lookups for a particular address will then be O(lg(N)), where N
> >> > is the number of nodes in the range manager as opposed to O(N).
> >> > Insertion however gains an extra O(lg(N)) step for all nodes
> >> > irrespective of whether the interval tree is in use. For future i915
> >> > patches, eliminating the linear walk is a significant improvement.
> >> >
> >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> >> > Cc: dri-devel@lists.freedesktop.org
> >>
> >> For the vma manager David Herrman put the interval tree outside of
> drm_mm.
> >> Whichever way we pick, but I think we should be consistent about this.
> >
> > Given that the basis of this patch is that functionality exposed by
> > drm_mm (i.e. drm_mm_reserve_node) is too slow for our use case (i.e.
> > there is a measurable perf degradation if we switch over from the mru
> > stack to using fixed addresses) it makes sense to improve that
> > functionality. The question is then why the drm_vma_manager didn't use
> > and improve the existing functionality...
>
> I didn't want to slow down drm_mm operations, so I kept it separate. I
> don't mind if it is merged into drm_mm. It'd be trivial to make the
> vma-manager use it (only on the top-level, though).
>
> Thanks
> David
Is there a conclusion to this discussion? I'm under increasing pressure to get the i915 soft-pinning merged and Chris's latest version depends on this interval tree.
I've been told to post a new rebase of the version which doesn't use the interval tree if not.
Cheers,
Thomas.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [Intel-gfx] [PATCH 1/3] drm: Track drm_mm nodes with an interval tree
2015-10-16 8:54 ` Daniel, Thomas
@ 2015-10-16 14:26 ` Daniel Vetter
0 siblings, 0 replies; 27+ messages in thread
From: Daniel Vetter @ 2015-10-16 14:26 UTC (permalink / raw)
To: Daniel, Thomas
Cc: Intel Graphics Development, dri-devel@lists.freedesktop.org
On Fri, Oct 16, 2015 at 08:54:20AM +0000, Daniel, Thomas wrote:
> Hi,
>
> > -----Original Message-----
> > From: Intel-gfx [mailto:intel-gfx-bounces@lists.freedesktop.org] On Behalf Of
> > David Herrmann
> > Sent: Wednesday, October 7, 2015 11:23 AM
> > To: Chris Wilson; Daniel Vetter; Intel Graphics Development; dri-
> > devel@lists.freedesktop.org
> > Subject: Re: [Intel-gfx] [PATCH 1/3] drm: Track drm_mm nodes with an interval
> > tree
> >
> > Hi
> >
> > On Tue, Oct 6, 2015 at 1:19 PM, Chris Wilson <chris@chris-wilson.co.uk> wrote:
> > > On Tue, Oct 06, 2015 at 01:11:56PM +0200, Daniel Vetter wrote:
> > >> On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
> > >> > In addition to the last-in/first-out stack for accessing drm_mm nodes,
> > >> > we occasionally and in the future often want to find a drm_mm_node by an
> > >> > address. To do so efficiently we need to track the nodes in an interval
> > >> > tree - lookups for a particular address will then be O(lg(N)), where N
> > >> > is the number of nodes in the range manager as opposed to O(N).
> > >> > Insertion however gains an extra O(lg(N)) step for all nodes
> > >> > irrespective of whether the interval tree is in use. For future i915
> > >> > patches, eliminating the linear walk is a significant improvement.
> > >> >
> > >> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > >> > Cc: dri-devel@lists.freedesktop.org
> > >>
> > >> For the vma manager David Herrman put the interval tree outside of
> > drm_mm.
> > >> Whichever way we pick, but I think we should be consistent about this.
> > >
> > > Given that the basis of this patch is that functionality exposed by
> > > drm_mm (i.e. drm_mm_reserve_node) is too slow for our use case (i.e.
> > > there is a measurable perf degradation if we switch over from the mru
> > > stack to using fixed addresses) it makes sense to improve that
> > > functionality. The question is then why the drm_vma_manager didn't use
> > > and improve the existing functionality...
> >
> > I didn't want to slow down drm_mm operations, so I kept it separate. I
> > don't mind if it is merged into drm_mm. It'd be trivial to make the
> > vma-manager use it (only on the top-level, though).
> >
> > Thanks
> > David
>
> Is there a conclusion to this discussion? I'm under increasing pressure to get the i915 soft-pinning merged and Chris's latest version depends on this interval tree.
>
> I've been told to post a new rebase of the version which doesn't use the interval tree if not.
In my opinion pushing the interval tree into drm_mm.c makes tons of
sense, so that we don't have to duplicate this between i915 and the vma
manager.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [PATCH 1/3] drm: Track drm_mm nodes with an interval tree
2015-10-06 10:53 [PATCH 1/3] drm: Track drm_mm nodes with an interval tree Chris Wilson
` (2 preceding siblings ...)
2015-10-06 11:11 ` [Intel-gfx] [PATCH 1/3] drm: Track drm_mm nodes with an interval tree Daniel Vetter
@ 2015-10-21 15:11 ` Daniel Vetter
2015-10-21 15:14 ` David Herrmann
3 siblings, 1 reply; 27+ messages in thread
From: Daniel Vetter @ 2015-10-21 15:11 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx, dri-devel
On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
> In addition to the last-in/first-out stack for accessing drm_mm nodes,
> we occasionally and in the future often want to find a drm_mm_node by an
> address. To do so efficiently we need to track the nodes in an interval
> tree - lookups for a particular address will then be O(lg(N)), where N
> is the number of nodes in the range manager as opposed to O(N).
> Insertion however gains an extra O(lg(N)) step for all nodes
> irrespective of whether the interval tree is in use. For future i915
> patches, eliminating the linear walk is a significant improvement.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: dri-devel@lists.freedesktop.org
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
I guess for simpler merge ordering we can just pull this into drm-intel
and patch up the vma manager (just need to drop a lot of code and adjust
the search to use the drm_mm internal_tree nodes) later on.
-Daniel
> ---
> drivers/gpu/drm/Kconfig | 1 +
> drivers/gpu/drm/drm_mm.c | 71 ++++++++++++++++++++++++++++++++----------------
> include/drm/drm_mm.h | 5 ++++
> 3 files changed, 54 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
> index 06ae5008c5ed..e25050a5a843 100644
> --- a/drivers/gpu/drm/Kconfig
> +++ b/drivers/gpu/drm/Kconfig
> @@ -12,6 +12,7 @@ menuconfig DRM
> select I2C
> select I2C_ALGOBIT
> select DMA_SHARED_BUFFER
> + select INTERVAL_TREE
> help
> Kernel-level support for the Direct Rendering Infrastructure (DRI)
> introduced in XFree86 4.0. If you say Y here, you need to select
> diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
> index 04de6fd88f8c..e3acd860f738 100644
> --- a/drivers/gpu/drm/drm_mm.c
> +++ b/drivers/gpu/drm/drm_mm.c
> @@ -153,6 +153,10 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
> INIT_LIST_HEAD(&node->hole_stack);
> list_add(&node->node_list, &hole_node->node_list);
>
> + node->it.start = node->start;
> + node->it.last = node->start + size - 1;
> + interval_tree_insert(&node->it, &mm->interval_tree);
> +
> BUG_ON(node->start + node->size > adj_end);
>
> node->hole_follows = 0;
> @@ -178,39 +182,53 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
> */
> int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
> {
> - struct drm_mm_node *hole;
> u64 end = node->start + node->size;
> - u64 hole_start;
> - u64 hole_end;
> -
> - BUG_ON(node == NULL);
> + struct interval_tree_node *it;
> + struct drm_mm_node *hole;
> + u64 hole_start, hole_end;
>
> /* Find the relevant hole to add our node to */
> - drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
> - if (hole_start > node->start || hole_end < end)
> - continue;
> + it = interval_tree_iter_first(&mm->interval_tree,
> + node->start, (u64)-1);
> + if (it == NULL) {
> + hole = list_last_entry(&mm->head_node.node_list,
> + struct drm_mm_node, node_list);
> + } else {
> + hole = container_of(it, typeof(*hole), it);
> + if (hole->start <= node->start)
> + return -ENOSPC;
> +
> + hole = list_last_entry(&hole->node_list,
> + struct drm_mm_node, node_list);
> + }
>
> - node->mm = mm;
> - node->allocated = 1;
> + hole_start = drm_mm_hole_node_start(hole);
> + hole_end = drm_mm_hole_node_end(hole);
> + if (hole_start > node->start || hole_end < end)
> + return -ENOSPC;
>
> - INIT_LIST_HEAD(&node->hole_stack);
> - list_add(&node->node_list, &hole->node_list);
> + node->mm = mm;
> + node->allocated = 1;
>
> - if (node->start == hole_start) {
> - hole->hole_follows = 0;
> - list_del_init(&hole->hole_stack);
> - }
> + INIT_LIST_HEAD(&node->hole_stack);
> + list_add(&node->node_list, &hole->node_list);
>
> - node->hole_follows = 0;
> - if (end != hole_end) {
> - list_add(&node->hole_stack, &mm->hole_stack);
> - node->hole_follows = 1;
> - }
> + node->it.start = node->start;
> + node->it.last = node->start + node->size - 1;
> + interval_tree_insert(&node->it, &mm->interval_tree);
>
> - return 0;
> + if (node->start == hole_start) {
> + hole->hole_follows = 0;
> + list_del_init(&hole->hole_stack);
> }
>
> - return -ENOSPC;
> + node->hole_follows = 0;
> + if (end != hole_end) {
> + list_add(&node->hole_stack, &mm->hole_stack);
> + node->hole_follows = 1;
> + }
> +
> + return 0;
> }
> EXPORT_SYMBOL(drm_mm_reserve_node);
>
> @@ -300,6 +318,10 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
> INIT_LIST_HEAD(&node->hole_stack);
> list_add(&node->node_list, &hole_node->node_list);
>
> + node->it.start = node->start;
> + node->it.last = node->start + node->size - 1;
> + interval_tree_insert(&node->it, &mm->interval_tree);
> +
> BUG_ON(node->start < start);
> BUG_ON(node->start < adj_start);
> BUG_ON(node->start + node->size > adj_end);
> @@ -388,6 +410,7 @@ void drm_mm_remove_node(struct drm_mm_node *node)
> } else
> list_move(&prev_node->hole_stack, &mm->hole_stack);
>
> + interval_tree_remove(&node->it, &mm->interval_tree);
> list_del(&node->node_list);
> node->allocated = 0;
> }
> @@ -756,6 +779,8 @@ void drm_mm_init(struct drm_mm * mm, u64 start, u64 size)
> mm->head_node.size = start - mm->head_node.start;
> list_add_tail(&mm->head_node.hole_stack, &mm->hole_stack);
>
> + mm->interval_tree = RB_ROOT;
> +
> mm->color_adjust = NULL;
> }
> EXPORT_SYMBOL(drm_mm_init);
> diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
> index 0de6290df4da..6d531fe529bf 100644
> --- a/include/drm/drm_mm.h
> +++ b/include/drm/drm_mm.h
> @@ -37,6 +37,7 @@
> * Generic range manager structs
> */
> #include <linux/bug.h>
> +#include <linux/interval_tree.h>
> #include <linux/kernel.h>
> #include <linux/list.h>
> #include <linux/spinlock.h>
> @@ -61,6 +62,7 @@ enum drm_mm_allocator_flags {
> struct drm_mm_node {
> struct list_head node_list;
> struct list_head hole_stack;
> + struct interval_tree_node it;
> unsigned hole_follows : 1;
> unsigned scanned_block : 1;
> unsigned scanned_prev_free : 1;
> @@ -79,6 +81,9 @@ struct drm_mm {
> /* head_node.node_list is the list of all memory nodes, ordered
> * according to the (increasing) start address of the memory node. */
> struct drm_mm_node head_node;
> + /* Keep an interval_tree for fast lookup of drm_mm_nodes by address. */
> + struct rb_root interval_tree;
> +
> unsigned int scan_check_range : 1;
> unsigned scan_alignment;
> unsigned long scan_color;
> --
> 2.6.0
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread* Re: [PATCH 1/3] drm: Track drm_mm nodes with an interval tree
2015-10-21 15:11 ` Daniel Vetter
@ 2015-10-21 15:14 ` David Herrmann
2015-10-22 8:07 ` [Intel-gfx] " Daniel Vetter
0 siblings, 1 reply; 27+ messages in thread
From: David Herrmann @ 2015-10-21 15:14 UTC (permalink / raw)
To: Daniel Vetter; +Cc: Intel Graphics Development, dri-devel@lists.freedesktop.org
Hi
On Wed, Oct 21, 2015 at 5:11 PM, Daniel Vetter <daniel@ffwll.ch> wrote:
> On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
>> In addition to the last-in/first-out stack for accessing drm_mm nodes,
>> we occasionally and in the future often want to find a drm_mm_node by an
>> address. To do so efficiently we need to track the nodes in an interval
>> tree - lookups for a particular address will then be O(lg(N)), where N
>> is the number of nodes in the range manager as opposed to O(N).
>> Insertion however gains an extra O(lg(N)) step for all nodes
>> irrespective of whether the interval tree is in use. For future i915
>> patches, eliminating the linear walk is a significant improvement.
>>
>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>> Cc: dri-devel@lists.freedesktop.org
>
> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>
> I guess for simpler merge ordering we can just pull this into drm-intel
> and patch up the vma manager (just need to drop a lot of code and adjust
> the search to use the drm_mm internal_tree nodes) later on.
Agreed.
Acked-by: David Herrmann <dh.herrmann@gmail.com>
Thanks
David
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 27+ messages in thread
* Re: [Intel-gfx] [PATCH 1/3] drm: Track drm_mm nodes with an interval tree
2015-10-21 15:14 ` David Herrmann
@ 2015-10-22 8:07 ` Daniel Vetter
0 siblings, 0 replies; 27+ messages in thread
From: Daniel Vetter @ 2015-10-22 8:07 UTC (permalink / raw)
To: David Herrmann
Cc: Intel Graphics Development, dri-devel@lists.freedesktop.org
On Wed, Oct 21, 2015 at 5:14 PM, David Herrmann <dh.herrmann@gmail.com> wrote:
> On Wed, Oct 21, 2015 at 5:11 PM, Daniel Vetter <daniel@ffwll.ch> wrote:
>> On Tue, Oct 06, 2015 at 11:53:09AM +0100, Chris Wilson wrote:
>>> In addition to the last-in/first-out stack for accessing drm_mm nodes,
>>> we occasionally and in the future often want to find a drm_mm_node by an
>>> address. To do so efficiently we need to track the nodes in an interval
>>> tree - lookups for a particular address will then be O(lg(N)), where N
>>> is the number of nodes in the range manager as opposed to O(N).
>>> Insertion however gains an extra O(lg(N)) step for all nodes
>>> irrespective of whether the interval tree is in use. For future i915
>>> patches, eliminating the linear walk is a significant improvement.
>>>
>>> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
>>> Cc: dri-devel@lists.freedesktop.org
>>
>> Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
>>
>> I guess for simpler merge ordering we can just pull this into drm-intel
>> and patch up the vma manager (just need to drop a lot of code and adjust
>> the search to use the drm_mm internal_tree nodes) later on.
>
> Agreed.
>
> Acked-by: David Herrmann <dh.herrmann@gmail.com>
Also has Dave's irc-ack for pushing through drm-intel.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
_______________________________________________
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel
^ permalink raw reply [flat|nested] 27+ messages in thread
* [PATCH 1/3] drm/i915: Fix obj->map_and_fenceable for ppgtt
@ 2014-05-15 15:55 Chris Wilson
2014-05-15 15:55 ` [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer Chris Wilson
0 siblings, 1 reply; 27+ messages in thread
From: Chris Wilson @ 2014-05-15 15:55 UTC (permalink / raw)
To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky
With ppgtt, it is no longer correct to mark an object as
map_and_fenceable if we simply unbind it from the global gtt. This has
consequences during execbuffer where we simply use
obj->map_and_fenceable in use_cpu_reloc() to decide which access method
to use for writing into the object. Now for a ppgtt object,
map_and_fenceable will be true when it is not bound into the ggtt but
only in a ppgtt, leading to an invalid access on a non-llc architecture.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
drivers/gpu/drm/i915/i915_gem.c | 8 +-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 146 +++++++++++++++--------------
2 files changed, 79 insertions(+), 75 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f8b2c16745f8..844ea6048321 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2807,9 +2807,8 @@ int i915_vma_unbind(struct i915_vma *vma)
i915_gem_gtt_finish_object(obj);
list_del_init(&vma->mm_list);
- /* Avoid an unnecessary call to unbind on rebind. */
if (i915_is_ggtt(vma->vm))
- obj->map_and_fenceable = true;
+ obj->map_and_fenceable = false;
drm_mm_remove_node(&vma->node);
i915_gem_vma_destroy(vma);
@@ -3159,6 +3158,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
return 0;
}
} else if (enable) {
+ if (WARN_ON(!obj->map_and_fenceable))
+ return -EINVAL;
+
reg = i915_find_fence_reg(dev);
if (IS_ERR(reg))
return PTR_ERR(reg);
@@ -4170,8 +4172,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
obj->fence_reg = I915_FENCE_REG_NONE;
obj->madv = I915_MADV_WILLNEED;
- /* Avoid an unnecessary call to unbind on the first bind. */
- obj->map_and_fenceable = true;
i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c7ee1e3013ac..b4d8c31c0919 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
struct eb_vmas {
struct list_head vmas;
@@ -532,14 +533,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
}
static int
-need_reloc_mappable(struct i915_vma *vma)
-{
- struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
- i915_is_ggtt(vma->vm);
-}
-
-static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct intel_ring_buffer *ring,
bool *need_reloc)
@@ -547,19 +540,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
- bool need_fence;
unsigned flags;
int ret;
flags = 0;
-
- need_fence =
- has_fenced_gpu_access &&
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode != I915_TILING_NONE;
- if (need_fence || need_reloc_mappable(vma))
+ if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
flags |= PIN_MAPPABLE;
-
if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
flags |= PIN_GLOBAL;
@@ -595,6 +581,27 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
return 0;
}
+static bool
+need_reloc_mappable(struct i915_vma *vma)
+{
+ struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+
+ if (entry->relocation_count == 0)
+ return false;
+
+ if (!i915_is_ggtt(vma->vm))
+ return false;
+
+ /* See also use_cpu_reloc() */
+ if (HAS_LLC(vma->obj->base.dev))
+ return false;
+
+ if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+ return false;
+
+ return true;
+}
+
static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
struct list_head *vmas,
@@ -627,9 +634,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
obj->tiling_mode != I915_TILING_NONE;
need_mappable = need_fence || need_reloc_mappable(vma);
- if (need_mappable)
+ if (need_mappable) {
+ entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
list_move(&vma->exec_list, &ordered_vmas);
- else
+ } else
list_move_tail(&vma->exec_list, &ordered_vmas);
obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
@@ -657,25 +665,14 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
/* Unbind any ill-fitting objects or pin. */
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- bool need_fence, need_mappable;
obj = vma->obj;
if (!drm_mm_node_allocated(&vma->node))
continue;
- need_fence =
- has_fenced_gpu_access &&
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode != I915_TILING_NONE;
- need_mappable = need_fence || need_reloc_mappable(vma);
-
- WARN_ON((need_mappable || need_fence) &&
- !i915_is_ggtt(vma->vm));
-
- if ((entry->alignment &&
- vma->node.start & (entry->alignment - 1)) ||
- (need_mappable && !obj->map_and_fenceable))
+ if ((entry->alignment && vma->node.start & (entry->alignment - 1)) ||
+ (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable))
ret = i915_vma_unbind(vma);
else
ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -776,9 +773,9 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
* relocations were valid.
*/
for (j = 0; j < exec[i].relocation_count; j++) {
- if (copy_to_user(&user_relocs[j].presumed_offset,
- &invalid_offset,
- sizeof(invalid_offset))) {
+ if (__copy_to_user(&user_relocs[j].presumed_offset,
+ &invalid_offset,
+ sizeof(invalid_offset))) {
ret = -EFAULT;
mutex_lock(&dev->struct_mutex);
goto err;
@@ -1268,33 +1265,28 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
- if (flags & I915_DISPATCH_SECURE &&
- !batch_obj->has_global_gtt_mapping) {
- /* When we have multiple VMs, we'll need to make sure that we
- * allocate space first */
- struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
- BUG_ON(!vma);
- vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
- }
+ if (flags & I915_DISPATCH_SECURE) {
+ ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+ if (ret)
+ goto err;
- if (flags & I915_DISPATCH_SECURE)
exec_start += i915_gem_obj_ggtt_offset(batch_obj);
- else
+ } else
exec_start += i915_gem_obj_offset(batch_obj, vm);
ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
if (ret)
- goto err;
+ goto err_batch;
ret = i915_switch_context(ring, ctx);
if (ret)
- goto err;
+ goto err_batch;
if (ring == &dev_priv->ring[RCS] &&
mode != dev_priv->relative_constants_mode) {
ret = intel_ring_begin(ring, 4);
if (ret)
- goto err;
+ goto err_batch;
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
@@ -1308,30 +1300,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
ret = i915_reset_gen7_sol_offsets(dev, ring);
if (ret)
- goto err;
+ goto err_batch;
}
-
exec_len = args->batch_len;
if (cliprects) {
for (i = 0; i < args->num_cliprects; i++) {
ret = i915_emit_box(dev, &cliprects[i],
args->DR1, args->DR4);
if (ret)
- goto err;
+ goto err_batch;
ret = ring->dispatch_execbuffer(ring,
exec_start, exec_len,
flags);
if (ret)
- goto err;
+ goto err_batch;
}
} else {
ret = ring->dispatch_execbuffer(ring,
exec_start, exec_len,
flags);
if (ret)
- goto err;
+ goto err_batch;
}
trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
@@ -1339,6 +1330,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
+err_batch:
+ if (flags & I915_DISPATCH_SECURE)
+ i915_gem_object_ggtt_unpin(batch_obj);
err:
/* the request owns the ref now */
i915_gem_context_unreference(ctx);
@@ -1420,18 +1414,21 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
if (!ret) {
+ struct drm_i915_gem_exec_object __user *user_exec_list =
+ to_user_ptr(args->buffers_ptr);
+
/* Copy the new buffer offsets back to the user's exec list. */
- for (i = 0; i < args->buffer_count; i++)
- exec_list[i].offset = exec2_list[i].offset;
- /* ... and back out to userspace */
- ret = copy_to_user(to_user_ptr(args->buffers_ptr),
- exec_list,
- sizeof(*exec_list) * args->buffer_count);
- if (ret) {
- ret = -EFAULT;
- DRM_DEBUG("failed to copy %d exec entries "
- "back to user (%d)\n",
- args->buffer_count, ret);
+ for (i = 0; i < args->buffer_count; i++) {
+ ret = __copy_to_user(&user_exec_list[i].offset,
+ &exec2_list[i].offset,
+ sizeof(user_exec_list[i].offset));
+ if (ret) {
+ ret = -EFAULT;
+ DRM_DEBUG("failed to copy %d exec entries "
+ "back to user (%d)\n",
+ args->buffer_count, ret);
+ break;
+ }
}
}
@@ -1482,14 +1479,21 @@ i915_gem_execbuffer2(struct drm_device *dev, void *data,
ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
if (!ret) {
/* Copy the new buffer offsets back to the user's exec list. */
- ret = copy_to_user(to_user_ptr(args->buffers_ptr),
- exec2_list,
- sizeof(*exec2_list) * args->buffer_count);
- if (ret) {
- ret = -EFAULT;
- DRM_DEBUG("failed to copy %d exec entries "
- "back to user (%d)\n",
- args->buffer_count, ret);
+ struct drm_i915_gem_exec_object2 *user_exec_list =
+ to_user_ptr(args->buffers_ptr);
+ int i;
+
+ for (i = 0; i < args->buffer_count; i++) {
+ ret = __copy_to_user(&user_exec_list[i].offset,
+ &exec2_list[i].offset,
+ sizeof(user_exec_list[i].offset));
+ if (ret) {
+ ret = -EFAULT;
+ DRM_DEBUG("failed to copy %d exec entries "
+ "back to user\n",
+ args->buffer_count);
+ break;
+ }
}
}
--
2.0.0.rc2
^ permalink raw reply related [flat|nested] 27+ messages in thread* [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer
2014-05-15 15:55 [PATCH 1/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
@ 2014-05-15 15:55 ` Chris Wilson
0 siblings, 0 replies; 27+ messages in thread
From: Chris Wilson @ 2014-05-15 15:55 UTC (permalink / raw)
To: intel-gfx
Userspace can pass in an offset that it presumes the object is located
at. The kernel will then do its utmost to fit the object into that
location. The assumption is that userspace is handling its own object
locations (for example along with full-ppgtt) and that the kernel will
rarely have to make space for the user's requests.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_drv.h | 3 ++
drivers/gpu/drm/i915/i915_gem.c | 50 ++++++++++++++++++++--------
drivers/gpu/drm/i915/i915_gem_evict.c | 52 ++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 ++++--
include/uapi/drm/i915_drm.h | 3 +-
5 files changed, 100 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 64a1b2340e3a..04fe312df8e4 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2406,6 +2406,9 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
unsigned long start,
unsigned long end,
unsigned flags);
+int __must_check
+i915_gem_evict_range(struct drm_device *dev, struct i915_address_space *vm,
+ unsigned long start, unsigned long end);
int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
int i915_gem_evict_everything(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 79244b911125..ba4f266b5f2e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3313,22 +3313,43 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
if (IS_ERR(vma))
goto err_unpin;
+ if (flags & PIN_OFFSET_FIXED) {
+ uint64_t offset = flags & PIN_OFFSET_MASK;
+ if (alignment && offset & (alignment - 1)) {
+ vma = ERR_PTR(-EINVAL);
+ goto err_free_vma;
+ }
+ vma->node.start = offset;
+ vma->node.size = size;
+ vma->node.color = obj->cache_level;
+ ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+ if (ret) {
+ ret = i915_gem_evict_range(dev, vm, start, end);
+ if (ret == 0)
+ ret = drm_mm_reserve_node(&vm->mm, &vma->node);
+ }
+ if (ret) {
+ vma = ERR_PTR(ret);
+ goto err_free_vma;
+ }
+ } else {
search_free:
- ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
- size, alignment,
- obj->cache_level,
- start, end,
- DRM_MM_SEARCH_DEFAULT,
- DRM_MM_CREATE_DEFAULT);
- if (ret) {
- ret = i915_gem_evict_something(dev, vm, size, alignment,
- obj->cache_level,
- start, end,
- flags);
- if (ret == 0)
- goto search_free;
+ ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+ size, alignment,
+ obj->cache_level,
+ start, end,
+ DRM_MM_SEARCH_DEFAULT,
+ DRM_MM_CREATE_DEFAULT);
+ if (ret) {
+ ret = i915_gem_evict_something(dev, vm, size, alignment,
+ obj->cache_level,
+ start, end,
+ flags);
+ if (ret == 0)
+ goto search_free;
- goto err_free_vma;
+ goto err_free_vma;
+ }
}
if (WARN_ON(!i915_gem_valid_gtt_space(dev, &vma->node,
obj->cache_level))) {
@@ -3911,6 +3932,7 @@ i915_gem_object_pin(struct drm_i915_gem_object *obj,
if ((alignment &&
vma->node.start & (alignment - 1)) ||
(flags & PIN_MAPPABLE && !obj->map_and_fenceable) ||
+ (flags & PIN_OFFSET_FIXED && vma->node.start != (flags & PIN_OFFSET_MASK)) ||
(flags & PIN_OFFSET_BIAS && vma->node.start < (flags & PIN_OFFSET_MASK))) {
WARN(vma->pin_count,
"bo is already pinned with incorrect alignment:"
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index bbf4b12d842e..bd3020afd363 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -194,6 +194,58 @@ found:
return ret;
}
+int
+i915_gem_evict_range(struct drm_device *dev, struct i915_address_space *vm,
+ unsigned long start, unsigned long end)
+{
+ struct drm_mm_node *node;
+ struct list_head eviction_list;
+ int ret = 0;
+
+ INIT_LIST_HEAD(&eviction_list);
+ drm_mm_for_each_node(node, &vm->mm) {
+ struct i915_vma *vma;
+
+ if (node->start + node->size <= start)
+ continue;
+ if (node->start >= end)
+ break;
+
+ vma = container_of(node, typeof(*vma), node);
+ if (vma->pin_count) {
+ ret = -EBUSY;
+ break;
+ }
+
+ if (WARN_ON(!list_empty(&vma->exec_list))) {
+ ret = -EINVAL;
+ break;
+ }
+
+ drm_gem_object_reference(&vma->obj->base);
+ list_add(&vma->exec_list, &eviction_list);
+ }
+
+ while (!list_empty(&eviction_list)) {
+ struct i915_vma *vma;
+ struct drm_gem_object *obj;
+
+ vma = list_first_entry(&eviction_list,
+ struct i915_vma,
+ exec_list);
+
+ obj = &vma->obj->base;
+
+ list_del_init(&vma->exec_list);
+ if (ret == 0)
+ ret = i915_vma_unbind(vma);
+
+ drm_gem_object_unreference(obj);
+ }
+
+ return ret;
+}
+
/**
* i915_gem_evict_vm - Evict all idle vmas from a vm
*
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 15cb3825dfef..31130f14cb49 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -553,6 +553,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
flags |= PIN_GLOBAL;
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
flags |= BIAS | PIN_OFFSET_BIAS;
+ if (entry->flags & EXEC_OBJECT_PINNED)
+ flags |= entry->offset | PIN_OFFSET_FIXED;
ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
if (ret)
@@ -677,6 +679,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
continue;
if ((entry->alignment && vma->node.start & (entry->alignment - 1)) ||
+ (entry->flags & EXEC_OBJECT_PINNED && vma->node.start != entry->offset) ||
(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable) ||
(entry->flags & __EXEC_OBJECT_NEEDS_BIAS && vma->node.start < BIAS))
ret = i915_vma_unbind(vma);
@@ -1045,8 +1048,10 @@ struct drm_i915_gem_object *
eb_get_batch(struct eb_vmas *eb)
{
struct i915_vma *vma = list_entry(eb->vmas.prev, typeof(*vma), exec_list);
- if (INTEL_INFO(vma->obj->base.dev)->gen >= 7)
- vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+ if (INTEL_INFO(vma->obj->base.dev)->gen >= 7) {
+ if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
+ vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+ }
return vma->obj;
}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 8a3e4ef00c3d..310fb2b602c7 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -660,7 +660,8 @@ struct drm_i915_gem_exec_object2 {
#define EXEC_OBJECT_NEEDS_FENCE (1<<0)
#define EXEC_OBJECT_NEEDS_GTT (1<<1)
#define EXEC_OBJECT_WRITE (1<<2)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
+#define EXEC_OBJECT_PINNED (1<<4)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
__u64 flags;
__u64 rsvd1;
--
2.0.0.rc2
^ permalink raw reply related [flat|nested] 27+ messages in thread
end of thread, other threads:[~2015-12-02 13:42 UTC | newest]
Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-10-06 10:53 [PATCH 1/3] drm: Track drm_mm nodes with an interval tree Chris Wilson
2015-10-06 10:53 ` [PATCH 2/3] drm/i915: Allow the user to pass a context to any ring Chris Wilson
2015-10-06 13:57 ` Daniel, Thomas
2015-12-02 13:42 ` Tvrtko Ursulin
2015-10-06 10:53 ` [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer Chris Wilson
2015-10-06 13:59 ` Daniel, Thomas
2015-10-21 15:07 ` Daniel Vetter
2015-10-21 15:11 ` Daniel, Thomas
2015-10-23 2:31 ` Yang, Rong R
2015-10-27 11:51 ` akash goel
2015-11-05 10:57 ` Daniel, Thomas
2015-12-02 13:28 ` Chris Wilson
2015-11-05 17:51 ` Kristian Høgsberg
2015-11-05 18:17 ` Jesse Barnes
2015-11-06 13:38 ` Chris Wilson
2015-11-06 17:01 ` Jesse Barnes
2015-11-06 23:58 ` Kristian Høgsberg
2015-10-06 11:11 ` [Intel-gfx] [PATCH 1/3] drm: Track drm_mm nodes with an interval tree Daniel Vetter
2015-10-06 11:19 ` Chris Wilson
2015-10-06 12:01 ` Daniel Vetter
2015-10-07 10:22 ` David Herrmann
2015-10-16 8:54 ` Daniel, Thomas
2015-10-16 14:26 ` [Intel-gfx] " Daniel Vetter
2015-10-21 15:11 ` Daniel Vetter
2015-10-21 15:14 ` David Herrmann
2015-10-22 8:07 ` [Intel-gfx] " Daniel Vetter
-- strict thread matches above, loose matches on Subject: below --
2014-05-15 15:55 [PATCH 1/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
2014-05-15 15:55 ` [PATCH 3/3] drm/i915: Add soft-pinning API for execbuffer Chris Wilson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox