* [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt
@ 2014-08-09 16:37 Chris Wilson
2014-08-09 16:37 ` [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
` (2 more replies)
0 siblings, 3 replies; 11+ messages in thread
From: Chris Wilson @ 2014-08-09 16:37 UTC (permalink / raw)
To: intel-gfx
If an object is not bound into the global GTT, then it cannot be
accessed via the GTT. This restores the original code that was muddled
by ppGTT. In the process, we remove a WARN that had long outlived its
usefulness and was simply being coded around instead.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem.c | 22 +++++-----------------
1 file changed, 5 insertions(+), 17 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 80b807b724a6..99250d27668d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3594,11 +3594,12 @@ int
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
{
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+ struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
uint32_t old_write_domain, old_read_domains;
int ret;
/* Not valid to be called on unbound objects. */
- if (!i915_gem_obj_bound_any(obj))
+ if (vma == NULL)
return -EINVAL;
if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
@@ -3640,13 +3641,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
old_write_domain);
/* And bump the LRU for this access */
- if (i915_gem_object_is_inactive(obj)) {
- struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
- if (vma)
- list_move_tail(&vma->mm_list,
- &dev_priv->gtt.base.inactive_list);
-
- }
+ if (i915_gem_object_is_inactive(obj))
+ list_move_tail(&vma->mm_list,
+ &dev_priv->gtt.base.inactive_list);
return 0;
}
@@ -3810,9 +3807,6 @@ static bool is_pin_display(struct drm_i915_gem_object *obj)
{
struct i915_vma *vma;
- if (list_empty(&obj->vma_list))
- return false;
-
vma = i915_gem_obj_to_ggtt(obj);
if (!vma)
return false;
@@ -5255,12 +5249,6 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
{
struct i915_vma *vma;
- /* This WARN has probably outlived its usefulness (callers already
- * WARN if they don't find the GGTT vma they expect). When removing,
- * remember to remove the pre-check in is_pin_display() as well */
- if (WARN_ON(list_empty(&obj->vma_list)))
- return NULL;
-
vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
if (vma->vm != obj_to_ggtt(obj))
return NULL;
--
2.1.0.rc1
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt
2014-08-09 16:37 [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt Chris Wilson
@ 2014-08-09 16:37 ` Chris Wilson
2014-08-09 19:29 ` Ben Widawsky
2014-08-09 16:37 ` [PATCH 3/3] drm/i915: Remove fenced_gpu_access and pending_fenced_gpu_access Chris Wilson
2014-08-11 9:36 ` [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt Daniel Vetter
2 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2014-08-09 16:37 UTC (permalink / raw)
To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky
With ppgtt, it is no longer correct to mark an object as
map_and_fenceable if we simply unbind it from the global gtt. This has
consequences during execbuffer where we simply use
obj->map_and_fenceable in use_cpu_reloc() to decide which access method
to use for writing into the object. Now for a ppgtt object,
map_and_fenceable will be true when it is not bound into the ggtt but
only in a ppgtt, leading to an invalid access on a non-llc architecture.
v2: Revamp and resend to ease future patches.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
drivers/gpu/drm/i915/i915_gem.c | 8 +--
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 82 ++++++++++++++----------------
2 files changed, 43 insertions(+), 47 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 99250d27668d..6366230c4e32 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2930,9 +2930,8 @@ int i915_vma_unbind(struct i915_vma *vma)
vma->unbind_vma(vma);
list_del_init(&vma->mm_list);
- /* Avoid an unnecessary call to unbind on rebind. */
if (i915_is_ggtt(vma->vm))
- obj->map_and_fenceable = true;
+ obj->map_and_fenceable = false;
drm_mm_remove_node(&vma->node);
i915_gem_vma_destroy(vma);
@@ -3284,6 +3283,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
return 0;
}
} else if (enable) {
+ if (WARN_ON(!obj->map_and_fenceable))
+ return -EINVAL;
+
reg = i915_find_fence_reg(dev);
if (IS_ERR(reg))
return PTR_ERR(reg);
@@ -4333,8 +4335,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
obj->fence_reg = I915_FENCE_REG_NONE;
obj->madv = I915_MADV_WILLNEED;
- /* Avoid an unnecessary call to unbind on the first bind. */
- obj->map_and_fenceable = true;
i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 60998fc4e5b2..87e9599f9e4b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
#define __EXEC_OBJECT_NEEDS_BIAS (1<<28)
#define BATCH_OFFSET_BIAS (256*1024)
@@ -433,7 +434,7 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
if (obj->active && in_atomic())
return -EFAULT;
- if (use_cpu_reloc(obj))
+ if (use_cpu_reloc(obj) || !i915_gem_obj_ggtt_bound(obj))
ret = relocate_entry_cpu(obj, reloc, target_offset);
else
ret = relocate_entry_gtt(obj, reloc, target_offset);
@@ -535,14 +536,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
}
static int
-need_reloc_mappable(struct i915_vma *vma)
-{
- struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
- i915_is_ggtt(vma->vm);
-}
-
-static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct intel_engine_cs *ring,
bool *need_reloc)
@@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
- bool need_fence;
uint64_t flags;
int ret;
flags = 0;
-
- need_fence =
- has_fenced_gpu_access &&
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode != I915_TILING_NONE;
- if (need_fence || need_reloc_mappable(vma))
+ if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
flags |= PIN_MAPPABLE;
-
if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
flags |= PIN_GLOBAL;
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
@@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
}
static bool
-eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
+need_reloc_mappable(struct i915_vma *vma)
{
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- struct drm_i915_gem_object *obj = vma->obj;
- bool need_fence, need_mappable;
- need_fence =
- has_fenced_gpu_access &&
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode != I915_TILING_NONE;
- need_mappable = need_fence || need_reloc_mappable(vma);
+ if (entry->relocation_count == 0)
+ return false;
- WARN_ON((need_mappable || need_fence) &&
+ if (!i915_is_ggtt(vma->vm))
+ return false;
+
+ /* See also use_cpu_reloc() */
+ if (HAS_LLC(vma->obj->base.dev))
+ return false;
+
+ if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+ return false;
+
+ return true;
+}
+
+static bool
+eb_vma_misplaced(struct i915_vma *vma)
+{
+ struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
!i915_is_ggtt(vma->vm));
if (entry->alignment &&
vma->node.start & (entry->alignment - 1))
return true;
- if (need_mappable && !obj->map_and_fenceable)
+ if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
return true;
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
@@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
obj->tiling_mode != I915_TILING_NONE;
need_mappable = need_fence || need_reloc_mappable(vma);
- if (need_mappable)
+ if (need_mappable) {
+ entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
list_move(&vma->exec_list, &ordered_vmas);
- else
+ } else
list_move_tail(&vma->exec_list, &ordered_vmas);
obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
@@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
if (!drm_mm_node_allocated(&vma->node))
continue;
- if (eb_vma_misplaced(vma, has_fenced_gpu_access))
+ if (eb_vma_misplaced(vma))
ret = i915_vma_unbind(vma);
else
ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -1386,25 +1387,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
- if (flags & I915_DISPATCH_SECURE &&
- !batch_obj->has_global_gtt_mapping) {
- /* When we have multiple VMs, we'll need to make sure that we
- * allocate space first */
- struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
- BUG_ON(!vma);
- vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
- }
+ if (flags & I915_DISPATCH_SECURE) {
+ ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+ if (ret)
+ goto err;
- if (flags & I915_DISPATCH_SECURE)
exec_start += i915_gem_obj_ggtt_offset(batch_obj);
- else
+ } else
exec_start += i915_gem_obj_offset(batch_obj, vm);
ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
- args, &eb->vmas, batch_obj, exec_start, flags);
- if (ret)
- goto err;
+ args, &eb->vmas, batch_obj, exec_start, flags);
+ if (flags & I915_DISPATCH_SECURE)
+ i915_gem_object_ggtt_unpin(batch_obj);
err:
/* the request owns the ref now */
i915_gem_context_unreference(ctx);
--
2.1.0.rc1
^ permalink raw reply related [flat|nested] 11+ messages in thread* Re: [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt
2014-08-09 16:37 ` [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
@ 2014-08-09 19:29 ` Ben Widawsky
2014-08-10 6:55 ` Chris Wilson
2014-08-10 7:06 ` [PATCH] drm/i915: Force CPU relocations if not GTT mapped Chris Wilson
0 siblings, 2 replies; 11+ messages in thread
From: Ben Widawsky @ 2014-08-09 19:29 UTC (permalink / raw)
To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx
On Sat, Aug 09, 2014 at 05:37:23PM +0100, Chris Wilson wrote:
> With ppgtt, it is no longer correct to mark an object as
> map_and_fenceable if we simply unbind it from the global gtt. This has
> consequences during execbuffer where we simply use
> obj->map_and_fenceable in use_cpu_reloc() to decide which access method
> to use for writing into the object. Now for a ppgtt object,
> map_and_fenceable will be true when it is not bound into the ggtt but
> only in a ppgtt, leading to an invalid access on a non-llc architecture.
The last sentence confused me for some reason. I think the first two
sentences were perfectly succinct in describing the problem.
>
> v2: Revamp and resend to ease future patches.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
I didn't read the patch closely, but the problem is clear. It seems like
the one hunk:
> - if (use_cpu_reloc(obj))
> + if (use_cpu_reloc(obj) || !i915_gem_obj_ggtt_bound(obj))
would be sufficient to fix the problem.
I had a local solution at one point (I'm too lazy to check what I've
sent out these days) where I turned map_and_fenceable into a getter(). I
thought that looked pretty nice as well.
lgtm
[snip]
--
Ben Widawsky, Intel Open Source Technology Center
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt
2014-08-09 19:29 ` Ben Widawsky
@ 2014-08-10 6:55 ` Chris Wilson
2014-08-10 7:04 ` Chris Wilson
2014-08-10 7:06 ` [PATCH] drm/i915: Force CPU relocations if not GTT mapped Chris Wilson
1 sibling, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2014-08-10 6:55 UTC (permalink / raw)
To: Ben Widawsky; +Cc: Daniel Vetter, intel-gfx
On Sat, Aug 09, 2014 at 12:29:17PM -0700, Ben Widawsky wrote:
> On Sat, Aug 09, 2014 at 05:37:23PM +0100, Chris Wilson wrote:
> > With ppgtt, it is no longer correct to mark an object as
> > map_and_fenceable if we simply unbind it from the global gtt. This has
> > consequences during execbuffer where we simply use
> > obj->map_and_fenceable in use_cpu_reloc() to decide which access method
> > to use for writing into the object. Now for a ppgtt object,
> > map_and_fenceable will be true when it is not bound into the ggtt but
> > only in a ppgtt, leading to an invalid access on a non-llc architecture.
>
> The last sentence confused me for some reason. I think the first two
> sentences were perfectly succinct in describing the problem.
Actually I think the last sentence is obsolete. So long as we only tweak
map_and_fenceable when manipulating the ggtt, the current optimisation
still holds and the subject is wrong.
> > v2: Revamp and resend to ease future patches.
> >
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> > Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
>
> I didn't read the patch closely, but the problem is clear. It seems like
> the one hunk:
> > - if (use_cpu_reloc(obj))
> > + if (use_cpu_reloc(obj) || !i915_gem_obj_ggtt_bound(obj))
>
> would be sufficient to fix the problem.
Right, I wanted this patch as later patches build upon the flags and
using the flags makes the code cleaner. This tweak was something that
occurred to me when reviewing the patch and looking at recent bug
reports.
Let me go back and look at whether we can keep the obj->map_and_fencceable
micro-optimisation.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt
2014-08-10 6:55 ` Chris Wilson
@ 2014-08-10 7:04 ` Chris Wilson
0 siblings, 0 replies; 11+ messages in thread
From: Chris Wilson @ 2014-08-10 7:04 UTC (permalink / raw)
To: Ben Widawsky, intel-gfx, Daniel Vetter
On Sun, Aug 10, 2014 at 07:55:46AM +0100, Chris Wilson wrote:
> On Sat, Aug 09, 2014 at 12:29:17PM -0700, Ben Widawsky wrote:
> > I didn't read the patch closely, but the problem is clear. It seems like
> > the one hunk:
> > > - if (use_cpu_reloc(obj))
> > > + if (use_cpu_reloc(obj) || !i915_gem_obj_ggtt_bound(obj))
> >
> > would be sufficient to fix the problem.
>
> Right, I wanted this patch as later patches build upon the flags and
> using the flags makes the code cleaner. This tweak was something that
> occurred to me when reviewing the patch and looking at recent bug
> reports.
As it happens, that is the key to the patch. Setting
obj->map_and_fenceable correctly makes that tweak redundant.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH] drm/i915: Force CPU relocations if not GTT mapped
2014-08-09 19:29 ` Ben Widawsky
2014-08-10 6:55 ` Chris Wilson
@ 2014-08-10 7:06 ` Chris Wilson
2014-08-11 10:02 ` Daniel Vetter
1 sibling, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2014-08-10 7:06 UTC (permalink / raw)
To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky
Move the decision on whether we need to have a mappable object during
execbuffer to the fore and then reuse that decision by propagating the
flag through to reservation. As a corollary, before doing the actual
relocation through the GTT, we can make sure that we do have a GTT
mapping through which to operate.
v2: Revamp and resend to ease future patches.
v3: Refresh patch rationale
References: https://bugs.freedesktop.org/show_bug.cgi?id=81094
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
drivers/gpu/drm/i915/i915_gem.c | 8 +--
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 80 ++++++++++++++----------------
2 files changed, 42 insertions(+), 46 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 99250d27668d..6366230c4e32 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2930,9 +2930,8 @@ int i915_vma_unbind(struct i915_vma *vma)
vma->unbind_vma(vma);
list_del_init(&vma->mm_list);
- /* Avoid an unnecessary call to unbind on rebind. */
if (i915_is_ggtt(vma->vm))
- obj->map_and_fenceable = true;
+ obj->map_and_fenceable = false;
drm_mm_remove_node(&vma->node);
i915_gem_vma_destroy(vma);
@@ -3284,6 +3283,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
return 0;
}
} else if (enable) {
+ if (WARN_ON(!obj->map_and_fenceable))
+ return -EINVAL;
+
reg = i915_find_fence_reg(dev);
if (IS_ERR(reg))
return PTR_ERR(reg);
@@ -4333,8 +4335,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
obj->fence_reg = I915_FENCE_REG_NONE;
obj->madv = I915_MADV_WILLNEED;
- /* Avoid an unnecessary call to unbind on the first bind. */
- obj->map_and_fenceable = true;
i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 60998fc4e5b2..8b734d5d16b4 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
#define __EXEC_OBJECT_NEEDS_BIAS (1<<28)
#define BATCH_OFFSET_BIAS (256*1024)
@@ -535,14 +536,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
}
static int
-need_reloc_mappable(struct i915_vma *vma)
-{
- struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
- i915_is_ggtt(vma->vm);
-}
-
-static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct intel_engine_cs *ring,
bool *need_reloc)
@@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
- bool need_fence;
uint64_t flags;
int ret;
flags = 0;
-
- need_fence =
- has_fenced_gpu_access &&
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode != I915_TILING_NONE;
- if (need_fence || need_reloc_mappable(vma))
+ if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
flags |= PIN_MAPPABLE;
-
if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
flags |= PIN_GLOBAL;
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
@@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
}
static bool
-eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
+need_reloc_mappable(struct i915_vma *vma)
{
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- struct drm_i915_gem_object *obj = vma->obj;
- bool need_fence, need_mappable;
- need_fence =
- has_fenced_gpu_access &&
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode != I915_TILING_NONE;
- need_mappable = need_fence || need_reloc_mappable(vma);
+ if (entry->relocation_count == 0)
+ return false;
- WARN_ON((need_mappable || need_fence) &&
+ if (!i915_is_ggtt(vma->vm))
+ return false;
+
+ /* See also use_cpu_reloc() */
+ if (HAS_LLC(vma->obj->base.dev))
+ return false;
+
+ if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+ return false;
+
+ return true;
+}
+
+static bool
+eb_vma_misplaced(struct i915_vma *vma)
+{
+ struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+ struct drm_i915_gem_object *obj = vma->obj;
+
+ WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
!i915_is_ggtt(vma->vm));
if (entry->alignment &&
vma->node.start & (entry->alignment - 1))
return true;
- if (need_mappable && !obj->map_and_fenceable)
+ if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
return true;
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
@@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
obj->tiling_mode != I915_TILING_NONE;
need_mappable = need_fence || need_reloc_mappable(vma);
- if (need_mappable)
+ if (need_mappable) {
+ entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
list_move(&vma->exec_list, &ordered_vmas);
- else
+ } else
list_move_tail(&vma->exec_list, &ordered_vmas);
obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
@@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
if (!drm_mm_node_allocated(&vma->node))
continue;
- if (eb_vma_misplaced(vma, has_fenced_gpu_access))
+ if (eb_vma_misplaced(vma))
ret = i915_vma_unbind(vma);
else
ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -1386,25 +1387,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
- if (flags & I915_DISPATCH_SECURE &&
- !batch_obj->has_global_gtt_mapping) {
- /* When we have multiple VMs, we'll need to make sure that we
- * allocate space first */
- struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
- BUG_ON(!vma);
- vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
- }
+ if (flags & I915_DISPATCH_SECURE) {
+ ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+ if (ret)
+ goto err;
- if (flags & I915_DISPATCH_SECURE)
exec_start += i915_gem_obj_ggtt_offset(batch_obj);
- else
+ } else
exec_start += i915_gem_obj_offset(batch_obj, vm);
ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
- args, &eb->vmas, batch_obj, exec_start, flags);
- if (ret)
- goto err;
+ args, &eb->vmas, batch_obj, exec_start, flags);
+ if (flags & I915_DISPATCH_SECURE)
+ i915_gem_object_ggtt_unpin(batch_obj);
err:
/* the request owns the ref now */
i915_gem_context_unreference(ctx);
--
2.1.0.rc1
^ permalink raw reply related [flat|nested] 11+ messages in thread* Re: [PATCH] drm/i915: Force CPU relocations if not GTT mapped
2014-08-10 7:06 ` [PATCH] drm/i915: Force CPU relocations if not GTT mapped Chris Wilson
@ 2014-08-11 10:02 ` Daniel Vetter
0 siblings, 0 replies; 11+ messages in thread
From: Daniel Vetter @ 2014-08-11 10:02 UTC (permalink / raw)
To: Chris Wilson; +Cc: Daniel Vetter, intel-gfx, Ben Widawsky
On Sun, Aug 10, 2014 at 08:06:57AM +0100, Chris Wilson wrote:
> Move the decision on whether we need to have a mappable object during
> execbuffer to the fore and then reuse that decision by propagating the
> flag through to reservation. As a corollary, before doing the actual
> relocation through the GTT, we can make sure that we do have a GTT
> mapping through which to operate.
>
> v2: Revamp and resend to ease future patches.
> v3: Refresh patch rationale
>
> References: https://bugs.freedesktop.org/show_bug.cgi?id=81094
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Ben Widawsky <benjamin.widawsky@intel.com>
> Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Ok, the secure batch hunk in here looks rather unrelated and imo also a
bit incomplete. I've dropped it. And I've added a bit of text to the
commit message to explain why this patch touches map_and_fenceable logic.
-Daniel
> ---
> drivers/gpu/drm/i915/i915_gem.c | 8 +--
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 80 ++++++++++++++----------------
> 2 files changed, 42 insertions(+), 46 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 99250d27668d..6366230c4e32 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2930,9 +2930,8 @@ int i915_vma_unbind(struct i915_vma *vma)
> vma->unbind_vma(vma);
>
> list_del_init(&vma->mm_list);
> - /* Avoid an unnecessary call to unbind on rebind. */
> if (i915_is_ggtt(vma->vm))
> - obj->map_and_fenceable = true;
> + obj->map_and_fenceable = false;
>
> drm_mm_remove_node(&vma->node);
> i915_gem_vma_destroy(vma);
> @@ -3284,6 +3283,9 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj)
> return 0;
> }
> } else if (enable) {
> + if (WARN_ON(!obj->map_and_fenceable))
> + return -EINVAL;
> +
> reg = i915_find_fence_reg(dev);
> if (IS_ERR(reg))
> return PTR_ERR(reg);
> @@ -4333,8 +4335,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
>
> obj->fence_reg = I915_FENCE_REG_NONE;
> obj->madv = I915_MADV_WILLNEED;
> - /* Avoid an unnecessary call to unbind on the first bind. */
> - obj->map_and_fenceable = true;
>
> i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 60998fc4e5b2..8b734d5d16b4 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -35,6 +35,7 @@
>
> #define __EXEC_OBJECT_HAS_PIN (1<<31)
> #define __EXEC_OBJECT_HAS_FENCE (1<<30)
> +#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
> #define __EXEC_OBJECT_NEEDS_BIAS (1<<28)
>
> #define BATCH_OFFSET_BIAS (256*1024)
> @@ -535,14 +536,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
> }
>
> static int
> -need_reloc_mappable(struct i915_vma *vma)
> -{
> - struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> - return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
> - i915_is_ggtt(vma->vm);
> -}
> -
> -static int
> i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
> struct intel_engine_cs *ring,
> bool *need_reloc)
> @@ -550,19 +543,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
> struct drm_i915_gem_object *obj = vma->obj;
> struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
> - bool need_fence;
> uint64_t flags;
> int ret;
>
> flags = 0;
> -
> - need_fence =
> - has_fenced_gpu_access &&
> - entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> - obj->tiling_mode != I915_TILING_NONE;
> - if (need_fence || need_reloc_mappable(vma))
> + if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
> flags |= PIN_MAPPABLE;
> -
> if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
> flags |= PIN_GLOBAL;
> if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
> @@ -601,26 +587,40 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
> }
>
> static bool
> -eb_vma_misplaced(struct i915_vma *vma, bool has_fenced_gpu_access)
> +need_reloc_mappable(struct i915_vma *vma)
> {
> struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> - struct drm_i915_gem_object *obj = vma->obj;
> - bool need_fence, need_mappable;
>
> - need_fence =
> - has_fenced_gpu_access &&
> - entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> - obj->tiling_mode != I915_TILING_NONE;
> - need_mappable = need_fence || need_reloc_mappable(vma);
> + if (entry->relocation_count == 0)
> + return false;
>
> - WARN_ON((need_mappable || need_fence) &&
> + if (!i915_is_ggtt(vma->vm))
> + return false;
> +
> + /* See also use_cpu_reloc() */
> + if (HAS_LLC(vma->obj->base.dev))
> + return false;
> +
> + if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
> + return false;
> +
> + return true;
> +}
> +
> +static bool
> +eb_vma_misplaced(struct i915_vma *vma)
> +{
> + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> + struct drm_i915_gem_object *obj = vma->obj;
> +
> + WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP &&
> !i915_is_ggtt(vma->vm));
>
> if (entry->alignment &&
> vma->node.start & (entry->alignment - 1))
> return true;
>
> - if (need_mappable && !obj->map_and_fenceable)
> + if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
> return true;
>
> if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
> @@ -664,9 +664,10 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
> obj->tiling_mode != I915_TILING_NONE;
> need_mappable = need_fence || need_reloc_mappable(vma);
>
> - if (need_mappable)
> + if (need_mappable) {
> + entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
> list_move(&vma->exec_list, &ordered_vmas);
> - else
> + } else
> list_move_tail(&vma->exec_list, &ordered_vmas);
>
> obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
> @@ -696,7 +697,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
> if (!drm_mm_node_allocated(&vma->node))
> continue;
>
> - if (eb_vma_misplaced(vma, has_fenced_gpu_access))
> + if (eb_vma_misplaced(vma))
> ret = i915_vma_unbind(vma);
> else
> ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
> @@ -1386,25 +1387,20 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
> /* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
> * batch" bit. Hence we need to pin secure batches into the global gtt.
> * hsw should have this fixed, but bdw mucks it up again. */
> - if (flags & I915_DISPATCH_SECURE &&
> - !batch_obj->has_global_gtt_mapping) {
> - /* When we have multiple VMs, we'll need to make sure that we
> - * allocate space first */
> - struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
> - BUG_ON(!vma);
> - vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
> - }
> + if (flags & I915_DISPATCH_SECURE) {
> + ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
> + if (ret)
> + goto err;
>
> - if (flags & I915_DISPATCH_SECURE)
> exec_start += i915_gem_obj_ggtt_offset(batch_obj);
> - else
> + } else
> exec_start += i915_gem_obj_offset(batch_obj, vm);
>
> ret = legacy_ringbuffer_submission(dev, file, ring, ctx,
> - args, &eb->vmas, batch_obj, exec_start, flags);
> - if (ret)
> - goto err;
> + args, &eb->vmas, batch_obj, exec_start, flags);
>
> + if (flags & I915_DISPATCH_SECURE)
> + i915_gem_object_ggtt_unpin(batch_obj);
> err:
> /* the request owns the ref now */
> i915_gem_context_unreference(ctx);
> --
> 2.1.0.rc1
>
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 3/3] drm/i915: Remove fenced_gpu_access and pending_fenced_gpu_access
2014-08-09 16:37 [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt Chris Wilson
2014-08-09 16:37 ` [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
@ 2014-08-09 16:37 ` Chris Wilson
2014-08-11 10:20 ` Daniel Vetter
2014-08-11 9:36 ` [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt Daniel Vetter
2 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2014-08-09 16:37 UTC (permalink / raw)
To: intel-gfx
This migrates the fence tracking onto the existing seqno
infrastructure so that the later conversion to tracking via requests is
simplified.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_drv.h | 7 ------
drivers/gpu/drm/i915/i915_gem.c | 17 ---------------
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 34 +++++++++++++++++-------------
drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +-
4 files changed, 20 insertions(+), 40 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index fab97bc3215f..5c3f033ff928 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1777,13 +1777,6 @@ struct drm_i915_gem_object {
* Only honoured if hardware has relevant pte bit
*/
unsigned long gt_ro:1;
-
- /*
- * Is the GPU currently using a fence to access this buffer,
- */
- unsigned int pending_fenced_gpu_access:1;
- unsigned int fenced_gpu_access:1;
-
unsigned int cache_level:3;
unsigned int has_aliasing_ppgtt_mapping:1;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6366230c4e32..e808b449ac75 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2163,8 +2163,6 @@ static void
i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
struct intel_engine_cs *ring)
{
- struct drm_device *dev = obj->base.dev;
- struct drm_i915_private *dev_priv = dev->dev_private;
u32 seqno = intel_ring_get_seqno(ring);
BUG_ON(ring == NULL);
@@ -2183,19 +2181,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
list_move_tail(&obj->ring_list, &ring->active_list);
obj->last_read_seqno = seqno;
-
- if (obj->fenced_gpu_access) {
- obj->last_fenced_seqno = seqno;
-
- /* Bump MRU to take account of the delayed flush */
- if (obj->fence_reg != I915_FENCE_REG_NONE) {
- struct drm_i915_fence_reg *reg;
-
- reg = &dev_priv->fence_regs[obj->fence_reg];
- list_move_tail(®->lru_list,
- &dev_priv->mm.fence_list);
- }
- }
}
void i915_vma_move_to_active(struct i915_vma *vma,
@@ -2231,7 +2216,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
obj->base.write_domain = 0;
obj->last_fenced_seqno = 0;
- obj->fenced_gpu_access = false;
obj->active = 0;
drm_gem_object_unreference(&obj->base);
@@ -3176,7 +3160,6 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
obj->last_fenced_seqno = 0;
}
- obj->fenced_gpu_access = false;
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 87e9599f9e4b..fbac7b51736c 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -542,7 +542,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
{
struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
uint64_t flags;
int ret;
@@ -560,17 +559,13 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
entry->flags |= __EXEC_OBJECT_HAS_PIN;
- if (has_fenced_gpu_access) {
- if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
- ret = i915_gem_object_get_fence(obj);
- if (ret)
- return ret;
-
- if (i915_gem_object_pin_fence(obj))
- entry->flags |= __EXEC_OBJECT_HAS_FENCE;
+ if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+ ret = i915_gem_object_get_fence(obj);
+ if (ret)
+ return ret;
- obj->pending_fenced_gpu_access = true;
- }
+ if (i915_gem_object_pin_fence(obj))
+ entry->flags |= __EXEC_OBJECT_HAS_FENCE;
}
if (entry->offset != vma->node.start) {
@@ -658,8 +653,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
obj = vma->obj;
entry = vma->exec_entry;
+ if (!has_fenced_gpu_access)
+ entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
need_fence =
- has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
need_mappable = need_fence || need_reloc_mappable(vma);
@@ -672,7 +668,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
obj->base.pending_write_domain = 0;
- obj->pending_fenced_gpu_access = false;
}
list_splice(&ordered_vmas, vmas);
@@ -959,9 +954,11 @@ static void
i915_gem_execbuffer_move_to_active(struct list_head *vmas,
struct intel_engine_cs *ring)
{
+ u32 seqno = intel_ring_get_seqno(ring);
struct i915_vma *vma;
list_for_each_entry(vma, vmas, exec_list) {
+ struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
struct drm_i915_gem_object *obj = vma->obj;
u32 old_read = obj->base.read_domains;
u32 old_write = obj->base.write_domain;
@@ -970,18 +967,25 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
if (obj->base.write_domain == 0)
obj->base.pending_read_domains |= obj->base.read_domains;
obj->base.read_domains = obj->base.pending_read_domains;
- obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
i915_vma_move_to_active(vma, ring);
if (obj->base.write_domain) {
obj->dirty = 1;
- obj->last_write_seqno = intel_ring_get_seqno(ring);
+ obj->last_write_seqno = seqno;
intel_fb_obj_invalidate(obj, ring);
/* update for the implicit flush after a batch */
obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
}
+ if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
+ obj->last_fenced_seqno = seqno;
+ if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
+ struct drm_i915_private *dev_priv = to_i915(ring->dev);
+ list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
+ &dev_priv->mm.fence_list);
+ }
+ }
trace_i915_gem_object_change_domain(obj, old_read, old_write);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index cb150e8b4336..7e623bf097a1 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -376,7 +376,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
if (ret == 0) {
obj->fence_dirty =
- obj->fenced_gpu_access ||
+ obj->last_fenced_seqno ||
obj->fence_reg != I915_FENCE_REG_NONE;
obj->tiling_mode = args->tiling_mode;
--
2.1.0.rc1
^ permalink raw reply related [flat|nested] 11+ messages in thread* Re: [PATCH 3/3] drm/i915: Remove fenced_gpu_access and pending_fenced_gpu_access
2014-08-09 16:37 ` [PATCH 3/3] drm/i915: Remove fenced_gpu_access and pending_fenced_gpu_access Chris Wilson
@ 2014-08-11 10:20 ` Daniel Vetter
0 siblings, 0 replies; 11+ messages in thread
From: Daniel Vetter @ 2014-08-11 10:20 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Sat, Aug 09, 2014 at 05:37:24PM +0100, Chris Wilson wrote:
> This migrates the fence tracking onto the existing seqno
> infrastructure so that the later conversion to tracking via requests is
> simplified.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Pulled in all 3 with the changes to patch 2 as discussed on irc.
Thanks, Daniel
> ---
> drivers/gpu/drm/i915/i915_drv.h | 7 ------
> drivers/gpu/drm/i915/i915_gem.c | 17 ---------------
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 34 +++++++++++++++++-------------
> drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +-
> 4 files changed, 20 insertions(+), 40 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index fab97bc3215f..5c3f033ff928 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1777,13 +1777,6 @@ struct drm_i915_gem_object {
> * Only honoured if hardware has relevant pte bit
> */
> unsigned long gt_ro:1;
> -
> - /*
> - * Is the GPU currently using a fence to access this buffer,
> - */
> - unsigned int pending_fenced_gpu_access:1;
> - unsigned int fenced_gpu_access:1;
> -
> unsigned int cache_level:3;
>
> unsigned int has_aliasing_ppgtt_mapping:1;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 6366230c4e32..e808b449ac75 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2163,8 +2163,6 @@ static void
> i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
> struct intel_engine_cs *ring)
> {
> - struct drm_device *dev = obj->base.dev;
> - struct drm_i915_private *dev_priv = dev->dev_private;
> u32 seqno = intel_ring_get_seqno(ring);
>
> BUG_ON(ring == NULL);
> @@ -2183,19 +2181,6 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
> list_move_tail(&obj->ring_list, &ring->active_list);
>
> obj->last_read_seqno = seqno;
> -
> - if (obj->fenced_gpu_access) {
> - obj->last_fenced_seqno = seqno;
> -
> - /* Bump MRU to take account of the delayed flush */
> - if (obj->fence_reg != I915_FENCE_REG_NONE) {
> - struct drm_i915_fence_reg *reg;
> -
> - reg = &dev_priv->fence_regs[obj->fence_reg];
> - list_move_tail(®->lru_list,
> - &dev_priv->mm.fence_list);
> - }
> - }
> }
>
> void i915_vma_move_to_active(struct i915_vma *vma,
> @@ -2231,7 +2216,6 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
> obj->base.write_domain = 0;
>
> obj->last_fenced_seqno = 0;
> - obj->fenced_gpu_access = false;
>
> obj->active = 0;
> drm_gem_object_unreference(&obj->base);
> @@ -3176,7 +3160,6 @@ i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
> obj->last_fenced_seqno = 0;
> }
>
> - obj->fenced_gpu_access = false;
> return 0;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 87e9599f9e4b..fbac7b51736c 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -542,7 +542,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
> {
> struct drm_i915_gem_object *obj = vma->obj;
> struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> - bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
> uint64_t flags;
> int ret;
>
> @@ -560,17 +559,13 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
>
> entry->flags |= __EXEC_OBJECT_HAS_PIN;
>
> - if (has_fenced_gpu_access) {
> - if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
> - ret = i915_gem_object_get_fence(obj);
> - if (ret)
> - return ret;
> -
> - if (i915_gem_object_pin_fence(obj))
> - entry->flags |= __EXEC_OBJECT_HAS_FENCE;
> + if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
> + ret = i915_gem_object_get_fence(obj);
> + if (ret)
> + return ret;
>
> - obj->pending_fenced_gpu_access = true;
> - }
> + if (i915_gem_object_pin_fence(obj))
> + entry->flags |= __EXEC_OBJECT_HAS_FENCE;
> }
>
> if (entry->offset != vma->node.start) {
> @@ -658,8 +653,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
> obj = vma->obj;
> entry = vma->exec_entry;
>
> + if (!has_fenced_gpu_access)
> + entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
> need_fence =
> - has_fenced_gpu_access &&
> entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
> obj->tiling_mode != I915_TILING_NONE;
> need_mappable = need_fence || need_reloc_mappable(vma);
> @@ -672,7 +668,6 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
>
> obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
> obj->base.pending_write_domain = 0;
> - obj->pending_fenced_gpu_access = false;
> }
> list_splice(&ordered_vmas, vmas);
>
> @@ -959,9 +954,11 @@ static void
> i915_gem_execbuffer_move_to_active(struct list_head *vmas,
> struct intel_engine_cs *ring)
> {
> + u32 seqno = intel_ring_get_seqno(ring);
> struct i915_vma *vma;
>
> list_for_each_entry(vma, vmas, exec_list) {
> + struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
> struct drm_i915_gem_object *obj = vma->obj;
> u32 old_read = obj->base.read_domains;
> u32 old_write = obj->base.write_domain;
> @@ -970,18 +967,25 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
> if (obj->base.write_domain == 0)
> obj->base.pending_read_domains |= obj->base.read_domains;
> obj->base.read_domains = obj->base.pending_read_domains;
> - obj->fenced_gpu_access = obj->pending_fenced_gpu_access;
>
> i915_vma_move_to_active(vma, ring);
> if (obj->base.write_domain) {
> obj->dirty = 1;
> - obj->last_write_seqno = intel_ring_get_seqno(ring);
> + obj->last_write_seqno = seqno;
>
> intel_fb_obj_invalidate(obj, ring);
>
> /* update for the implicit flush after a batch */
> obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
> }
> + if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
> + obj->last_fenced_seqno = seqno;
> + if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
> + struct drm_i915_private *dev_priv = to_i915(ring->dev);
> + list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
> + &dev_priv->mm.fence_list);
> + }
> + }
>
> trace_i915_gem_object_change_domain(obj, old_read, old_write);
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index cb150e8b4336..7e623bf097a1 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -376,7 +376,7 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
>
> if (ret == 0) {
> obj->fence_dirty =
> - obj->fenced_gpu_access ||
> + obj->last_fenced_seqno ||
> obj->fence_reg != I915_FENCE_REG_NONE;
>
> obj->tiling_mode = args->tiling_mode;
> --
> 2.1.0.rc1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt
2014-08-09 16:37 [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt Chris Wilson
2014-08-09 16:37 ` [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
2014-08-09 16:37 ` [PATCH 3/3] drm/i915: Remove fenced_gpu_access and pending_fenced_gpu_access Chris Wilson
@ 2014-08-11 9:36 ` Daniel Vetter
2 siblings, 0 replies; 11+ messages in thread
From: Daniel Vetter @ 2014-08-11 9:36 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx
On Sat, Aug 09, 2014 at 05:37:22PM +0100, Chris Wilson wrote:
> If an object is not bound into the global GTT, then it cannot be
> accessed via the GTT. This restores the original code that was muddled
> by ppGTT. In the process, we remove a WARN that had long outlived its
> usefulness and was simply being coded around instead.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 22 +++++-----------------
> 1 file changed, 5 insertions(+), 17 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 80b807b724a6..99250d27668d 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -3594,11 +3594,12 @@ int
> i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
> {
> struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> + struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
> uint32_t old_write_domain, old_read_domains;
> int ret;
>
> /* Not valid to be called on unbound objects. */
> - if (!i915_gem_obj_bound_any(obj))
> + if (vma == NULL)
> return -EINVAL;
>
> if (obj->base.write_domain == I915_GEM_DOMAIN_GTT)
> @@ -3640,13 +3641,9 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
> old_write_domain);
>
> /* And bump the LRU for this access */
> - if (i915_gem_object_is_inactive(obj)) {
> - struct i915_vma *vma = i915_gem_obj_to_ggtt(obj);
> - if (vma)
> - list_move_tail(&vma->mm_list,
> - &dev_priv->gtt.base.inactive_list);
> -
> - }
> + if (i915_gem_object_is_inactive(obj))
Ben will put a few more needles into his danvet voodoo doll, but I guess
this here is a fairly clear indicator that tracking the active bit on the
object instead of the vma wasn't an awesome idea ...
Anyway, patch looks good, merged.
-Daniel
> + list_move_tail(&vma->mm_list,
> + &dev_priv->gtt.base.inactive_list);
>
> return 0;
> }
> @@ -3810,9 +3807,6 @@ static bool is_pin_display(struct drm_i915_gem_object *obj)
> {
> struct i915_vma *vma;
>
> - if (list_empty(&obj->vma_list))
> - return false;
> -
> vma = i915_gem_obj_to_ggtt(obj);
> if (!vma)
> return false;
> @@ -5255,12 +5249,6 @@ struct i915_vma *i915_gem_obj_to_ggtt(struct drm_i915_gem_object *obj)
> {
> struct i915_vma *vma;
>
> - /* This WARN has probably outlived its usefulness (callers already
> - * WARN if they don't find the GGTT vma they expect). When removing,
> - * remember to remove the pre-check in is_pin_display() as well */
> - if (WARN_ON(list_empty(&obj->vma_list)))
> - return NULL;
> -
> vma = list_first_entry(&obj->vma_list, typeof(*vma), vma_link);
> if (vma->vm != obj_to_ggtt(obj))
> return NULL;
> --
> 2.1.0.rc1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer
@ 2014-01-01 14:00 Chris Wilson
2014-01-01 14:00 ` [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
0 siblings, 1 reply; 11+ messages in thread
From: Chris Wilson @ 2014-01-01 14:00 UTC (permalink / raw)
To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky
One side-effect of the introduction of ppgtt was that we needed to
rebind the object into the appropriate vm (and global gtt in some
peculiar cases). For simplicity this was done twice for every object on
every call to execbuffer. However, that adds a tremendous amount of CPU
overhead (rewriting all the PTE for all objects into WC memory) per
draw. The fix is to push all the decision about which vm to bind into
and when down into the low-level bind routines through hints rather than
inside the execbuffer routine.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=72906
Tested-by: jianx.zhou@intel.com
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
drivers/gpu/drm/i915/i915_drv.h | 25 +++---
drivers/gpu/drm/i915/i915_gem.c | 119 +++++++++++------------------
drivers/gpu/drm/i915/i915_gem_context.c | 9 +--
drivers/gpu/drm/i915/i915_gem_evict.c | 12 +--
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 18 +++--
drivers/gpu/drm/i915/i915_gem_gtt.c | 2 +-
drivers/gpu/drm/i915/i915_trace.h | 20 ++---
drivers/gpu/drm/i915/intel_overlay.c | 2 +-
drivers/gpu/drm/i915/intel_pm.c | 2 +-
drivers/gpu/drm/i915/intel_ringbuffer.c | 19 +++--
10 files changed, 103 insertions(+), 125 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index cc8afff878b1..65379474f312 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2033,11 +2033,11 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
int __must_check i915_gem_object_pin(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
uint32_t alignment,
- bool map_and_fenceable,
- bool nonblocking);
-void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
+ unsigned flags);
+#define PIN_MAPPABLE 0x1
+#define PIN_GLOBAL 0x2
+#define PIN_NONBLOCK 0x4
int __must_check i915_vma_unbind(struct i915_vma *vma);
-int __must_check i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj);
int i915_gem_object_put_pages(struct drm_i915_gem_object *obj);
void i915_gem_release_all_mmaps(struct drm_i915_private *dev_priv);
void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
@@ -2237,13 +2237,19 @@ i915_gem_obj_ggtt_size(struct drm_i915_gem_object *obj)
static inline int __must_check
i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
uint32_t alignment,
- bool map_and_fenceable,
- bool nonblocking)
+ unsigned flags)
{
- return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment,
- map_and_fenceable, nonblocking);
+ return i915_gem_object_pin(obj, obj_to_ggtt(obj), alignment, flags | PIN_GLOBAL);
}
+static inline int
+i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj)
+{
+ return i915_vma_unbind(i915_gem_obj_to_ggtt(obj));
+}
+
+void i915_gem_object_ggtt_unpin(struct drm_i915_gem_object *obj);
+
/* i915_gem_context.c */
#define ctx_to_ppgtt(ctx) container_of((ctx)->vm, struct i915_hw_ppgtt, base)
int __must_check i915_gem_context_init(struct drm_device *dev);
@@ -2280,8 +2286,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
int min_size,
unsigned alignment,
unsigned cache_level,
- bool mappable,
- bool nonblock);
+ unsigned flags);
int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
int i915_gem_evict_everything(struct drm_device *dev);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 656406deada0..d9acfa78d1a5 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -43,12 +43,6 @@ static void i915_gem_object_flush_cpu_write_domain(struct drm_i915_gem_object *o
static __must_check int
i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
bool readonly);
-static __must_check int
-i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
- struct i915_address_space *vm,
- unsigned alignment,
- bool map_and_fenceable,
- bool nonblocking);
static int i915_gem_phys_pwrite(struct drm_device *dev,
struct drm_i915_gem_object *obj,
struct drm_i915_gem_pwrite *args,
@@ -605,7 +599,7 @@ i915_gem_gtt_pwrite_fast(struct drm_device *dev,
char __user *user_data;
int page_offset, page_length, ret;
- ret = i915_gem_obj_ggtt_pin(obj, 0, true, true);
+ ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE | PIN_NONBLOCK);
if (ret)
goto out;
@@ -1399,7 +1393,7 @@ int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}
/* Now bind it into the GTT if needed */
- ret = i915_gem_obj_ggtt_pin(obj, 0, true, false);
+ ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_MAPPABLE);
if (ret)
goto unlock;
@@ -2767,7 +2761,6 @@ int i915_vma_unbind(struct i915_vma *vma)
if (!drm_mm_node_allocated(&vma->node)) {
i915_gem_vma_destroy(vma);
-
return 0;
}
@@ -2819,26 +2812,6 @@ int i915_vma_unbind(struct i915_vma *vma)
return 0;
}
-/**
- * Unbinds an object from the global GTT aperture.
- */
-int
-i915_gem_object_ggtt_unbind(struct drm_i915_gem_object *obj)
-{
- struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
- struct i915_address_space *ggtt = &dev_priv->gtt.base;
-
- if (!i915_gem_obj_ggtt_bound(obj))
- return 0;
-
- if (i915_gem_obj_to_ggtt(obj)->pin_count)
- return -EBUSY;
-
- BUG_ON(obj->pages == NULL);
-
- return i915_vma_unbind(i915_gem_obj_to_vma(obj, ggtt));
-}
-
int i915_gpu_idle(struct drm_device *dev)
{
drm_i915_private_t *dev_priv = dev->dev_private;
@@ -3256,18 +3229,17 @@ static void i915_gem_verify_gtt(struct drm_device *dev)
/**
* Finds free space in the GTT aperture and binds the object there.
*/
-static int
+static struct i915_vma *
i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
unsigned alignment,
- bool map_and_fenceable,
- bool nonblocking)
+ unsigned flags)
{
struct drm_device *dev = obj->base.dev;
drm_i915_private_t *dev_priv = dev->dev_private;
u32 size, fence_size, fence_alignment, unfenced_alignment;
size_t gtt_max =
- map_and_fenceable ? dev_priv->gtt.mappable_end : vm->total;
+ flags & PIN_MAPPABLE ? dev_priv->gtt.mappable_end : vm->total;
struct i915_vma *vma;
int ret;
@@ -3279,18 +3251,18 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
obj->tiling_mode, true);
unfenced_alignment =
i915_gem_get_gtt_alignment(dev,
- obj->base.size,
- obj->tiling_mode, false);
+ obj->base.size,
+ obj->tiling_mode, false);
if (alignment == 0)
- alignment = map_and_fenceable ? fence_alignment :
+ alignment = flags & PIN_MAPPABLE ? fence_alignment :
unfenced_alignment;
- if (map_and_fenceable && alignment & (fence_alignment - 1)) {
+ if (flags & PIN_MAPPABLE && alignment & (fence_alignment - 1)) {
DRM_ERROR("Invalid object alignment requested %u\n", alignment);
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
- size = map_and_fenceable ? fence_size : obj->base.size;
+ size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
/* If the object is bigger than the entire aperture, reject it early
* before evicting everything in a vain attempt to find space.
@@ -3298,22 +3270,20 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
if (obj->base.size > gtt_max) {
DRM_ERROR("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%zu\n",
obj->base.size,
- map_and_fenceable ? "mappable" : "total",
+ flags & PIN_MAPPABLE ? "mappable" : "total",
gtt_max);
- return -E2BIG;
+ return ERR_PTR(-E2BIG);
}
ret = i915_gem_object_get_pages(obj);
if (ret)
- return ret;
+ return ERR_PTR(ret);
i915_gem_object_pin_pages(obj);
vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
- if (IS_ERR(vma)) {
- ret = PTR_ERR(vma);
+ if (IS_ERR(vma))
goto err_unpin;
- }
search_free:
ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
@@ -3322,9 +3292,7 @@ search_free:
DRM_MM_SEARCH_DEFAULT);
if (ret) {
ret = i915_gem_evict_something(dev, vm, size, alignment,
- obj->cache_level,
- map_and_fenceable,
- nonblocking);
+ obj->cache_level, flags);
if (ret == 0)
goto search_free;
@@ -3355,19 +3323,23 @@ search_free:
obj->map_and_fenceable = mappable && fenceable;
}
- WARN_ON(map_and_fenceable && !obj->map_and_fenceable);
+ WARN_ON(flags & PIN_MAPPABLE && !obj->map_and_fenceable);
+
+ trace_i915_vma_bind(vma, flags);
+ vma->bind_vma(vma, obj->cache_level,
+ flags & (PIN_MAPPABLE | PIN_GLOBAL) ? GLOBAL_BIND : 0);
- trace_i915_vma_bind(vma, map_and_fenceable);
i915_gem_verify_gtt(dev);
- return 0;
+ return vma;
err_remove_node:
drm_mm_remove_node(&vma->node);
err_free_vma:
i915_gem_vma_destroy(vma);
+ vma = ERR_PTR(ret);
err_unpin:
i915_gem_object_unpin_pages(obj);
- return ret;
+ return vma;
}
bool
@@ -3559,7 +3531,9 @@ int i915_gem_object_set_cache_level(struct drm_i915_gem_object *obj,
}
list_for_each_entry(vma, &obj->vma_list, vma_link)
- vma->bind_vma(vma, cache_level, 0);
+ if (drm_mm_node_allocated(&vma->node))
+ vma->bind_vma(vma, cache_level,
+ obj->has_global_gtt_mapping ? GLOBAL_BIND : 0);
}
list_for_each_entry(vma, &obj->vma_list, vma_link)
@@ -3728,7 +3702,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
* (e.g. libkms for the bootup splash), we have to ensure that we
* always use map_and_fenceable for all scanout buffers.
*/
- ret = i915_gem_obj_ggtt_pin(obj, alignment, true, false);
+ ret = i915_gem_obj_ggtt_pin(obj, alignment, PIN_MAPPABLE);
if (ret)
goto err_unpin_display;
@@ -3884,52 +3858,49 @@ int
i915_gem_object_pin(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
uint32_t alignment,
- bool map_and_fenceable,
- bool nonblocking)
+ unsigned flags)
{
- const u32 flags = map_and_fenceable ? GLOBAL_BIND : 0;
struct i915_vma *vma;
int ret;
- WARN_ON(map_and_fenceable && !i915_is_ggtt(vm));
+ if (WARN_ON(flags & (PIN_GLOBAL | PIN_MAPPABLE) && !i915_is_ggtt(vm)))
+ return -EINVAL;
vma = i915_gem_obj_to_vma(obj, vm);
-
if (vma) {
if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
return -EBUSY;
if ((alignment &&
vma->node.start & (alignment - 1)) ||
- (map_and_fenceable && !obj->map_and_fenceable)) {
+ (flags & PIN_MAPPABLE && !obj->map_and_fenceable)) {
WARN(vma->pin_count,
"bo is already pinned with incorrect alignment:"
" offset=%lx, req.alignment=%x, req.map_and_fenceable=%d,"
" obj->map_and_fenceable=%d\n",
i915_gem_obj_offset(obj, vm), alignment,
- map_and_fenceable,
+ flags & PIN_MAPPABLE,
obj->map_and_fenceable);
ret = i915_vma_unbind(vma);
if (ret)
return ret;
+
+ vma = NULL;
}
}
- if (!i915_gem_obj_bound(obj, vm)) {
- ret = i915_gem_object_bind_to_vm(obj, vm, alignment,
- map_and_fenceable,
- nonblocking);
- if (ret)
- return ret;
-
+ if (vma == NULL || !drm_mm_node_allocated(&vma->node)) {
+ vma = i915_gem_object_bind_to_vm(obj, vm, alignment, flags);
+ if (IS_ERR(vma))
+ return PTR_ERR(vma);
}
- vma = i915_gem_obj_to_vma(obj, vm);
-
- vma->bind_vma(vma, obj->cache_level, flags);
+ if (flags & PIN_GLOBAL && !obj->has_global_gtt_mapping)
+ vma->bind_vma(vma, obj->cache_level, GLOBAL_BIND);
- i915_gem_obj_to_vma(obj, vm)->pin_count++;
- obj->pin_mappable |= map_and_fenceable;
+ vma->pin_count++;
+ if (flags & PIN_MAPPABLE)
+ obj->pin_mappable |= true;
return 0;
}
@@ -3987,7 +3958,7 @@ i915_gem_pin_ioctl(struct drm_device *dev, void *data,
}
if (obj->user_pin_count == 0) {
- ret = i915_gem_obj_ggtt_pin(obj, args->alignment, true, false);
+ ret = i915_gem_obj_ggtt_pin(obj, args->alignment, PIN_MAPPABLE);
if (ret)
goto out;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index ebe0f67eac08..b428b101f9d0 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -282,8 +282,7 @@ i915_gem_create_context(struct drm_device *dev,
* context.
*/
ret = i915_gem_obj_ggtt_pin(ctx->obj,
- get_context_alignment(dev),
- false, false);
+ get_context_alignment(dev), 0);
if (ret) {
DRM_DEBUG_DRIVER("Couldn't pin %d\n", ret);
goto err_destroy;
@@ -336,8 +335,7 @@ void i915_gem_context_reset(struct drm_device *dev)
if (i == RCS) {
WARN_ON(i915_gem_obj_ggtt_pin(dctx->obj,
- get_context_alignment(dev),
- false, false));
+ get_context_alignment(dev), 0));
/* Fake a finish/inactive */
dctx->obj->base.write_domain = 0;
dctx->obj->active = 0;
@@ -600,8 +598,7 @@ static int do_switch(struct intel_ring_buffer *ring,
/* Trying to pin first makes error handling easier. */
if (ring == &dev_priv->ring[RCS]) {
ret = i915_gem_obj_ggtt_pin(to->obj,
- get_context_alignment(ring->dev),
- false, false);
+ get_context_alignment(ring->dev), 0);
if (ret)
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 54413ed46a46..fed72a67afd2 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -48,14 +48,14 @@ mark_free(struct i915_vma *vma, struct list_head *unwind)
int
i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
int min_size, unsigned alignment, unsigned cache_level,
- bool mappable, bool nonblocking)
+ unsigned flags)
{
drm_i915_private_t *dev_priv = dev->dev_private;
struct list_head eviction_list, unwind_list;
struct i915_vma *vma;
int ret = 0;
- trace_i915_gem_evict(dev, min_size, alignment, mappable);
+ trace_i915_gem_evict(dev, min_size, alignment, flags);
/*
* The goal is to evict objects and amalgamate space in LRU order.
@@ -81,7 +81,7 @@ i915_gem_evict_something(struct drm_device *dev, struct i915_address_space *vm,
*/
INIT_LIST_HEAD(&unwind_list);
- if (mappable) {
+ if (flags & PIN_MAPPABLE) {
BUG_ON(!i915_is_ggtt(vm));
drm_mm_init_scan_with_range(&vm->mm, min_size,
alignment, cache_level, 0,
@@ -96,7 +96,7 @@ search_again:
goto found;
}
- if (nonblocking)
+ if (flags & PIN_NONBLOCK)
goto none;
/* Now merge in the soon-to-be-expired objects... */
@@ -120,13 +120,13 @@ none:
/* Can we unpin some objects such as idle hw contents,
* or pending flips?
*/
- ret = nonblocking ? -ENOSPC : i915_gpu_idle(dev);
+ ret = flags & PIN_NONBLOCK ? -ENOSPC : i915_gpu_idle(dev);
if (ret)
return ret;
/* Only idle the GPU and repeat the search once */
i915_gem_retire_requests(dev);
- nonblocking = true;
+ flags |= PIN_NONBLOCK;
goto search_again;
found:
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index bbff8f9b6549..cd09d42f507b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -544,19 +544,23 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
- bool need_fence, need_mappable;
- u32 flags = (entry->flags & EXEC_OBJECT_NEEDS_GTT) &&
- !vma->obj->has_global_gtt_mapping ? GLOBAL_BIND : 0;
+ bool need_fence;
+ unsigned flags;
int ret;
+ flags = 0;
+
need_fence =
has_fenced_gpu_access &&
entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
obj->tiling_mode != I915_TILING_NONE;
- need_mappable = need_fence || need_reloc_mappable(vma);
+ if (need_fence || need_reloc_mappable(vma))
+ flags |= PIN_MAPPABLE;
- ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, need_mappable,
- false);
+ if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
+ flags |= PIN_GLOBAL;
+
+ ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
if (ret)
return ret;
@@ -585,8 +589,6 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
}
- vma->bind_vma(vma, obj->cache_level, flags);
-
return 0;
}
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 998f9a0b322a..d113eb5e2f5b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -865,7 +865,7 @@ alloc:
if (ret == -ENOSPC && !retried) {
ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
GEN6_PD_SIZE, GEN6_PD_ALIGN,
- I915_CACHE_NONE, false, true);
+ I915_CACHE_NONE, 0);
if (ret)
return ret;
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 6e580c98dede..b95a380958db 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -34,15 +34,15 @@ TRACE_EVENT(i915_gem_object_create,
);
TRACE_EVENT(i915_vma_bind,
- TP_PROTO(struct i915_vma *vma, bool mappable),
- TP_ARGS(vma, mappable),
+ TP_PROTO(struct i915_vma *vma, unsigned flags),
+ TP_ARGS(vma, flags),
TP_STRUCT__entry(
__field(struct drm_i915_gem_object *, obj)
__field(struct i915_address_space *, vm)
__field(u32, offset)
__field(u32, size)
- __field(bool, mappable)
+ __field(unsigned, flags)
),
TP_fast_assign(
@@ -50,12 +50,12 @@ TRACE_EVENT(i915_vma_bind,
__entry->vm = vma->vm;
__entry->offset = vma->node.start;
__entry->size = vma->node.size;
- __entry->mappable = mappable;
+ __entry->flags = flags;
),
TP_printk("obj=%p, offset=%08x size=%x%s vm=%p",
__entry->obj, __entry->offset, __entry->size,
- __entry->mappable ? ", mappable" : "",
+ __entry->flags & PIN_MAPPABLE ? ", mappable" : "",
__entry->vm)
);
@@ -196,26 +196,26 @@ DEFINE_EVENT(i915_gem_object, i915_gem_object_destroy,
);
TRACE_EVENT(i915_gem_evict,
- TP_PROTO(struct drm_device *dev, u32 size, u32 align, bool mappable),
- TP_ARGS(dev, size, align, mappable),
+ TP_PROTO(struct drm_device *dev, u32 size, u32 align, unsigned flags),
+ TP_ARGS(dev, size, align, flags),
TP_STRUCT__entry(
__field(u32, dev)
__field(u32, size)
__field(u32, align)
- __field(bool, mappable)
+ __field(unsigned, flags)
),
TP_fast_assign(
__entry->dev = dev->primary->index;
__entry->size = size;
__entry->align = align;
- __entry->mappable = mappable;
+ __entry->flags = flags;
),
TP_printk("dev=%d, size=%d, align=%d %s",
__entry->dev, __entry->size, __entry->align,
- __entry->mappable ? ", mappable" : "")
+ __entry->flags & PIN_MAPPABLE ? ", mappable" : "")
);
TRACE_EVENT(i915_gem_evict_everything,
diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c
index a1397b1646af..1d46608bbd8b 100644
--- a/drivers/gpu/drm/i915/intel_overlay.c
+++ b/drivers/gpu/drm/i915/intel_overlay.c
@@ -1349,7 +1349,7 @@ void intel_setup_overlay(struct drm_device *dev)
}
overlay->flip_addr = reg_bo->phys_obj->handle->busaddr;
} else {
- ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, true, false);
+ ret = i915_gem_obj_ggtt_pin(reg_bo, PAGE_SIZE, PIN_MAPPABLE);
if (ret) {
DRM_ERROR("failed to pin overlay register bo\n");
goto out_free_bo;
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 469170c11bb4..e29804eda113 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -2753,7 +2753,7 @@ intel_alloc_context_page(struct drm_device *dev)
return NULL;
}
- ret = i915_gem_obj_ggtt_pin(ctx, 4096, true, false);
+ ret = i915_gem_obj_ggtt_pin(ctx, 4096, 0);
if (ret) {
DRM_ERROR("failed to pin power context: %d\n", ret);
goto err_unref;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 442c9a66ebab..5744841669e4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -531,9 +531,11 @@ init_pipe_control(struct intel_ring_buffer *ring)
goto err;
}
- i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
+ ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
+ if (ret)
+ goto err_unref;
- ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, true, false);
+ ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
if (ret)
goto err_unref;
@@ -1269,12 +1271,13 @@ static int init_status_page(struct intel_ring_buffer *ring)
goto err;
}
- i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
+ ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
+ if (ret)
+ goto err_unref;
- ret = i915_gem_obj_ggtt_pin(obj, 4096, true, false);
- if (ret != 0) {
+ ret = i915_gem_obj_ggtt_pin(obj, 4096, 0);
+ if (ret)
goto err_unref;
- }
ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
@@ -1354,7 +1357,7 @@ static int intel_init_ring_buffer(struct drm_device *dev,
ring->obj = obj;
- ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, true, false);
+ ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
if (ret)
goto err_unref;
@@ -1927,7 +1930,7 @@ int intel_init_render_ring_buffer(struct drm_device *dev)
return -ENOMEM;
}
- ret = i915_gem_obj_ggtt_pin(obj, 0, true, false);
+ ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
if (ret != 0) {
drm_gem_object_unreference(&obj->base);
DRM_ERROR("Failed to ping batch bo\n");
--
1.8.5.2
^ permalink raw reply related [flat|nested] 11+ messages in thread* [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt
2014-01-01 14:00 [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer Chris Wilson
@ 2014-01-01 14:00 ` Chris Wilson
0 siblings, 0 replies; 11+ messages in thread
From: Chris Wilson @ 2014-01-01 14:00 UTC (permalink / raw)
To: intel-gfx; +Cc: Daniel Vetter, Ben Widawsky
With ppgtt, it is no longer correct to mark an object as
map_and_fenceable if we simply unbind it from the global gtt. This has
consequences during execbuffer where we simply use
obj->map_and_fenceable in use_cpu_reloc() to decide which access method
to use for writing into the object. Now for a ppgtt object,
map_and_fenceable will be true when it is not bound into the ggtt but
only in a ppgtt, leading to an invalid access on a non-llc architecture.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Ben Widawsky <benjamin.widawsky@intel.com>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
---
drivers/gpu/drm/i915/i915_gem.c | 5 +-
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 92 ++++++++++++++----------------
drivers/gpu/drm/i915/i915_gem_tiling.c | 7 +--
3 files changed, 47 insertions(+), 57 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d9acfa78d1a5..344bb47adaeb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2791,9 +2791,8 @@ int i915_vma_unbind(struct i915_vma *vma)
i915_gem_gtt_finish_object(obj);
list_del(&vma->mm_list);
- /* Avoid an unnecessary call to unbind on rebind. */
if (i915_is_ggtt(vma->vm))
- obj->map_and_fenceable = true;
+ obj->map_and_fenceable = false;
drm_mm_remove_node(&vma->node);
i915_gem_vma_destroy(vma);
@@ -4114,8 +4113,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj,
obj->fence_reg = I915_FENCE_REG_NONE;
obj->madv = I915_MADV_WILLNEED;
- /* Avoid an unnecessary call to unbind on the first bind. */
- obj->map_and_fenceable = true;
i915_gem_info_add_obj(obj->base.dev->dev_private, obj->base.size);
}
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index cd09d42f507b..26b57f1135c9 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -35,6 +35,7 @@
#define __EXEC_OBJECT_HAS_PIN (1<<31)
#define __EXEC_OBJECT_HAS_FENCE (1<<30)
+#define __EXEC_OBJECT_NEEDS_MAP (1<<29)
struct eb_vmas {
struct list_head vmas;
@@ -529,14 +530,6 @@ i915_gem_execbuffer_relocate(struct eb_vmas *eb)
}
static int
-need_reloc_mappable(struct i915_vma *vma)
-{
- struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
- i915_is_ggtt(vma->vm);
-}
-
-static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct intel_ring_buffer *ring,
bool *need_reloc)
@@ -544,19 +537,12 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
- bool need_fence;
unsigned flags;
int ret;
flags = 0;
-
- need_fence =
- has_fenced_gpu_access &&
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode != I915_TILING_NONE;
- if (need_fence || need_reloc_mappable(vma))
+ if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
flags |= PIN_MAPPABLE;
-
if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
flags |= PIN_GLOBAL;
@@ -592,6 +578,27 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
return 0;
}
+static bool
+need_reloc_mappable(struct i915_vma *vma)
+{
+ struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
+
+ if (entry->relocation_count == 0)
+ return false;
+
+ if (!i915_is_ggtt(vma->vm))
+ return false;
+
+ /* See also use_cpu_reloc() */
+ if (HAS_LLC(vma->obj->base.dev))
+ return false;
+
+ if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
+ return false;
+
+ return true;
+}
+
static int
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
struct list_head *vmas,
@@ -624,9 +631,10 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
obj->tiling_mode != I915_TILING_NONE;
need_mappable = need_fence || need_reloc_mappable(vma);
- if (need_mappable)
+ if (need_mappable) {
+ entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
list_move(&vma->exec_list, &ordered_vmas);
- else
+ } else
list_move_tail(&vma->exec_list, &ordered_vmas);
obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
@@ -654,25 +662,14 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
/* Unbind any ill-fitting objects or pin. */
list_for_each_entry(vma, vmas, exec_list) {
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
- bool need_fence, need_mappable;
obj = vma->obj;
if (!drm_mm_node_allocated(&vma->node))
continue;
- need_fence =
- has_fenced_gpu_access &&
- entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
- obj->tiling_mode != I915_TILING_NONE;
- need_mappable = need_fence || need_reloc_mappable(vma);
-
- WARN_ON((need_mappable || need_fence) &&
- !i915_is_ggtt(vma->vm));
-
- if ((entry->alignment &&
- vma->node.start & (entry->alignment - 1)) ||
- (need_mappable && !obj->map_and_fenceable))
+ if ((entry->alignment && vma->node.start & (entry->alignment - 1)) ||
+ (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable))
ret = i915_vma_unbind(vma);
else
ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
@@ -1185,33 +1182,28 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
- if (flags & I915_DISPATCH_SECURE &&
- !batch_obj->has_global_gtt_mapping) {
- /* When we have multiple VMs, we'll need to make sure that we
- * allocate space first */
- struct i915_vma *vma = i915_gem_obj_to_ggtt(batch_obj);
- BUG_ON(!vma);
- vma->bind_vma(vma, batch_obj->cache_level, GLOBAL_BIND);
- }
+ if (flags & I915_DISPATCH_SECURE) {
+ ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
+ if (ret)
+ goto err;
- if (flags & I915_DISPATCH_SECURE)
exec_start += i915_gem_obj_ggtt_offset(batch_obj);
- else
+ } else
exec_start += i915_gem_obj_offset(batch_obj, vm);
ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
if (ret)
- goto err;
+ goto err_batch;
ret = i915_switch_context(ring, file, ctx);
if (ret)
- goto err;
+ goto err_batch;
if (ring == &dev_priv->ring[RCS] &&
mode != dev_priv->relative_constants_mode) {
ret = intel_ring_begin(ring, 4);
if (ret)
- goto err;
+ goto err_batch;
intel_ring_emit(ring, MI_NOOP);
intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
@@ -1225,30 +1217,29 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
ret = i915_reset_gen7_sol_offsets(dev, ring);
if (ret)
- goto err;
+ goto err_batch;
}
-
exec_len = args->batch_len;
if (cliprects) {
for (i = 0; i < args->num_cliprects; i++) {
ret = i915_emit_box(dev, &cliprects[i],
args->DR1, args->DR4);
if (ret)
- goto err;
+ goto err_batch;
ret = ring->dispatch_execbuffer(ring,
exec_start, exec_len,
flags);
if (ret)
- goto err;
+ goto err_batch;
}
} else {
ret = ring->dispatch_execbuffer(ring,
exec_start, exec_len,
flags);
if (ret)
- goto err;
+ goto err_batch;
}
trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);
@@ -1256,6 +1247,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
+err_batch:
+ if (flags & I915_DISPATCH_SECURE)
+ i915_gem_object_ggtt_unpin(batch_obj);
err:
/* the request owns the ref now */
i915_gem_context_unreference(ctx);
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index eb993584aa6b..e49c662ca9c6 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -359,10 +359,9 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
*/
obj->map_and_fenceable =
- !i915_gem_obj_ggtt_bound(obj) ||
- (i915_gem_obj_ggtt_offset(obj) +
- obj->base.size <= dev_priv->gtt.mappable_end &&
- i915_gem_object_fence_ok(obj, args->tiling_mode));
+ i915_gem_obj_ggtt_bound(obj) &&
+ i915_gem_obj_ggtt_offset(obj) + obj->base.size <= dev_priv->gtt.mappable_end &&
+ i915_gem_object_fence_ok(obj, args->tiling_mode);
/* Rebind if we need a change of alignment */
if (!obj->map_and_fenceable) {
--
1.8.5.2
^ permalink raw reply related [flat|nested] 11+ messages in thread
end of thread, other threads:[~2014-08-11 10:20 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-08-09 16:37 [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt Chris Wilson
2014-08-09 16:37 ` [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
2014-08-09 19:29 ` Ben Widawsky
2014-08-10 6:55 ` Chris Wilson
2014-08-10 7:04 ` Chris Wilson
2014-08-10 7:06 ` [PATCH] drm/i915: Force CPU relocations if not GTT mapped Chris Wilson
2014-08-11 10:02 ` Daniel Vetter
2014-08-09 16:37 ` [PATCH 3/3] drm/i915: Remove fenced_gpu_access and pending_fenced_gpu_access Chris Wilson
2014-08-11 10:20 ` Daniel Vetter
2014-08-11 9:36 ` [PATCH 1/3] drm/i915: Only perform set-to-gtt domain for objects bound to the global gtt Daniel Vetter
-- strict thread matches above, loose matches on Subject: below --
2014-01-01 14:00 [PATCH 1/3] drm/i915: Only bind each object rather than for every execbuffer Chris Wilson
2014-01-01 14:00 ` [PATCH 2/3] drm/i915: Fix obj->map_and_fenceable for ppgtt Chris Wilson
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox