* [PATCH RFC] drm/i915: Pad GTT views of exec objects up to user specified size
@ 2015-03-25 13:29 Chris Wilson
2015-03-25 15:16 ` Tvrtko Ursulin
2015-03-25 20:01 ` shuang.he
0 siblings, 2 replies; 3+ messages in thread
From: Chris Wilson @ 2015-03-25 13:29 UTC (permalink / raw)
To: intel-gfx
Our GPUs impose certain requirements upon buffers that depend upon how
exactly they are used. Typically this is expressed as that they require
a larger surface than would be naively computed by pitch * height.
Normally such requirements are hidden away in the userspace driver, but
when we accept pointers from strangers and later impose extra conditions
on them, the original client allocator has no idea about the
monstrosities in the GPU and we require the userspace driver to inform
the kernel how many padding pages are required beyond the client
allocation.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
Tvrtko is working on an igt to exercise this feature. The original
request basically stems from trying to mix client allocations (userptr)
and tiling. But I see it as a nice extension of our lazy_fencing code to
reduce buffer sizes for tiled allocations (as we then do not need to
allocate the entire last tilerow) and for relaxing restrictions on
foriegn buffers like DRI3.
---
drivers/gpu/drm/i915/i915_drv.h | 6 +++--
drivers/gpu/drm/i915/i915_gem.c | 41 +++++++++++++++++++-----------
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 16 +++++++++++-
include/uapi/drm/i915_drm.h | 5 ++--
4 files changed, 48 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d1f12ccce4fa..0beac0062942 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2631,11 +2631,13 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
int __must_check
i915_gem_object_pin(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
+ uint32_t size,
uint32_t alignment,
uint64_t flags);
int __must_check
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
+ uint32_t size,
uint32_t alignment,
uint64_t flags);
@@ -2896,8 +2898,8 @@ i915_gem_obj_ggtt_pin(struct drm_i915_gem_object *obj,
uint32_t alignment,
unsigned flags)
{
- return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj),
- alignment, flags | PIN_GLOBAL);
+ return i915_gem_object_pin(obj, i915_obj_to_ggtt(obj), 0, alignment,
+ flags | PIN_GLOBAL);
}
static inline int
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index f47aa7e205b9..5e86dee12759 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3540,12 +3540,13 @@ static struct i915_vma *
i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
const struct i915_ggtt_view *ggtt_view,
+ uint32_t size,
unsigned alignment,
uint64_t flags)
{
struct drm_device *dev = obj->base.dev;
struct drm_i915_private *dev_priv = dev->dev_private;
- u32 size, fence_size, fence_alignment, unfenced_alignment;
+ u32 fence_size, fence_alignment, unfenced_alignment;
unsigned long start =
flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
unsigned long end =
@@ -3575,14 +3576,16 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
return ERR_PTR(-EINVAL);
}
- size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
+ size = max_t(u32, size, obj->base.size);
+ if (flags & PIN_MAPPABLE)
+ size = max_t(u32, size, fence_size);
/* If the object is bigger than the entire aperture, reject it early
* before evicting everything in a vain attempt to find space.
*/
- if (obj->base.size > end) {
- DRM_DEBUG("Attempting to bind an object larger than the aperture: object=%zd > %s aperture=%lu\n",
- obj->base.size,
+ if (size > end) {
+ DRM_DEBUG("Attempting to bind an object larger than the aperture: requested=%u [object=%zd] > %s aperture=%lu\n",
+ size, obj->base.size,
flags & PIN_MAPPABLE ? "mappable" : "total",
end);
return ERR_PTR(-E2BIG);
@@ -4023,7 +4026,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
* put it anyway and hope that userspace can cope (but always first
* try to preserve the existing ABI).
*/
- ret = i915_gem_object_ggtt_pin(obj, view, alignment,
+ ret = i915_gem_object_ggtt_pin(obj, view, 0, alignment,
view->type == I915_GGTT_VIEW_NORMAL ?
PIN_MAPPABLE : 0);
if (ret)
@@ -4187,12 +4190,17 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
}
static bool
-i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
+i915_vma_misplaced(struct i915_vma *vma,
+ uint32_t size,
+ uint32_t alignment,
+ uint64_t flags)
{
struct drm_i915_gem_object *obj = vma->obj;
- if (alignment &&
- vma->node.start & (alignment - 1))
+ if (vma->node.size < size)
+ return true;
+
+ if (alignment && vma->node.start & (alignment - 1))
return true;
if (flags & PIN_MAPPABLE && !obj->map_and_fenceable)
@@ -4209,6 +4217,7 @@ static int
i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
const struct i915_ggtt_view *ggtt_view,
+ uint32_t size,
uint32_t alignment,
uint64_t flags)
{
@@ -4239,7 +4248,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
if (WARN_ON(vma->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT))
return -EBUSY;
- if (i915_vma_misplaced(vma, alignment, flags)) {
+ if (i915_vma_misplaced(vma, size, alignment, flags)) {
unsigned long offset;
offset = ggtt_view ? i915_gem_obj_ggtt_offset_view(obj, ggtt_view->type) :
i915_gem_obj_offset(obj, vm);
@@ -4266,8 +4275,8 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
* means we must do a context switch before the GPU can
* accurately read some of the VMAs.
*/
- vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view, alignment,
- flags);
+ vma = i915_gem_object_bind_to_vm(obj, vm, ggtt_view,
+ size, alignment, flags);
if (IS_ERR(vma))
return PTR_ERR(vma);
}
@@ -4290,7 +4299,7 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
obj->tiling_mode,
true);
- fenceable = (vma->node.size == fence_size &&
+ fenceable = (vma->node.size >= fence_size &&
(vma->node.start & (fence_alignment - 1)) == 0);
mappable = (vma->node.start + fence_size <=
@@ -4311,17 +4320,19 @@ i915_gem_object_do_pin(struct drm_i915_gem_object *obj,
int
i915_gem_object_pin(struct drm_i915_gem_object *obj,
struct i915_address_space *vm,
+ uint32_t size,
uint32_t alignment,
uint64_t flags)
{
return i915_gem_object_do_pin(obj, vm,
i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL,
- alignment, flags);
+ size, alignment, flags);
}
int
i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
const struct i915_ggtt_view *view,
+ uint32_t size,
uint32_t alignment,
uint64_t flags)
{
@@ -4329,7 +4340,7 @@ i915_gem_object_ggtt_pin(struct drm_i915_gem_object *obj,
return -EINVAL;
return i915_gem_object_do_pin(obj, i915_obj_to_ggtt(obj), view,
- alignment, flags | PIN_GLOBAL);
+ size, alignment, flags | PIN_GLOBAL);
}
void
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7f3d78e0eec2..d39bc24c6d1f 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -595,10 +595,14 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
}
- ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
+ ret = i915_gem_object_pin(obj, vma->vm,
+ entry->pad_to_size,
+ entry->alignment,
+ flags);
if ((ret == -ENOSPC || ret == -E2BIG) &&
only_mappable_for_reloc(entry->flags))
ret = i915_gem_object_pin(obj, vma->vm,
+ entry->pad_to_size,
entry->alignment,
flags & ~(PIN_GLOBAL | PIN_MAPPABLE));
if (ret)
@@ -662,6 +666,9 @@ eb_vma_misplaced(struct i915_vma *vma)
vma->node.start & (entry->alignment - 1))
return true;
+ if (vma->node.size < entry->pad_to_size)
+ return true;
+
if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
vma->node.start < BATCH_OFFSET_BIAS)
return true;
@@ -948,6 +955,13 @@ validate_exec_list(struct drm_device *dev,
if (exec[i].flags & invalid_flags)
return -EINVAL;
+ /* pad_to_size was once a reserved field, so sanitize it */
+ if (exec[i].flags & EXEC_OBJECT_PAD_TO_SIZE) {
+ if (offset_in_page(exec[i].pad_to_size))
+ return -EINVAL;
+ } else
+ exec[i].pad_to_size = 0;
+
/* First check for malicious input causing overflow in
* the worst case where we need to allocate the entire
* relocation tree as a single array.
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 8d1be9073380..94e9a8fb80bf 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -680,10 +680,11 @@ struct drm_i915_gem_exec_object2 {
#define EXEC_OBJECT_NEEDS_FENCE (1<<0)
#define EXEC_OBJECT_NEEDS_GTT (1<<1)
#define EXEC_OBJECT_WRITE (1<<2)
-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
+#define EXEC_OBJECT_PAD_TO_SIZE (1<<3)
+#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PAD_TO_SIZE<<1)
__u64 flags;
- __u64 rsvd1;
+ __u64 pad_to_size;
__u64 rsvd2;
};
--
2.1.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 3+ messages in thread* Re: [PATCH RFC] drm/i915: Pad GTT views of exec objects up to user specified size
2015-03-25 13:29 [PATCH RFC] drm/i915: Pad GTT views of exec objects up to user specified size Chris Wilson
@ 2015-03-25 15:16 ` Tvrtko Ursulin
2015-03-25 20:01 ` shuang.he
1 sibling, 0 replies; 3+ messages in thread
From: Tvrtko Ursulin @ 2015-03-25 15:16 UTC (permalink / raw)
To: Chris Wilson, intel-gfx
On 03/25/2015 01:29 PM, Chris Wilson wrote:
> Our GPUs impose certain requirements upon buffers that depend upon how
> exactly they are used. Typically this is expressed as that they require
> a larger surface than would be naively computed by pitch * height.
> Normally such requirements are hidden away in the userspace driver, but
> when we accept pointers from strangers and later impose extra conditions
> on them, the original client allocator has no idea about the
> monstrosities in the GPU and we require the userspace driver to inform
> the kernel how many padding pages are required beyond the client
> allocation.
>
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> Tvrtko is working on an igt to exercise this feature. The original
> request basically stems from trying to mix client allocations (userptr)
> and tiling. But I see it as a nice extension of our lazy_fencing code to
> reduce buffer sizes for tiled allocations (as we then do not need to
> allocate the entire last tilerow) and for relaxing restrictions on
> foriegn buffers like DRI3.
> ---
(Original request was actually about sampler overfetch in various
texture modes.)
We talked about per-process (or so) scratch pages before. I looked now
but I see at the moment even PPGTT shares the same scratch page so
presumably that means someone has thought about security and decided
that is fine?
Otherwise looks good to me.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH RFC] drm/i915: Pad GTT views of exec objects up to user specified size
2015-03-25 13:29 [PATCH RFC] drm/i915: Pad GTT views of exec objects up to user specified size Chris Wilson
2015-03-25 15:16 ` Tvrtko Ursulin
@ 2015-03-25 20:01 ` shuang.he
1 sibling, 0 replies; 3+ messages in thread
From: shuang.he @ 2015-03-25 20:01 UTC (permalink / raw)
To: shuang.he, ethan.gao, intel-gfx, chris
Tested-By: PRC QA PRTS (Patch Regression Test System Contact: shuang.he@intel.com)
Task id: 6050
-------------------------------------Summary-------------------------------------
Platform Delta drm-intel-nightly Series Applied
PNV -2 269/269 267/269
ILK 303/303 303/303
SNB -1 304/304 303/304
IVB 338/338 338/338
BYT 287/287 287/287
HSW -1 362/362 361/362
BDW 310/310 310/310
-------------------------------------Detailed-------------------------------------
Platform Test drm-intel-nightly Series Applied
PNV igt@gem_userptr_blits@coherency-sync CRASH(4)PASS(3) CRASH(2)
PNV igt@gen3_render_linear_blits FAIL(2)PASS(2) FAIL(1)
*SNB igt@pm_rpm@fences-dpms PASS(2) DMESG_WARN(1)PASS(1)
*HSW igt@gem_pwrite_pread@snooped-copy-performance PASS(2) DMESG_WARN(1)PASS(1)
Note: You need to pay more attention to line start with '*'
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2015-03-25 20:01 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-03-25 13:29 [PATCH RFC] drm/i915: Pad GTT views of exec objects up to user specified size Chris Wilson
2015-03-25 15:16 ` Tvrtko Ursulin
2015-03-25 20:01 ` shuang.he
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox