* [PATCH 0/3] Reduce the time for which 'struct_mutex' is held
@ 2015-08-24 11:58 ankitprasad.r.sharma
2015-08-24 11:58 ` [PATCH 1/3] drm/i915: Support for pre-populating the object with system pages ankitprasad.r.sharma
` (3 more replies)
0 siblings, 4 replies; 15+ messages in thread
From: ankitprasad.r.sharma @ 2015-08-24 11:58 UTC (permalink / raw)
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
We are trying to reduce the time for which the global 'struct_mutex'
is locked. Execbuffer ioctl is one place where it is generally held
for the longest time. And sometimes because of this occasional
glitches/flickers are observed in 60 fps playback (due to miss of
V-blank intervals) as page flip calls gets blocked/delayed because the
'struct_mutex' is already locked.
For this, we have exposed two new flags in GEM_CREATE ioctl, to pre-populate
the object with system memory pages and also do an immediate clflush for the
new pages.
The third patch too tries to reduce the 'struct_mutex' lock time by
moving only those objects to CPU domain in put_pages(), that can either
be used in the future or had a CPU mapping.
This series is based on an earlier series of Stolen Memory patches,
extending the GEM_CREATE ioctl further
http://lists.freedesktop.org/archives/intel-gfx/2015-July/072199.html
Ankitprasad Sharma (2):
drm/i915: Support for pre-populating the object with system pages
drm/i915: Support for the clflush of pre-populated pages
Chris Wilson (1):
drm/i915: Only move to the CPU write domain if keeping the GTT pages
drivers/gpu/drm/i915/i915_dma.c | 2 +-
drivers/gpu/drm/i915/i915_drv.h | 5 ++
drivers/gpu/drm/i915/i915_gem.c | 116 ++++++++++++++++++++++++++++++----------
include/uapi/drm/i915_drm.h | 12 ++---
4 files changed, 101 insertions(+), 34 deletions(-)
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 1/3] drm/i915: Support for pre-populating the object with system pages
2015-08-24 11:58 [PATCH 0/3] Reduce the time for which 'struct_mutex' is held ankitprasad.r.sharma
@ 2015-08-24 11:58 ` ankitprasad.r.sharma
2015-08-24 12:35 ` Chris Wilson
2015-08-25 10:51 ` Siluvery, Arun
2015-08-24 11:58 ` [PATCH 2/3] drm/i915: Support for the clflush of pre-populated pages ankitprasad.r.sharma
` (2 subsequent siblings)
3 siblings, 2 replies; 15+ messages in thread
From: ankitprasad.r.sharma @ 2015-08-24 11:58 UTC (permalink / raw)
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
This patch provides support for the User to populate the object
with system pages at its creation time. Since this can be safely
performed without holding the 'struct_mutex', it would help to reduce
the time 'struct_mutex' is kept locked especially during the exec-buffer
path, where it is generally held for the longest time.
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
drivers/gpu/drm/i915/i915_dma.c | 2 +-
drivers/gpu/drm/i915/i915_gem.c | 51 +++++++++++++++++++++++++++++++----------
include/uapi/drm/i915_drm.h | 11 ++++-----
3 files changed, 45 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 8319e07..955aa16 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -171,7 +171,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
value = HAS_RESOURCE_STREAMER(dev);
break;
case I915_PARAM_CREATE_VERSION:
- value = 2;
+ value = 3;
break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c44bd05..3904feb 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -46,6 +46,7 @@ static void
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
+static int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
static bool cpu_cache_is_coherent(struct drm_device *dev,
enum i915_cache_level level)
@@ -414,6 +415,18 @@ i915_gem_create(struct drm_file *file,
if (obj == NULL)
return -ENOMEM;
+ if (flags & I915_CREATE_POPULATE) {
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ ret = __i915_gem_object_get_pages(obj);
+ if (ret)
+ return ret;
+
+ mutex_lock(&dev->struct_mutex);
+ list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
+ mutex_unlock(&dev->struct_mutex);
+ }
+
ret = drm_gem_handle_create(file, &obj->base, &handle);
/* drop reference from allocate - handle holds it now */
drm_gem_object_unreference_unlocked(&obj->base);
@@ -2328,6 +2341,31 @@ err_pages:
return ret;
}
+static int
+__i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+ const struct drm_i915_gem_object_ops *ops = obj->ops;
+ int ret;
+
+ WARN_ON(obj->pages);
+
+ if (obj->madv != I915_MADV_WILLNEED) {
+ DRM_DEBUG("Attempting to obtain a purgeable object\n");
+ return -EFAULT;
+ }
+
+ BUG_ON(obj->pages_pin_count);
+
+ ret = ops->get_pages(obj);
+ if (ret)
+ return ret;
+
+ obj->get_page.sg = obj->pages->sgl;
+ obj->get_page.last = 0;
+
+ return 0;
+}
+
/* Ensure that the associated pages are gathered from the backing storage
* and pinned into our object. i915_gem_object_get_pages() may be called
* multiple times before they are released by a single call to
@@ -2339,28 +2377,17 @@ int
i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
{
struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
- const struct drm_i915_gem_object_ops *ops = obj->ops;
int ret;
if (obj->pages)
return 0;
- if (obj->madv != I915_MADV_WILLNEED) {
- DRM_DEBUG("Attempting to obtain a purgeable object\n");
- return -EFAULT;
- }
-
- BUG_ON(obj->pages_pin_count);
-
- ret = ops->get_pages(obj);
+ ret = __i915_gem_object_get_pages(obj);
if (ret)
return ret;
list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
- obj->get_page.sg = obj->pages->sgl;
- obj->get_page.last = 0;
-
return 0;
}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index f71f75c..26ea715 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -457,20 +457,19 @@ struct drm_i915_gem_create {
__u32 handle;
__u32 pad;
/**
- * Requested flags (currently used for placement
- * (which memory domain))
+ * Requested flags
*
* You can request that the object be created from special memory
* rather than regular system pages using this parameter. Such
* irregular objects may have certain restrictions (such as CPU
* access to a stolen object is verboten).
- *
- * This can be used in the future for other purposes too
- * e.g. specifying tiling/caching/madvise
+ * Also using this parameter object can be pre-populated with system
+ * pages.
*/
__u32 flags;
#define I915_CREATE_PLACEMENT_STOLEN (1<<0) /* Cannot use CPU mmaps */
-#define __I915_CREATE_UNKNOWN_FLAGS -(I915_CREATE_PLACEMENT_STOLEN << 1)
+#define I915_CREATE_POPULATE (1<<1) /* Pre-populate object pages */
+#define __I915_CREATE_UNKNOWN_FLAGS -(I915_CREATE_POPULATE << 1)
};
struct drm_i915_gem_pread {
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 2/3] drm/i915: Support for the clflush of pre-populated pages
2015-08-24 11:58 [PATCH 0/3] Reduce the time for which 'struct_mutex' is held ankitprasad.r.sharma
2015-08-24 11:58 ` [PATCH 1/3] drm/i915: Support for pre-populating the object with system pages ankitprasad.r.sharma
@ 2015-08-24 11:58 ` ankitprasad.r.sharma
2015-08-24 12:43 ` Chris Wilson
2015-08-24 11:58 ` [PATCH 3/3] drm/i915: Only move to the CPU write domain if keeping the GTT pages ankitprasad.r.sharma
2015-08-26 9:25 ` [PATCH 0/3] Reduce the time for which 'struct_mutex' is held Daniel Vetter
3 siblings, 1 reply; 15+ messages in thread
From: ankitprasad.r.sharma @ 2015-08-24 11:58 UTC (permalink / raw)
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
This patch provides a support for the User to immediately flush
out the cachelines for the pre-populated pages of an object, at the
time of its creation. This will not lead to any redundancy and would
further reduce the time for which the 'struct_mutex' is kept locked in
execbuffer path, as cache flush of the newly allocated pages is anyways
done when the object is submitted to GPU.
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
drivers/gpu/drm/i915/i915_dma.c | 2 +-
drivers/gpu/drm/i915/i915_gem.c | 3 +++
include/uapi/drm/i915_drm.h | 3 ++-
3 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 955aa16..eb0b31d 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -171,7 +171,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
value = HAS_RESOURCE_STREAMER(dev);
break;
case I915_PARAM_CREATE_VERSION:
- value = 3;
+ value = 4;
break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 3904feb..dc3435f 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -422,6 +422,9 @@ i915_gem_create(struct drm_file *file,
if (ret)
return ret;
+ if (flags & I915_CREATE_FLUSH)
+ i915_gem_object_flush_cpu_write_domain(obj);
+
mutex_lock(&dev->struct_mutex);
list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
mutex_unlock(&dev->struct_mutex);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 26ea715..547305a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -469,7 +469,8 @@ struct drm_i915_gem_create {
__u32 flags;
#define I915_CREATE_PLACEMENT_STOLEN (1<<0) /* Cannot use CPU mmaps */
#define I915_CREATE_POPULATE (1<<1) /* Pre-populate object pages */
-#define __I915_CREATE_UNKNOWN_FLAGS -(I915_CREATE_POPULATE << 1)
+#define I915_CREATE_FLUSH (1<<2) /* Clflush prepopulated pages */
+#define __I915_CREATE_UNKNOWN_FLAGS -(I915_CREATE_FLUSH << 1)
};
struct drm_i915_gem_pread {
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 3/3] drm/i915: Only move to the CPU write domain if keeping the GTT pages
2015-08-24 11:58 [PATCH 0/3] Reduce the time for which 'struct_mutex' is held ankitprasad.r.sharma
2015-08-24 11:58 ` [PATCH 1/3] drm/i915: Support for pre-populating the object with system pages ankitprasad.r.sharma
2015-08-24 11:58 ` [PATCH 2/3] drm/i915: Support for the clflush of pre-populated pages ankitprasad.r.sharma
@ 2015-08-24 11:58 ` ankitprasad.r.sharma
2015-08-24 12:46 ` Chris Wilson
2015-08-26 9:25 ` [PATCH 0/3] Reduce the time for which 'struct_mutex' is held Daniel Vetter
3 siblings, 1 reply; 15+ messages in thread
From: ankitprasad.r.sharma @ 2015-08-24 11:58 UTC (permalink / raw)
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel
From: Chris Wilson <chris@chris-wilson.co.uk>
We have for a long time been ultra-paranoid about the situation whereby
we hand back pages to the system that have been written to by the GPU
and potentially simultaneously by the user through a CPU mmapping. We
can relax this restriction when we know that the cache domain tracking
is true and there can be no stale cacheline invalidatations. This is
true if the object has never been CPU mmaped as all internal accesses
(i.e. kmap/iomap) are carefully flushed. For a CPU mmaping, one would
expect that the invalid cache lines are resolved on PTE/TLB shootdown
during munmap(), so the only situation we need to be paranoid about is
when such a CPU mmaping exists at the time of put_pages. Given that we
need to treat put_pages carefully as we may return live data to the
system that we want to use again in the future (i.e. I915_MADV_WILLNEED
pages) we can simply treat a live CPU mmaping as a special case of
WILLNEED (which it is!). Any I915_MADV_DONTNEED pages and their
mmapings are shot down immediately following put_pages.
v2: Add a new flag to check if ever a cached CPU mapping was created
for the object. This is needed as we have verified that the CPU
cachelines are not invalidated upon munmap(). So to ensure correctness,
object still needs to be moved to CPU write domain in put_pages(), even
if there are no live CPU mappings for I915_MADV_DONTNEED pages.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 5 ++++
drivers/gpu/drm/i915/i915_gem.c | 62 +++++++++++++++++++++++++++++++----------
2 files changed, 52 insertions(+), 15 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8db905a..bb85401 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2096,6 +2096,11 @@ struct drm_i915_gem_object {
unsigned int frontbuffer_bits:INTEL_FRONTBUFFER_BITS;
+ /*
+ * Whether the object was ever mapped with cached CPU mapping
+ */
+ unsigned int has_stale_cachelines:1;
+
unsigned int pin_display;
struct sg_table *pages;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index dc3435f..1c965ef 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1741,7 +1741,17 @@ i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
else
addr = -ENOMEM;
up_write(&mm->mmap_sem);
+ } else {
+ mutex_lock(&dev->struct_mutex);
+ /*
+ * the cached mapping could lead to stale cachelines, so an
+ * invalidation is needed for the object pages, when they are
+ * released back to kernel
+ */
+ (to_intel_bo(obj))->has_stale_cachelines = 1;
+ mutex_unlock(&dev->struct_mutex);
}
+
drm_gem_object_unreference_unlocked(obj);
if (IS_ERR((void *)addr))
return addr;
@@ -2158,24 +2168,46 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
BUG_ON(obj->madv == __I915_MADV_PURGED);
- ret = i915_gem_object_set_to_cpu_domain(obj, true);
- if (ret) {
- /* In the event of a disaster, abandon all caches and
- * hope for the best.
- */
- WARN_ON(ret != -EIO);
- i915_gem_clflush_object(obj, true);
- obj->base.read_domains = obj->base.write_domain = I915_GEM_DOMAIN_CPU;
- }
-
- i915_gem_gtt_finish_object(obj);
-
- if (i915_gem_object_needs_bit17_swizzle(obj))
- i915_gem_object_save_bit_17_swizzle(obj);
+ /* If we need to access the data in the future, we need to
+ * be sure that the contents of the object is coherent with
+ * the CPU prior to releasing the pages back to the system.
+ * Once we unpin them, the mm is free to move them to different
+ * zones or even swap them out to disk - all without our
+ * intervention. (Though we could track such operations with our
+ * own gemfs, if we ever write one.) As such if we want to keep
+ * the data, set it to the CPU domain now just in case someone
+ * else touches it.
+ *
+ * For a long time we have been paranoid about handing back
+ * pages to the system with stale cacheline invalidation. For
+ * all internal use (kmap/iomap), we know that the domain tracking is
+ * accurate. However, the userspace API is lax and the user can CPU
+ * mmap the object and invalidate cachelines without our accurate
+ * tracking. We have been paranoid to be sure that we always flushed
+ * the cachelines when we stopped using the pages. For which we
+ * maintain a flag for each object that has been CPU mmapped, based
+ * on which we can invalidate the cachelines upon put_pages()
+ */
+ if (obj->madv == I915_MADV_WILLNEED || obj->has_stale_cachelines) {
+ ret = i915_gem_object_set_to_cpu_domain(obj, true);
+ if (ret) {
+ /* In the event of a disaster, abandon all caches and
+ * hope for the best.
+ */
+ WARN_ON(ret != -EIO);
+ i915_gem_clflush_object(obj, true);
+ obj->base.read_domains = I915_GEM_DOMAIN_CPU;
+ obj->base.write_domain = I915_GEM_DOMAIN_CPU;
+ }
- if (obj->madv == I915_MADV_DONTNEED)
+ if (obj->madv == I915_MADV_WILLNEED)
+ if (i915_gem_object_needs_bit17_swizzle(obj))
+ i915_gem_object_save_bit_17_swizzle(obj);
+ } else
obj->dirty = 0;
+ i915_gem_gtt_finish_object(obj);
+
for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
struct page *page = sg_page_iter_page(&sg_iter);
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 1/3] drm/i915: Support for pre-populating the object with system pages
2015-08-24 11:58 ` [PATCH 1/3] drm/i915: Support for pre-populating the object with system pages ankitprasad.r.sharma
@ 2015-08-24 12:35 ` Chris Wilson
2015-08-27 5:48 ` Ankitprasad Sharma
2015-08-25 10:51 ` Siluvery, Arun
1 sibling, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2015-08-24 12:35 UTC (permalink / raw)
To: ankitprasad.r.sharma; +Cc: intel-gfx, akash.goel
On Mon, Aug 24, 2015 at 05:28:14PM +0530, ankitprasad.r.sharma@intel.com wrote:
> +static int
> +__i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> +{
> + const struct drm_i915_gem_object_ops *ops = obj->ops;
> + int ret;
> +
> + WARN_ON(obj->pages);
> +
> + if (obj->madv != I915_MADV_WILLNEED) {
> + DRM_DEBUG("Attempting to obtain a purgeable object\n");
> + return -EFAULT;
> + }
> +
> + BUG_ON(obj->pages_pin_count);
Put the parameter checking into i915_gem_object_get_pages(). The __i915
version is only allowed from strict contexts and we can place the burden
of being correct on the caller.
> + ret = ops->get_pages(obj);
> + if (ret)
> + return ret;
> +
> + obj->get_page.sg = obj->pages->sgl;
> + obj->get_page.last = 0;
> +
> + return 0;
> +}
> +
> /* Ensure that the associated pages are gathered from the backing storage
> * and pinned into our object. i915_gem_object_get_pages() may be called
> * multiple times before they are released by a single call to
> @@ -2339,28 +2377,17 @@ int
> i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> {
> struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> - const struct drm_i915_gem_object_ops *ops = obj->ops;
> int ret;
>
> if (obj->pages)
> return 0;
>
> - if (obj->madv != I915_MADV_WILLNEED) {
> - DRM_DEBUG("Attempting to obtain a purgeable object\n");
> - return -EFAULT;
> - }
> -
> - BUG_ON(obj->pages_pin_count);
> -
> - ret = ops->get_pages(obj);
> + ret = __i915_gem_object_get_pages(obj);
> if (ret)
> return ret;
>
> list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
I am tempted to say this should be in a new
__i915_gem_object_get_pages__tail_locked()
so that we don't have to hunt down users if we ever need to modify the
global lists.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 2/3] drm/i915: Support for the clflush of pre-populated pages
2015-08-24 11:58 ` [PATCH 2/3] drm/i915: Support for the clflush of pre-populated pages ankitprasad.r.sharma
@ 2015-08-24 12:43 ` Chris Wilson
0 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2015-08-24 12:43 UTC (permalink / raw)
To: ankitprasad.r.sharma; +Cc: intel-gfx, akash.goel
On Mon, Aug 24, 2015 at 05:28:15PM +0530, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> This patch provides a support for the User to immediately flush
> out the cachelines for the pre-populated pages of an object, at the
> time of its creation. This will not lead to any redundancy and would
> further reduce the time for which the 'struct_mutex' is kept locked in
> execbuffer path, as cache flush of the newly allocated pages is anyways
> done when the object is submitted to GPU.
>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk>
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 3/3] drm/i915: Only move to the CPU write domain if keeping the GTT pages
2015-08-24 11:58 ` [PATCH 3/3] drm/i915: Only move to the CPU write domain if keeping the GTT pages ankitprasad.r.sharma
@ 2015-08-24 12:46 ` Chris Wilson
0 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2015-08-24 12:46 UTC (permalink / raw)
To: ankitprasad.r.sharma; +Cc: intel-gfx, akash.goel
On Mon, Aug 24, 2015 at 05:28:16PM +0530, ankitprasad.r.sharma@intel.com wrote:
> + /*
> + * the cached mapping could lead to stale cachelines, so an
> + * invalidation is needed for the object pages, when they are
> + * released back to kernel
> + */
Multiline comments should be full sentences (i.e. captilised and punctuated).
> + (to_intel_bo(obj))->has_stale_cachelines = 1;
Was there a buy one, get one free other on brackets? Is there a bug in
the to_intel_bo() macro?
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/3] drm/i915: Support for pre-populating the object with system pages
2015-08-24 11:58 ` [PATCH 1/3] drm/i915: Support for pre-populating the object with system pages ankitprasad.r.sharma
2015-08-24 12:35 ` Chris Wilson
@ 2015-08-25 10:51 ` Siluvery, Arun
2015-08-27 6:34 ` Ankitprasad Sharma
1 sibling, 1 reply; 15+ messages in thread
From: Siluvery, Arun @ 2015-08-25 10:51 UTC (permalink / raw)
To: ankitprasad.r.sharma, intel-gfx; +Cc: akash.goel
On 24/08/2015 12:58, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> This patch provides support for the User to populate the object
> with system pages at its creation time. Since this can be safely
> performed without holding the 'struct_mutex', it would help to reduce
> the time 'struct_mutex' is kept locked especially during the exec-buffer
> path, where it is generally held for the longest time.
>
> Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> ---
> drivers/gpu/drm/i915/i915_dma.c | 2 +-
> drivers/gpu/drm/i915/i915_gem.c | 51 +++++++++++++++++++++++++++++++----------
> include/uapi/drm/i915_drm.h | 11 ++++-----
> 3 files changed, 45 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> index 8319e07..955aa16 100644
> --- a/drivers/gpu/drm/i915/i915_dma.c
> +++ b/drivers/gpu/drm/i915/i915_dma.c
> @@ -171,7 +171,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
> value = HAS_RESOURCE_STREAMER(dev);
> break;
> case I915_PARAM_CREATE_VERSION:
> - value = 2;
> + value = 3;
> break;
> default:
> DRM_DEBUG("Unknown parameter %d\n", param->param);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index c44bd05..3904feb 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -46,6 +46,7 @@ static void
> i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
> static void
> i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
> +static int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
>
> static bool cpu_cache_is_coherent(struct drm_device *dev,
> enum i915_cache_level level)
> @@ -414,6 +415,18 @@ i915_gem_create(struct drm_file *file,
> if (obj == NULL)
> return -ENOMEM;
>
> + if (flags & I915_CREATE_POPULATE) {
> + struct drm_i915_private *dev_priv = dev->dev_private;
> +
> + ret = __i915_gem_object_get_pages(obj);
> + if (ret)
> + return ret;
> +
> + mutex_lock(&dev->struct_mutex);
> + list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
> + mutex_unlock(&dev->struct_mutex);
> + }
> +
> ret = drm_gem_handle_create(file, &obj->base, &handle);
If I915_CREATE_POPULATE is set, don't we have to release the pages when
this call fails?
regards
Arun
> /* drop reference from allocate - handle holds it now */
> drm_gem_object_unreference_unlocked(&obj->base);
> @@ -2328,6 +2341,31 @@ err_pages:
> return ret;
> }
>
> +static int
> +__i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> +{
> + const struct drm_i915_gem_object_ops *ops = obj->ops;
> + int ret;
> +
> + WARN_ON(obj->pages);
> +
> + if (obj->madv != I915_MADV_WILLNEED) {
> + DRM_DEBUG("Attempting to obtain a purgeable object\n");
> + return -EFAULT;
> + }
> +
> + BUG_ON(obj->pages_pin_count);
> +
> + ret = ops->get_pages(obj);
> + if (ret)
> + return ret;
> +
> + obj->get_page.sg = obj->pages->sgl;
> + obj->get_page.last = 0;
> +
> + return 0;
> +}
> +
> /* Ensure that the associated pages are gathered from the backing storage
> * and pinned into our object. i915_gem_object_get_pages() may be called
> * multiple times before they are released by a single call to
> @@ -2339,28 +2377,17 @@ int
> i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> {
> struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> - const struct drm_i915_gem_object_ops *ops = obj->ops;
> int ret;
>
> if (obj->pages)
> return 0;
>
> - if (obj->madv != I915_MADV_WILLNEED) {
> - DRM_DEBUG("Attempting to obtain a purgeable object\n");
> - return -EFAULT;
> - }
> -
> - BUG_ON(obj->pages_pin_count);
> -
> - ret = ops->get_pages(obj);
> + ret = __i915_gem_object_get_pages(obj);
> if (ret)
> return ret;
>
> list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
>
> - obj->get_page.sg = obj->pages->sgl;
> - obj->get_page.last = 0;
> -
> return 0;
> }
>
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index f71f75c..26ea715 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -457,20 +457,19 @@ struct drm_i915_gem_create {
> __u32 handle;
> __u32 pad;
> /**
> - * Requested flags (currently used for placement
> - * (which memory domain))
> + * Requested flags
> *
> * You can request that the object be created from special memory
> * rather than regular system pages using this parameter. Such
> * irregular objects may have certain restrictions (such as CPU
> * access to a stolen object is verboten).
> - *
> - * This can be used in the future for other purposes too
> - * e.g. specifying tiling/caching/madvise
> + * Also using this parameter object can be pre-populated with system
> + * pages.
> */
> __u32 flags;
> #define I915_CREATE_PLACEMENT_STOLEN (1<<0) /* Cannot use CPU mmaps */
> -#define __I915_CREATE_UNKNOWN_FLAGS -(I915_CREATE_PLACEMENT_STOLEN << 1)
> +#define I915_CREATE_POPULATE (1<<1) /* Pre-populate object pages */
> +#define __I915_CREATE_UNKNOWN_FLAGS -(I915_CREATE_POPULATE << 1)
> };
>
> struct drm_i915_gem_pread {
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 0/3] Reduce the time for which 'struct_mutex' is held
2015-08-24 11:58 [PATCH 0/3] Reduce the time for which 'struct_mutex' is held ankitprasad.r.sharma
` (2 preceding siblings ...)
2015-08-24 11:58 ` [PATCH 3/3] drm/i915: Only move to the CPU write domain if keeping the GTT pages ankitprasad.r.sharma
@ 2015-08-26 9:25 ` Daniel Vetter
2015-08-27 10:48 ` Ankitprasad Sharma
3 siblings, 1 reply; 15+ messages in thread
From: Daniel Vetter @ 2015-08-26 9:25 UTC (permalink / raw)
To: ankitprasad.r.sharma; +Cc: intel-gfx, akash.goel
On Mon, Aug 24, 2015 at 05:28:13PM +0530, ankitprasad.r.sharma@intel.com wrote:
> From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
>
> We are trying to reduce the time for which the global 'struct_mutex'
> is locked. Execbuffer ioctl is one place where it is generally held
> for the longest time. And sometimes because of this occasional
> glitches/flickers are observed in 60 fps playback (due to miss of
> V-blank intervals) as page flip calls gets blocked/delayed because the
> 'struct_mutex' is already locked.
>
> For this, we have exposed two new flags in GEM_CREATE ioctl, to pre-populate
> the object with system memory pages and also do an immediate clflush for the
> new pages.
>
> The third patch too tries to reduce the 'struct_mutex' lock time by
> moving only those objects to CPU domain in put_pages(), that can either
> be used in the future or had a CPU mapping.
>
> This series is based on an earlier series of Stolen Memory patches,
> extending the GEM_CREATE ioctl further
> http://lists.freedesktop.org/archives/intel-gfx/2015-July/072199.html
>
> Ankitprasad Sharma (2):
> drm/i915: Support for pre-populating the object with system pages
> drm/i915: Support for the clflush of pre-populated pages
>
> Chris Wilson (1):
> drm/i915: Only move to the CPU write domain if keeping the GTT pages
Usual broken maintainer record: Needs igt and userspace. And for the case
of the put_pages optimization probably really nasty igt.
-Daniel
>
> drivers/gpu/drm/i915/i915_dma.c | 2 +-
> drivers/gpu/drm/i915/i915_drv.h | 5 ++
> drivers/gpu/drm/i915/i915_gem.c | 116 ++++++++++++++++++++++++++++++----------
> include/uapi/drm/i915_drm.h | 12 ++---
> 4 files changed, 101 insertions(+), 34 deletions(-)
>
> --
> 1.9.1
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/3] drm/i915: Support for pre-populating the object with system pages
2015-08-24 12:35 ` Chris Wilson
@ 2015-08-27 5:48 ` Ankitprasad Sharma
2015-08-27 7:45 ` Chris Wilson
0 siblings, 1 reply; 15+ messages in thread
From: Ankitprasad Sharma @ 2015-08-27 5:48 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx, akash.goel
On Mon, 2015-08-24 at 13:35 +0100, Chris Wilson wrote:
> On Mon, Aug 24, 2015 at 05:28:14PM +0530, ankitprasad.r.sharma@intel.com wrote:
> > +static int
> > +__i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> > +{
> > + const struct drm_i915_gem_object_ops *ops = obj->ops;
> > + int ret;
> > +
> > + WARN_ON(obj->pages);
> > +
> > + if (obj->madv != I915_MADV_WILLNEED) {
> > + DRM_DEBUG("Attempting to obtain a purgeable object\n");
> > + return -EFAULT;
> > + }
> > +
> > + BUG_ON(obj->pages_pin_count);
>
> Put the parameter checking into i915_gem_object_get_pages(). The __i915
> version is only allowed from strict contexts and we can place the burden
> of being correct on the caller.
>
> > + ret = ops->get_pages(obj);
> > + if (ret)
> > + return ret;
> > +
> > + obj->get_page.sg = obj->pages->sgl;
> > + obj->get_page.last = 0;
> > +
> > + return 0;
> > +}
> > +
> > /* Ensure that the associated pages are gathered from the backing storage
> > * and pinned into our object. i915_gem_object_get_pages() may be called
> > * multiple times before they are released by a single call to
> > @@ -2339,28 +2377,17 @@ int
> > i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
> > {
> > struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> > - const struct drm_i915_gem_object_ops *ops = obj->ops;
> > int ret;
> >
> > if (obj->pages)
> > return 0;
> >
> > - if (obj->madv != I915_MADV_WILLNEED) {
> > - DRM_DEBUG("Attempting to obtain a purgeable object\n");
> > - return -EFAULT;
> > - }
> > -
> > - BUG_ON(obj->pages_pin_count);
> > -
> > - ret = ops->get_pages(obj);
> > + ret = __i915_gem_object_get_pages(obj);
> > if (ret)
> > return ret;
> >
> > list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
>
> I am tempted to say this should be in a new
>
> __i915_gem_object_get_pages__tail_locked()
>
> so that we don't have to hunt down users if we ever need to modify the
> global lists.
Could not get you here.
is it just to add list_add_tail in a separate function
__i915_gem_object_get_pages__tail_locked(), or a new variant of
__i915_gem_object_get_pages() that will also do the link list insertion
Thanks,
Ankit
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/3] drm/i915: Support for pre-populating the object with system pages
2015-08-25 10:51 ` Siluvery, Arun
@ 2015-08-27 6:34 ` Ankitprasad Sharma
2015-08-27 7:43 ` Chris Wilson
0 siblings, 1 reply; 15+ messages in thread
From: Ankitprasad Sharma @ 2015-08-27 6:34 UTC (permalink / raw)
To: Siluvery, Arun, Chris Wilson; +Cc: intel-gfx, akash.goel
On Tue, 2015-08-25 at 11:51 +0100, Siluvery, Arun wrote:
> On 24/08/2015 12:58, ankitprasad.r.sharma@intel.com wrote:
> > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >
> > This patch provides support for the User to populate the object
> > with system pages at its creation time. Since this can be safely
> > performed without holding the 'struct_mutex', it would help to reduce
> > the time 'struct_mutex' is kept locked especially during the exec-buffer
> > path, where it is generally held for the longest time.
> >
> > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_dma.c | 2 +-
> > drivers/gpu/drm/i915/i915_gem.c | 51 +++++++++++++++++++++++++++++++----------
> > include/uapi/drm/i915_drm.h | 11 ++++-----
> > 3 files changed, 45 insertions(+), 19 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> > index 8319e07..955aa16 100644
> > --- a/drivers/gpu/drm/i915/i915_dma.c
> > +++ b/drivers/gpu/drm/i915/i915_dma.c
> > @@ -171,7 +171,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
> > value = HAS_RESOURCE_STREAMER(dev);
> > break;
> > case I915_PARAM_CREATE_VERSION:
> > - value = 2;
> > + value = 3;
> > break;
> > default:
> > DRM_DEBUG("Unknown parameter %d\n", param->param);
> > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > index c44bd05..3904feb 100644
> > --- a/drivers/gpu/drm/i915/i915_gem.c
> > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > @@ -46,6 +46,7 @@ static void
> > i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
> > static void
> > i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
> > +static int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
> >
> > static bool cpu_cache_is_coherent(struct drm_device *dev,
> > enum i915_cache_level level)
> > @@ -414,6 +415,18 @@ i915_gem_create(struct drm_file *file,
> > if (obj == NULL)
> > return -ENOMEM;
> >
> > + if (flags & I915_CREATE_POPULATE) {
> > + struct drm_i915_private *dev_priv = dev->dev_private;
> > +
> > + ret = __i915_gem_object_get_pages(obj);
> > + if (ret)
> > + return ret;
> > +
> > + mutex_lock(&dev->struct_mutex);
> > + list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
> > + mutex_unlock(&dev->struct_mutex);
> > + }
> > +
> > ret = drm_gem_handle_create(file, &obj->base, &handle);
>
> If I915_CREATE_POPULATE is set, don't we have to release the pages when
> this call fails?
I guess the i915_gem_object_get_pages_* takes care of releasing the
pages when returning an error.
One more thing,
What can be preferred here when an error is returned by
__i915_gem_object_get_pages?
Shall we return error to the userspace after unreferencing the object or
to continue without pre-populating pages (and returning object handle)?
Thanks,
Ankit
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/3] drm/i915: Support for pre-populating the object with system pages
2015-08-27 6:34 ` Ankitprasad Sharma
@ 2015-08-27 7:43 ` Chris Wilson
0 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2015-08-27 7:43 UTC (permalink / raw)
To: Ankitprasad Sharma; +Cc: intel-gfx, akash.goel
On Thu, Aug 27, 2015 at 12:04:37PM +0530, Ankitprasad Sharma wrote:
> On Tue, 2015-08-25 at 11:51 +0100, Siluvery, Arun wrote:
> > On 24/08/2015 12:58, ankitprasad.r.sharma@intel.com wrote:
> > > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > >
> > > This patch provides support for the User to populate the object
> > > with system pages at its creation time. Since this can be safely
> > > performed without holding the 'struct_mutex', it would help to reduce
> > > the time 'struct_mutex' is kept locked especially during the exec-buffer
> > > path, where it is generally held for the longest time.
> > >
> > > Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> > > ---
> > > drivers/gpu/drm/i915/i915_dma.c | 2 +-
> > > drivers/gpu/drm/i915/i915_gem.c | 51 +++++++++++++++++++++++++++++++----------
> > > include/uapi/drm/i915_drm.h | 11 ++++-----
> > > 3 files changed, 45 insertions(+), 19 deletions(-)
> > >
> > > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
> > > index 8319e07..955aa16 100644
> > > --- a/drivers/gpu/drm/i915/i915_dma.c
> > > +++ b/drivers/gpu/drm/i915/i915_dma.c
> > > @@ -171,7 +171,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
> > > value = HAS_RESOURCE_STREAMER(dev);
> > > break;
> > > case I915_PARAM_CREATE_VERSION:
> > > - value = 2;
> > > + value = 3;
> > > break;
> > > default:
> > > DRM_DEBUG("Unknown parameter %d\n", param->param);
> > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> > > index c44bd05..3904feb 100644
> > > --- a/drivers/gpu/drm/i915/i915_gem.c
> > > +++ b/drivers/gpu/drm/i915/i915_gem.c
> > > @@ -46,6 +46,7 @@ static void
> > > i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
> > > static void
> > > i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
> > > +static int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
> > >
> > > static bool cpu_cache_is_coherent(struct drm_device *dev,
> > > enum i915_cache_level level)
> > > @@ -414,6 +415,18 @@ i915_gem_create(struct drm_file *file,
> > > if (obj == NULL)
> > > return -ENOMEM;
> > >
> > > + if (flags & I915_CREATE_POPULATE) {
> > > + struct drm_i915_private *dev_priv = dev->dev_private;
> > > +
> > > + ret = __i915_gem_object_get_pages(obj);
> > > + if (ret)
> > > + return ret;
> > > +
> > > + mutex_lock(&dev->struct_mutex);
> > > + list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
> > > + mutex_unlock(&dev->struct_mutex);
> > > + }
> > > +
> > > ret = drm_gem_handle_create(file, &obj->base, &handle);
> >
> > If I915_CREATE_POPULATE is set, don't we have to release the pages when
> > this call fails?
> I guess the i915_gem_object_get_pages_* takes care of releasing the
> pages when returning an error.
Oh, it should just be calling drm_gem_object_unreference_unlocked(). No
need to worry about obj->pages as they will be reaped along with the
object.
> One more thing,
> What can be preferred here when an error is returned by
> __i915_gem_object_get_pages?
> Shall we return error to the userspace after unreferencing the object or
> to continue without pre-populating pages (and returning object handle)?
Report the error. Userspace can then decide if it wants to allocate an
object without prepulated pages, try searching/reaping its own caches
harder for an available object, or do something different entirely to
avoid excess memory usage.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 1/3] drm/i915: Support for pre-populating the object with system pages
2015-08-27 5:48 ` Ankitprasad Sharma
@ 2015-08-27 7:45 ` Chris Wilson
0 siblings, 0 replies; 15+ messages in thread
From: Chris Wilson @ 2015-08-27 7:45 UTC (permalink / raw)
To: Ankitprasad Sharma; +Cc: intel-gfx, akash.goel
On Thu, Aug 27, 2015 at 11:18:15AM +0530, Ankitprasad Sharma wrote:
> On Mon, 2015-08-24 at 13:35 +0100, Chris Wilson wrote:
> > I am tempted to say this should be in a new
> >
> > __i915_gem_object_get_pages__tail_locked()
> >
> > so that we don't have to hunt down users if we ever need to modify the
> > global lists.
> Could not get you here.
> is it just to add list_add_tail in a separate function
> __i915_gem_object_get_pages__tail_locked(),
Yes. It's just a preventative maintenance step for when we need something
other than just adding the object to the global list. I would rather not
have core state tracking dotted around random callers.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 0/3] Reduce the time for which 'struct_mutex' is held
2015-08-26 9:25 ` [PATCH 0/3] Reduce the time for which 'struct_mutex' is held Daniel Vetter
@ 2015-08-27 10:48 ` Ankitprasad Sharma
0 siblings, 0 replies; 15+ messages in thread
From: Ankitprasad Sharma @ 2015-08-27 10:48 UTC (permalink / raw)
To: Daniel Vetter; +Cc: intel-gfx, akash.goel
On Wed, 2015-08-26 at 11:25 +0200, Daniel Vetter wrote:
> On Mon, Aug 24, 2015 at 05:28:13PM +0530, ankitprasad.r.sharma@intel.com wrote:
> > From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
> >
> > We are trying to reduce the time for which the global 'struct_mutex'
> > is locked. Execbuffer ioctl is one place where it is generally held
> > for the longest time. And sometimes because of this occasional
> > glitches/flickers are observed in 60 fps playback (due to miss of
> > V-blank intervals) as page flip calls gets blocked/delayed because the
> > 'struct_mutex' is already locked.
> >
> > For this, we have exposed two new flags in GEM_CREATE ioctl, to pre-populate
> > the object with system memory pages and also do an immediate clflush for the
> > new pages.
> >
> > The third patch too tries to reduce the 'struct_mutex' lock time by
> > moving only those objects to CPU domain in put_pages(), that can either
> > be used in the future or had a CPU mapping.
> >
> > This series is based on an earlier series of Stolen Memory patches,
> > extending the GEM_CREATE ioctl further
> > http://lists.freedesktop.org/archives/intel-gfx/2015-July/072199.html
> >
> > Ankitprasad Sharma (2):
> > drm/i915: Support for pre-populating the object with system pages
> > drm/i915: Support for the clflush of pre-populated pages
> >
> > Chris Wilson (1):
> > drm/i915: Only move to the CPU write domain if keeping the GTT pages
>
> Usual broken maintainer record: Needs igt and userspace. And for the case
> of the put_pages optimization probably really nasty igt.
Yes, the igt is work in progress. We will extend the gem_create igt for
1st two patches and we are trying to come up with a new igt for the case
of put_pages optimization.
Also a query regarding the userspace,
Where to do the userspace changes? (mesa driver?) and
Who should do the implementation? (Should we do it?)
Thanks,
Ankit
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 1/3] drm/i915: Support for pre-populating the object with system pages
2015-08-27 11:04 [PATCH v2 " ankitprasad.r.sharma
@ 2015-08-27 11:04 ` ankitprasad.r.sharma
0 siblings, 0 replies; 15+ messages in thread
From: ankitprasad.r.sharma @ 2015-08-27 11:04 UTC (permalink / raw)
To: intel-gfx; +Cc: Ankitprasad Sharma, akash.goel
From: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
This patch provides support for the User to populate the object
with system pages at its creation time. Since this can be safely
performed without holding the 'struct_mutex', it would help to reduce
the time 'struct_mutex' is kept locked especially during the exec-buffer
path, where it is generally held for the longest time.
v2: Corrected error handling on pre-populate failure (Arun),
wrap list_add_tail() in __i915_gem_object_get_pages__tail_locked(),
moved error handling to the caller (Chris)
Signed-off-by: Ankitprasad Sharma <ankitprasad.r.sharma@intel.com>
---
drivers/gpu/drm/i915/i915_dma.c | 2 +-
drivers/gpu/drm/i915/i915_gem.c | 50 +++++++++++++++++++++++++++++++++++------
include/uapi/drm/i915_drm.h | 11 +++++----
3 files changed, 49 insertions(+), 14 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 8319e07..955aa16 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -171,7 +171,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
value = HAS_RESOURCE_STREAMER(dev);
break;
case I915_PARAM_CREATE_VERSION:
- value = 2;
+ value = 3;
break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index c44bd05..4430128 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -46,6 +46,9 @@ static void
i915_gem_object_retire__write(struct drm_i915_gem_object *obj);
static void
i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring);
+static int __i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
+static void
+__i915_gem_object_get_pages__tail_locked(struct drm_i915_gem_object *obj);
static bool cpu_cache_is_coherent(struct drm_device *dev,
enum i915_cache_level level)
@@ -414,6 +417,20 @@ i915_gem_create(struct drm_file *file,
if (obj == NULL)
return -ENOMEM;
+ if (flags & I915_CREATE_POPULATE) {
+ struct drm_i915_private *dev_priv = dev->dev_private;
+
+ ret = __i915_gem_object_get_pages(obj);
+ if (ret) {
+ drm_gem_object_unreference_unlocked(&obj->base);
+ return ret;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+ __i915_gem_object_get_pages__tail_locked(obj);
+ mutex_unlock(&dev->struct_mutex);
+ }
+
ret = drm_gem_handle_create(file, &obj->base, &handle);
/* drop reference from allocate - handle holds it now */
drm_gem_object_unreference_unlocked(&obj->base);
@@ -2328,6 +2345,30 @@ err_pages:
return ret;
}
+static int
+__i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
+{
+ const struct drm_i915_gem_object_ops *ops = obj->ops;
+ int ret;
+
+ ret = ops->get_pages(obj);
+ if (ret)
+ return ret;
+
+ obj->get_page.sg = obj->pages->sgl;
+ obj->get_page.last = 0;
+
+ return 0;
+}
+
+static void
+__i915_gem_object_get_pages__tail_locked(struct drm_i915_gem_object *obj)
+{
+ struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+
+ list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
+}
+
/* Ensure that the associated pages are gathered from the backing storage
* and pinned into our object. i915_gem_object_get_pages() may be called
* multiple times before they are released by a single call to
@@ -2338,8 +2379,6 @@ err_pages:
int
i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
{
- struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
- const struct drm_i915_gem_object_ops *ops = obj->ops;
int ret;
if (obj->pages)
@@ -2352,14 +2391,11 @@ i915_gem_object_get_pages(struct drm_i915_gem_object *obj)
BUG_ON(obj->pages_pin_count);
- ret = ops->get_pages(obj);
+ ret = __i915_gem_object_get_pages(obj);
if (ret)
return ret;
- list_add_tail(&obj->global_list, &dev_priv->mm.unbound_list);
-
- obj->get_page.sg = obj->pages->sgl;
- obj->get_page.last = 0;
+ __i915_gem_object_get_pages__tail_locked(obj);
return 0;
}
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index f71f75c..26ea715 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -457,20 +457,19 @@ struct drm_i915_gem_create {
__u32 handle;
__u32 pad;
/**
- * Requested flags (currently used for placement
- * (which memory domain))
+ * Requested flags
*
* You can request that the object be created from special memory
* rather than regular system pages using this parameter. Such
* irregular objects may have certain restrictions (such as CPU
* access to a stolen object is verboten).
- *
- * This can be used in the future for other purposes too
- * e.g. specifying tiling/caching/madvise
+ * Also using this parameter object can be pre-populated with system
+ * pages.
*/
__u32 flags;
#define I915_CREATE_PLACEMENT_STOLEN (1<<0) /* Cannot use CPU mmaps */
-#define __I915_CREATE_UNKNOWN_FLAGS -(I915_CREATE_PLACEMENT_STOLEN << 1)
+#define I915_CREATE_POPULATE (1<<1) /* Pre-populate object pages */
+#define __I915_CREATE_UNKNOWN_FLAGS -(I915_CREATE_POPULATE << 1)
};
struct drm_i915_gem_pread {
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
end of thread, other threads:[~2015-08-27 11:22 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2015-08-24 11:58 [PATCH 0/3] Reduce the time for which 'struct_mutex' is held ankitprasad.r.sharma
2015-08-24 11:58 ` [PATCH 1/3] drm/i915: Support for pre-populating the object with system pages ankitprasad.r.sharma
2015-08-24 12:35 ` Chris Wilson
2015-08-27 5:48 ` Ankitprasad Sharma
2015-08-27 7:45 ` Chris Wilson
2015-08-25 10:51 ` Siluvery, Arun
2015-08-27 6:34 ` Ankitprasad Sharma
2015-08-27 7:43 ` Chris Wilson
2015-08-24 11:58 ` [PATCH 2/3] drm/i915: Support for the clflush of pre-populated pages ankitprasad.r.sharma
2015-08-24 12:43 ` Chris Wilson
2015-08-24 11:58 ` [PATCH 3/3] drm/i915: Only move to the CPU write domain if keeping the GTT pages ankitprasad.r.sharma
2015-08-24 12:46 ` Chris Wilson
2015-08-26 9:25 ` [PATCH 0/3] Reduce the time for which 'struct_mutex' is held Daniel Vetter
2015-08-27 10:48 ` Ankitprasad Sharma
-- strict thread matches above, loose matches on Subject: below --
2015-08-27 11:04 [PATCH v2 " ankitprasad.r.sharma
2015-08-27 11:04 ` [PATCH 1/3] drm/i915: Support for pre-populating the object with system pages ankitprasad.r.sharma
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox