* [PATCH 1/2] drm/i915: refactor i915_gem_object_pin_map()
@ 2016-04-19 17:40 Dave Gordon
2016-04-19 17:40 ` [PATCH 2/2] drm/i915: optimise i915_gem_object_map() for small objects Dave Gordon
2016-04-19 19:50 ` [PATCH 1/2] drm/i915: refactor i915_gem_object_pin_map() Chris Wilson
0 siblings, 2 replies; 8+ messages in thread
From: Dave Gordon @ 2016-04-19 17:40 UTC (permalink / raw)
To: intel-gfx
From: Alex Dai <yu.dai@intel.com>
The recently-added i915_gem_object_pin_map() can be further optimised
for "small" objects. To facilitate this, and simplify the error paths
before adding the new code, this patch pulls out the "mapping" part of
the operation (involving local allocations which must be undone before
return) into its own subfunction.
The next patch will then insert the new optimisation into the middle of
the now-separated subfunction.
This reorganisation will probably not affect the generated code, as the
compiler will most likely inline it anyway, but it makes the logical
structure a bit clearer and easier to modify.
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem.c | 61 +++++++++++++++++++++++++++--------------
1 file changed, 40 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6ce2c31..fc42be0 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2396,6 +2396,45 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
return 0;
}
+/* The 'mapping' part of i915_gem_object_pin_map() below */
+static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
+{
+ unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
+ struct scatterlist *sg = obj->pages->sgl;
+ struct sg_page_iter sg_iter;
+ struct page **pages;
+ unsigned long i = 0;
+ void *addr = NULL;
+
+ /* A single page can always be kmapped */
+ if (n_pages == 1)
+ return kmap(sg_page(sg));
+
+ pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
+ if (pages == NULL) {
+ DRM_DEBUG_DRIVER("Failed to get space for pages\n");
+ return NULL;
+ }
+
+ for_each_sg_page(sg, &sg_iter, n_pages, 0) {
+ pages[i] = sg_page_iter_page(&sg_iter);
+ if (++i == n_pages) {
+ addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
+ break;
+ }
+ }
+
+ /* We should have got here via the 'break' above */
+ WARN_ON(i != n_pages);
+ if (addr == NULL)
+ DRM_DEBUG_DRIVER("Failed to vmap pages\n");
+
+ drm_free_large(pages);
+
+ return addr;
+}
+
+/* get, pin, and map the pages of the object into kernel space */
void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
{
int ret;
@@ -2409,27 +2448,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
i915_gem_object_pin_pages(obj);
if (obj->mapping == NULL) {
- struct page **pages;
-
- pages = NULL;
- if (obj->base.size == PAGE_SIZE)
- obj->mapping = kmap(sg_page(obj->pages->sgl));
- else
- pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT,
- sizeof(*pages),
- GFP_TEMPORARY);
- if (pages != NULL) {
- struct sg_page_iter sg_iter;
- int n;
-
- n = 0;
- for_each_sg_page(obj->pages->sgl, &sg_iter,
- obj->pages->nents, 0)
- pages[n++] = sg_page_iter_page(&sg_iter);
-
- obj->mapping = vmap(pages, n, 0, PAGE_KERNEL);
- drm_free_large(pages);
- }
+ obj->mapping = i915_gem_object_map(obj);
if (obj->mapping == NULL) {
i915_gem_object_unpin_pages(obj);
return ERR_PTR(-ENOMEM);
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH 2/2] drm/i915: optimise i915_gem_object_map() for small objects
2016-04-19 17:40 [PATCH 1/2] drm/i915: refactor i915_gem_object_pin_map() Dave Gordon
@ 2016-04-19 17:40 ` Dave Gordon
2016-04-19 19:50 ` [PATCH 1/2] drm/i915: refactor i915_gem_object_pin_map() Chris Wilson
1 sibling, 0 replies; 8+ messages in thread
From: Dave Gordon @ 2016-04-19 17:40 UTC (permalink / raw)
To: intel-gfx
We're using this function for ringbuffers and other "small" objects, so
it's worth avoiding an extra malloc()/free() cycle if the page array is
small enough to put on the stack. Here we've chosen an arbitrary cutoff
of 32 pages (4KiB each), which is big enough for a ringbuffer (4 pages) or a
context image (currently up to 22 pages).
v5:
change name of local array [Chris Wilson]
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem.c | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index fc42be0..71667bc 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2402,7 +2402,8 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
struct scatterlist *sg = obj->pages->sgl;
struct sg_page_iter sg_iter;
- struct page **pages;
+ struct page *stack_pages[32];
+ struct page **pages = stack_pages;
unsigned long i = 0;
void *addr = NULL;
@@ -2410,10 +2411,13 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
if (n_pages == 1)
return kmap(sg_page(sg));
- pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
- if (pages == NULL) {
- DRM_DEBUG_DRIVER("Failed to get space for pages\n");
- return NULL;
+ if (n_pages > ARRAY_SIZE(stack_pages)) {
+ /* Too big for stack -- allocate temporary array instead */
+ pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
+ if (pages == NULL) {
+ DRM_DEBUG_DRIVER("Failed to get space for pages\n");
+ return NULL;
+ }
}
for_each_sg_page(sg, &sg_iter, n_pages, 0) {
@@ -2429,7 +2433,8 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
if (addr == NULL)
DRM_DEBUG_DRIVER("Failed to vmap pages\n");
- drm_free_large(pages);
+ if (pages != stack_pages)
+ drm_free_large(pages);
return addr;
}
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] drm/i915: refactor i915_gem_object_pin_map()
2016-04-19 17:40 [PATCH 1/2] drm/i915: refactor i915_gem_object_pin_map() Dave Gordon
2016-04-19 17:40 ` [PATCH 2/2] drm/i915: optimise i915_gem_object_map() for small objects Dave Gordon
@ 2016-04-19 19:50 ` Chris Wilson
2016-04-20 9:39 ` Dave Gordon
2016-04-20 13:30 ` [PATCH v2 " Dave Gordon
1 sibling, 2 replies; 8+ messages in thread
From: Chris Wilson @ 2016-04-19 19:50 UTC (permalink / raw)
To: Dave Gordon; +Cc: intel-gfx
On Tue, Apr 19, 2016 at 06:40:07PM +0100, Dave Gordon wrote:
> From: Alex Dai <yu.dai@intel.com>
>
> The recently-added i915_gem_object_pin_map() can be further optimised
> for "small" objects. To facilitate this, and simplify the error paths
> before adding the new code, this patch pulls out the "mapping" part of
> the operation (involving local allocations which must be undone before
> return) into its own subfunction.
>
> The next patch will then insert the new optimisation into the middle of
> the now-separated subfunction.
>
> This reorganisation will probably not affect the generated code, as the
> compiler will most likely inline it anyway, but it makes the logical
> structure a bit clearer and easier to modify.
>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 61 +++++++++++++++++++++++++++--------------
> 1 file changed, 40 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 6ce2c31..fc42be0 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2396,6 +2396,45 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
> return 0;
> }
>
> +/* The 'mapping' part of i915_gem_object_pin_map() below */
> +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
> +{
> + unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
> + struct scatterlist *sg = obj->pages->sgl;
> + struct sg_page_iter sg_iter;
> + struct page **pages;
> + unsigned long i = 0;
> + void *addr = NULL;
> +
> + /* A single page can always be kmapped */
> + if (n_pages == 1)
> + return kmap(sg_page(sg));
> +
> + pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
> + if (pages == NULL) {
> + DRM_DEBUG_DRIVER("Failed to get space for pages\n");
> + return NULL;
> + }
> +
> + for_each_sg_page(sg, &sg_iter, n_pages, 0) {
> + pages[i] = sg_page_iter_page(&sg_iter);
Just pages[i++] = sg_page_iter_page(&sg_iter);
> + if (++i == n_pages) {
> + addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
> + break;
> + }
> + }
> +
> + /* We should have got here via the 'break' above */
> + WARN_ON(i != n_pages);
> + if (addr == NULL)
> + DRM_DEBUG_DRIVER("Failed to vmap pages\n");
As this is a very, very confused loop.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH 1/2] drm/i915: refactor i915_gem_object_pin_map()
2016-04-19 19:50 ` [PATCH 1/2] drm/i915: refactor i915_gem_object_pin_map() Chris Wilson
@ 2016-04-20 9:39 ` Dave Gordon
2016-04-20 13:30 ` [PATCH v2 " Dave Gordon
1 sibling, 0 replies; 8+ messages in thread
From: Dave Gordon @ 2016-04-20 9:39 UTC (permalink / raw)
To: Chris Wilson; +Cc: intel-gfx@lists.freedesktop.org
On 19/04/16 20:50, Chris Wilson wrote:
> On Tue, Apr 19, 2016 at 06:40:07PM +0100, Dave Gordon wrote:
>> From: Alex Dai <yu.dai@intel.com>
>>
>> The recently-added i915_gem_object_pin_map() can be further optimised
>> for "small" objects. To facilitate this, and simplify the error paths
>> before adding the new code, this patch pulls out the "mapping" part of
>> the operation (involving local allocations which must be undone before
>> return) into its own subfunction.
>>
>> The next patch will then insert the new optimisation into the middle of
>> the now-separated subfunction.
>>
>> This reorganisation will probably not affect the generated code, as the
>> compiler will most likely inline it anyway, but it makes the logical
>> structure a bit clearer and easier to modify.
>>
>> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> Cc: Chris Wilson <chris@chris-wilson.co.uk>
>> ---
>> drivers/gpu/drm/i915/i915_gem.c | 61 +++++++++++++++++++++++++++--------------
>> 1 file changed, 40 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 6ce2c31..fc42be0 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2396,6 +2396,45 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
>> return 0;
>> }
>>
>> +/* The 'mapping' part of i915_gem_object_pin_map() below */
>> +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
>> +{
>> + unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
>> + struct scatterlist *sg = obj->pages->sgl;
>> + struct sg_page_iter sg_iter;
>> + struct page **pages;
>> + unsigned long i = 0;
>> + void *addr = NULL;
>> +
>> + /* A single page can always be kmapped */
>> + if (n_pages == 1)
>> + return kmap(sg_page(sg));
>> +
>> + pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
>> + if (pages == NULL) {
>> + DRM_DEBUG_DRIVER("Failed to get space for pages\n");
>> + return NULL;
>> + }
>> +
>> + for_each_sg_page(sg, &sg_iter, n_pages, 0) {
>> + pages[i] = sg_page_iter_page(&sg_iter);
>
> Just pages[i++] = sg_page_iter_page(&sg_iter);
>
>> + if (++i == n_pages) {
>> + addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
>> + break;
>> + }
>> + }
>> +
>> + /* We should have got here via the 'break' above */
>> + WARN_ON(i != n_pages);
>> + if (addr == NULL)
>> + DRM_DEBUG_DRIVER("Failed to vmap pages\n");
>
> As this is a very, very confused loop.
> -Chris
I tried that approach before, but it was actually more difficult to have
tidy error-checking that way (remembering that we must always free the
pages array, so don't really want an early return).
Here, putting the vmap() inside the final iteration of the loop means
that we automatically leave "addr" as NULL if we don't reach the
expected count. The subsequent WARN_ON() tells us that this has
happened, but we don't then have to base any further branching on this
condition (i != n_pages) as "addr" is already right. (Obviously, we
don't want to do the vmap() if we have exited the loop with the wrong
page count).
I'll post the other version, but I think the post-loop checking is
messier, to such an extent that this way round is simpler overall.
.Dave.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* [PATCH v2 1/2] drm/i915: refactor i915_gem_object_pin_map()
2016-04-19 19:50 ` [PATCH 1/2] drm/i915: refactor i915_gem_object_pin_map() Chris Wilson
2016-04-20 9:39 ` Dave Gordon
@ 2016-04-20 13:30 ` Dave Gordon
2016-04-20 13:30 ` [PATCH v2 2/2] drm/i915: optimise i915_gem_object_map() for small objects Dave Gordon
` (2 more replies)
1 sibling, 3 replies; 8+ messages in thread
From: Dave Gordon @ 2016-04-20 13:30 UTC (permalink / raw)
To: intel-gfx
The recently-added i915_gem_object_pin_map() can be further optimised
for "small" objects. To facilitate this, and simplify the error paths
before adding the new code, this patch pulls out the "mapping" part of
the operation (involving local allocations which must be undone before
return) into its own subfunction.
The next patch will then insert the new optimisation into the middle of
the now-separated subfunction.
This reorganisation will probably not affect the generated code, as the
compiler will most likely inline it anyway, but it makes the logical
structure a bit clearer and easier to modify.
v2:
Restructure loop-over-pages & error check (Chris Wilson)
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem.c | 58 ++++++++++++++++++++++++++---------------
1 file changed, 37 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 6ce2c31..5344b6d 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2396,6 +2396,42 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
return 0;
}
+/* The 'mapping' part of i915_gem_object_pin_map() below */
+static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
+{
+ unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
+ struct scatterlist *sg = obj->pages->sgl;
+ struct sg_page_iter sg_iter;
+ struct page **pages;
+ unsigned long i = 0;
+ void *addr = NULL;
+
+ /* A single page can always be kmapped */
+ if (n_pages == 1)
+ return kmap(sg_page(sg));
+
+ pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
+ if (pages == NULL) {
+ DRM_DEBUG_DRIVER("Failed to get space for pages\n");
+ return NULL;
+ }
+
+ for_each_sg_page(sg, &sg_iter, n_pages, 0)
+ pages[i++] = sg_page_iter_page(&sg_iter);
+
+ /* Check that we have the expected number of pages */
+ if (!WARN_ON(i != n_pages))
+ addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
+
+ if (addr == NULL)
+ DRM_DEBUG_DRIVER("Failed to vmap pages\n");
+
+ drm_free_large(pages);
+
+ return addr;
+}
+
+/* get, pin, and map the pages of the object into kernel space */
void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
{
int ret;
@@ -2409,27 +2445,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
i915_gem_object_pin_pages(obj);
if (obj->mapping == NULL) {
- struct page **pages;
-
- pages = NULL;
- if (obj->base.size == PAGE_SIZE)
- obj->mapping = kmap(sg_page(obj->pages->sgl));
- else
- pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT,
- sizeof(*pages),
- GFP_TEMPORARY);
- if (pages != NULL) {
- struct sg_page_iter sg_iter;
- int n;
-
- n = 0;
- for_each_sg_page(obj->pages->sgl, &sg_iter,
- obj->pages->nents, 0)
- pages[n++] = sg_page_iter_page(&sg_iter);
-
- obj->mapping = vmap(pages, n, 0, PAGE_KERNEL);
- drm_free_large(pages);
- }
+ obj->mapping = i915_gem_object_map(obj);
if (obj->mapping == NULL) {
i915_gem_object_unpin_pages(obj);
return ERR_PTR(-ENOMEM);
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [PATCH v2 2/2] drm/i915: optimise i915_gem_object_map() for small objects
2016-04-20 13:30 ` [PATCH v2 " Dave Gordon
@ 2016-04-20 13:30 ` Dave Gordon
2016-04-20 13:57 ` [PATCH v2 1/2] drm/i915: refactor i915_gem_object_pin_map() Dave Gordon
2016-04-21 8:09 ` Joonas Lahtinen
2 siblings, 0 replies; 8+ messages in thread
From: Dave Gordon @ 2016-04-20 13:30 UTC (permalink / raw)
To: intel-gfx
We're using this function for ringbuffers and other "small" objects, so
it's worth avoiding an extra malloc()/free() cycle if the page array is
small enough to put on the stack. Here we've chosen an arbitrary cutoff
of 32 pages (4KiB each), which is big enough for a ringbuffer (4 pages) or a
context image (currently up to 22 pages).
v5:
change name of local array [Chris Wilson]
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem.c | 17 +++++++++++------
1 file changed, 11 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 5344b6d..d993e8e 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2402,7 +2402,8 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
struct scatterlist *sg = obj->pages->sgl;
struct sg_page_iter sg_iter;
- struct page **pages;
+ struct page *stack_pages[32];
+ struct page **pages = stack_pages;
unsigned long i = 0;
void *addr = NULL;
@@ -2410,10 +2411,13 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
if (n_pages == 1)
return kmap(sg_page(sg));
- pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
- if (pages == NULL) {
- DRM_DEBUG_DRIVER("Failed to get space for pages\n");
- return NULL;
+ if (n_pages > ARRAY_SIZE(stack_pages)) {
+ /* Too big for stack -- allocate temporary array instead */
+ pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
+ if (pages == NULL) {
+ DRM_DEBUG_DRIVER("Failed to get space for pages\n");
+ return NULL;
+ }
}
for_each_sg_page(sg, &sg_iter, n_pages, 0)
@@ -2426,7 +2430,8 @@ static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
if (addr == NULL)
DRM_DEBUG_DRIVER("Failed to vmap pages\n");
- drm_free_large(pages);
+ if (pages != stack_pages)
+ drm_free_large(pages);
return addr;
}
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [PATCH v2 1/2] drm/i915: refactor i915_gem_object_pin_map()
2016-04-20 13:30 ` [PATCH v2 " Dave Gordon
2016-04-20 13:30 ` [PATCH v2 2/2] drm/i915: optimise i915_gem_object_map() for small objects Dave Gordon
@ 2016-04-20 13:57 ` Dave Gordon
2016-04-21 8:09 ` Joonas Lahtinen
2 siblings, 0 replies; 8+ messages in thread
From: Dave Gordon @ 2016-04-20 13:57 UTC (permalink / raw)
To: intel-gfx
On 20/04/16 14:30, Dave Gordon wrote:
> The recently-added i915_gem_object_pin_map() can be further optimised
> for "small" objects. To facilitate this, and simplify the error paths
> before adding the new code, this patch pulls out the "mapping" part of
> the operation (involving local allocations which must be undone before
> return) into its own subfunction.
>
> The next patch will then insert the new optimisation into the middle of
> the now-separated subfunction.
>
> This reorganisation will probably not affect the generated code, as the
> compiler will most likely inline it anyway, but it makes the logical
> structure a bit clearer and easier to modify.
>
> v2:
> Restructure loop-over-pages & error check (Chris Wilson)
>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 58 ++++++++++++++++++++++++++---------------
> 1 file changed, 37 insertions(+), 21 deletions(-)
[snip]
> + for_each_sg_page(sg, &sg_iter, n_pages, 0)
> + pages[i++] = sg_page_iter_page(&sg_iter);
> +
> + /* Check that we have the expected number of pages */
> + if (!WARN_ON(i != n_pages))
> + addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
Well actually the shorter loop and the subsequent check didn't turn out
too ugly; the only thing I don't much like is if (!WARN_ON(...)),
because I think in general that code should still function if WARN() and
similar macros are #defined to empty loops. But WARN_ON() has to be
defined to return (the truth-value of) its parameter even if it doesn't
print anything, so it's not that bad.
OTOH, while looking at this loop, I worked out a better page iterator,
so I'll post that in a little while :)
.Dave.
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH v2 1/2] drm/i915: refactor i915_gem_object_pin_map()
2016-04-20 13:30 ` [PATCH v2 " Dave Gordon
2016-04-20 13:30 ` [PATCH v2 2/2] drm/i915: optimise i915_gem_object_map() for small objects Dave Gordon
2016-04-20 13:57 ` [PATCH v2 1/2] drm/i915: refactor i915_gem_object_pin_map() Dave Gordon
@ 2016-04-21 8:09 ` Joonas Lahtinen
2 siblings, 0 replies; 8+ messages in thread
From: Joonas Lahtinen @ 2016-04-21 8:09 UTC (permalink / raw)
To: Dave Gordon, intel-gfx
On ke, 2016-04-20 at 14:30 +0100, Dave Gordon wrote:
> The recently-added i915_gem_object_pin_map() can be further optimised
> for "small" objects. To facilitate this, and simplify the error paths
> before adding the new code, this patch pulls out the "mapping" part of
> the operation (involving local allocations which must be undone before
> return) into its own subfunction.
>
> The next patch will then insert the new optimisation into the middle of
> the now-separated subfunction.
>
> This reorganisation will probably not affect the generated code, as the
> compiler will most likely inline it anyway, but it makes the logical
> structure a bit clearer and easier to modify.
>
> v2:
> Restructure loop-over-pages & error check (Chris Wilson)
>
> Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> Cc: Chris Wilson <chris@chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_gem.c | 58 ++++++++++++++++++++++++++---------------
> 1 file changed, 37 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 6ce2c31..5344b6d 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -2396,6 +2396,42 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
> return 0;
> }
>
> +/* The 'mapping' part of i915_gem_object_pin_map() below */
> +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
> +{
> + unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
> + struct scatterlist *sg = obj->pages->sgl;
> + struct sg_page_iter sg_iter;
> + struct page **pages;
> + unsigned long i = 0;
> + void *addr = NULL;
> +
> + /* A single page can always be kmapped */
> + if (n_pages == 1)
> + return kmap(sg_page(sg));
> +
> + pages = drm_malloc_gfp(n_pages, sizeof(*pages), GFP_TEMPORARY);
> + if (pages == NULL) {
> + DRM_DEBUG_DRIVER("Failed to get space for pages\n");
> + return NULL;
> + }
> +
> + for_each_sg_page(sg, &sg_iter, n_pages, 0)
> + pages[i++] = sg_page_iter_page(&sg_iter);
> +
> + /* Check that we have the expected number of pages */
> + if (!WARN_ON(i != n_pages))
> + addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
> +
> + if (addr == NULL)
> + DRM_DEBUG_DRIVER("Failed to vmap pages\n");
> +
This kind of construct is used elsewhere, too.
if (WARN_ON(i != n_pages)) {
DRM_DEBUG_DRIVER("Failed to vmap pages\n");
goto out;
}
addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
out:
> + drm_free_large(pages);
> +
> + return addr;
> +}
> +
> +/* get, pin, and map the pages of the object into kernel space */
> void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
> {
> int ret;
> @@ -2409,27 +2445,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
> i915_gem_object_pin_pages(obj);
>
> if (obj->mapping == NULL) {
> - struct page **pages;
> -
> - pages = NULL;
> - if (obj->base.size == PAGE_SIZE)
> - obj->mapping = kmap(sg_page(obj->pages->sgl));
> - else
> - pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT,
> - sizeof(*pages),
> - GFP_TEMPORARY);
> - if (pages != NULL) {
> - struct sg_page_iter sg_iter;
> - int n;
> -
> - n = 0;
> - for_each_sg_page(obj->pages->sgl, &sg_iter,
> - obj->pages->nents, 0)
> - pages[n++] = sg_page_iter_page(&sg_iter);
> -
> - obj->mapping = vmap(pages, n, 0, PAGE_KERNEL);
> - drm_free_large(pages);
> - }
> + obj->mapping = i915_gem_object_map(obj);
> if (obj->mapping == NULL) {
> i915_gem_object_unpin_pages(obj);
> return ERR_PTR(-ENOMEM);
--
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2016-04-21 8:08 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-04-19 17:40 [PATCH 1/2] drm/i915: refactor i915_gem_object_pin_map() Dave Gordon
2016-04-19 17:40 ` [PATCH 2/2] drm/i915: optimise i915_gem_object_map() for small objects Dave Gordon
2016-04-19 19:50 ` [PATCH 1/2] drm/i915: refactor i915_gem_object_pin_map() Chris Wilson
2016-04-20 9:39 ` Dave Gordon
2016-04-20 13:30 ` [PATCH v2 " Dave Gordon
2016-04-20 13:30 ` [PATCH v2 2/2] drm/i915: optimise i915_gem_object_map() for small objects Dave Gordon
2016-04-20 13:57 ` [PATCH v2 1/2] drm/i915: refactor i915_gem_object_pin_map() Dave Gordon
2016-04-21 8:09 ` Joonas Lahtinen
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.