public inbox for intel-gfx@lists.freedesktop.org
 help / color / mirror / Atom feed
* [PATCH 1/2] drm/i915: introduce and use i915_gem_object_map_range()
@ 2016-04-13 20:00 Dave Gordon
  2016-04-13 20:00 ` [PATCH 2/2] drm/i915: optimise i915_gem_object_map_range() for small objects Dave Gordon
                   ` (2 more replies)
  0 siblings, 3 replies; 5+ messages in thread
From: Dave Gordon @ 2016-04-13 20:00 UTC (permalink / raw)
  To: intel-gfx

From: Alex Dai <yu.dai@intel.com>

The recent pin-and-map unification didn't include the command parser's
own custom vmapping code, which essentially duplicates the same
algorithm but without some of the optimisations.

To unify this further, this patch puts the mapping/unmapping operations
from i915_gem_object_pin_map() and i915_gem_object_put_pages() into
separate functions that can then be shared by the command parser.

With this change, we have only one vmap() call in the whole driver :)

Signed-off-by: Alex Dai <yu.dai@intel.com>
Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_cmd_parser.c | 44 +++++---------------
 drivers/gpu/drm/i915/i915_drv.h        | 26 ++++++++++++
 drivers/gpu/drm/i915/i915_gem.c        | 76 +++++++++++++++++++++++-----------
 3 files changed, 87 insertions(+), 59 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index a337f33..8968f33 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -927,40 +927,16 @@ void i915_cmd_parser_fini_ring(struct intel_engine_cs *engine)
 	return NULL;
 }
 
-static u32 *vmap_batch(struct drm_i915_gem_object *obj,
+static u32 *map_batch(struct drm_i915_gem_object *obj,
 		       unsigned start, unsigned len)
 {
-	int i;
-	void *addr = NULL;
-	struct sg_page_iter sg_iter;
-	int first_page = start >> PAGE_SHIFT;
-	int last_page = (len + start + 4095) >> PAGE_SHIFT;
-	int npages = last_page - first_page;
-	struct page **pages;
-
-	pages = drm_malloc_ab(npages, sizeof(*pages));
-	if (pages == NULL) {
-		DRM_DEBUG_DRIVER("Failed to get space for pages\n");
-		goto finish;
-	}
-
-	i = 0;
-	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, first_page) {
-		pages[i++] = sg_page_iter_page(&sg_iter);
-		if (i == npages)
-			break;
-	}
+	unsigned long first, npages;
 
-	addr = vmap(pages, i, 0, PAGE_KERNEL);
-	if (addr == NULL) {
-		DRM_DEBUG_DRIVER("Failed to vmap pages\n");
-		goto finish;
-	}
+	/* Convert [start, len) to pages */
+	first = start >> PAGE_SHIFT;
+	npages = DIV_ROUND_UP(start + len, PAGE_SIZE) - first;
 
-finish:
-	if (pages)
-		drm_free_large(pages);
-	return (u32*)addr;
+	return i915_gem_object_map_range(obj, first, npages);
 }
 
 /* Returns a vmap'd pointer to dest_obj, which the caller must unmap */
@@ -987,7 +963,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
 		return ERR_PTR(ret);
 	}
 
-	src_base = vmap_batch(src_obj, batch_start_offset, batch_len);
+	src_base = map_batch(src_obj, batch_start_offset, batch_len);
 	if (!src_base) {
 		DRM_DEBUG_DRIVER("CMD: Failed to vmap batch\n");
 		ret = -ENOMEM;
@@ -1000,7 +976,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
 		goto unmap_src;
 	}
 
-	dst = vmap_batch(dest_obj, 0, batch_len);
+	dst = map_batch(dest_obj, 0, batch_len);
 	if (!dst) {
 		DRM_DEBUG_DRIVER("CMD: Failed to vmap shadow batch\n");
 		ret = -ENOMEM;
@@ -1014,7 +990,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dest_obj,
 	memcpy(dst, src, batch_len);
 
 unmap_src:
-	vunmap(src_base);
+	i915_gem_addr_unmap(src_base);
 unpin_src:
 	i915_gem_object_unpin_pages(src_obj);
 
@@ -1262,7 +1238,7 @@ int i915_parse_cmds(struct intel_engine_cs *engine,
 		ret = -EINVAL;
 	}
 
-	vunmap(batch_base);
+	i915_gem_addr_unmap(batch_base);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d1e6e58..e9cee1d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3005,6 +3005,32 @@ static inline void i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
 }
 
 /**
+ * i915_gem_object_map_range - map some or all of a GEM object into kernel space
+ * @obj: the GEM object to be mapped
+ * @first: offset in pages of the start of the range to be mapped
+ * @npages: length in pages of the range to be mapped. For convenience, a
+ *          length of zero is taken to mean "the remainder of the object"
+ *
+ * Map a given range of a GEM object into kernel virtual space.  The caller must
+ * make sure the associated pages are gathered and pinned before calling this
+ * function, and is responsible for unmapping the returned address when it is no
+ * longer required, by calling i915_gem_addr_unmap().
+ *
+ * Returns the address at which the object has been mapped, or NULL on failure.
+ */
+void *__must_check i915_gem_object_map_range(const struct drm_i915_gem_object *obj,
+					     unsigned long first,
+					     unsigned long npages);
+
+static inline void i915_gem_addr_unmap(void *mapped_addr)
+{
+	if (is_vmalloc_addr(mapped_addr))
+		vunmap(mapped_addr);
+	else
+		kunmap(kmap_to_page(mapped_addr));
+}
+
+/**
  * i915_gem_object_pin_map - return a contiguous mapping of the entire object
  * @obj - the object to map into kernel address space
  *
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index b37ffea..163ca78 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2233,10 +2233,7 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 	list_del(&obj->global_list);
 
 	if (obj->mapping) {
-		if (is_vmalloc_addr(obj->mapping))
-			vunmap(obj->mapping);
-		else
-			kunmap(kmap_to_page(obj->mapping));
+		i915_gem_addr_unmap(obj->mapping);
 		obj->mapping = NULL;
 	}
 
@@ -2408,6 +2405,55 @@ static void i915_gem_object_free_mmap_offset(struct drm_i915_gem_object *obj)
 	return 0;
 }
 
+void *i915_gem_object_map_range(const struct drm_i915_gem_object *obj,
+				unsigned long first,
+				unsigned long npages)
+{
+	unsigned long max_pages = obj->base.size >> PAGE_SHIFT;
+	struct scatterlist *sg = obj->pages->sgl;
+	struct sg_page_iter sg_iter;
+	struct page **pages;
+	unsigned long i = 0;
+	void *addr = NULL;
+
+	/* Minimal range check */
+	if (first + npages > max_pages) {
+		DRM_DEBUG_DRIVER("Invalid page range\n");
+		return NULL;
+	}
+
+	/* npages==0 is shorthand for "the rest of the object" */
+	if (npages == 0)
+		npages = max_pages - first;
+
+	/* A single page can always be kmapped */
+	if (npages == 1)
+		return kmap(sg_page(sg));
+
+	pages = drm_malloc_gfp(npages, sizeof(*pages), GFP_TEMPORARY);
+	if (pages == NULL) {
+		DRM_DEBUG_DRIVER("Failed to get space for pages\n");
+		return NULL;
+	}
+
+	for_each_sg_page(sg, &sg_iter, max_pages, first) {
+		pages[i] = sg_page_iter_page(&sg_iter);
+		if (++i == npages) {
+			addr = vmap(pages, npages, 0, PAGE_KERNEL);
+			break;
+		}
+	}
+
+	/* We should have got here via the 'break' above */
+	WARN_ON(i != npages);
+	if (addr == NULL)
+		DRM_DEBUG_DRIVER("Failed to vmap pages\n");
+
+	drm_free_large(pages);
+
+	return addr;
+}
+
 void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
 {
 	int ret;
@@ -2421,27 +2467,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
 	i915_gem_object_pin_pages(obj);
 
 	if (obj->mapping == NULL) {
-		struct page **pages;
-
-		pages = NULL;
-		if (obj->base.size == PAGE_SIZE)
-			obj->mapping = kmap(sg_page(obj->pages->sgl));
-		else
-			pages = drm_malloc_gfp(obj->base.size >> PAGE_SHIFT,
-					       sizeof(*pages),
-					       GFP_TEMPORARY);
-		if (pages != NULL) {
-			struct sg_page_iter sg_iter;
-			int n;
-
-			n = 0;
-			for_each_sg_page(obj->pages->sgl, &sg_iter,
-					 obj->pages->nents, 0)
-				pages[n++] = sg_page_iter_page(&sg_iter);
-
-			obj->mapping = vmap(pages, n, 0, PAGE_KERNEL);
-			drm_free_large(pages);
-		}
+		obj->mapping = i915_gem_object_map_range(obj, 0, 0);
 		if (obj->mapping == NULL) {
 			i915_gem_object_unpin_pages(obj);
 			return ERR_PTR(-ENOMEM);
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH 2/2] drm/i915: optimise i915_gem_object_map_range() for small objects
  2016-04-13 20:00 [PATCH 1/2] drm/i915: introduce and use i915_gem_object_map_range() Dave Gordon
@ 2016-04-13 20:00 ` Dave Gordon
  2016-04-13 20:14 ` [PATCH 1/2] drm/i915: introduce and use i915_gem_object_map_range() Chris Wilson
  2016-04-14 15:55 ` ✗ Fi.CI.BAT: failure for series starting with [1/2] " Patchwork
  2 siblings, 0 replies; 5+ messages in thread
From: Dave Gordon @ 2016-04-13 20:00 UTC (permalink / raw)
  To: intel-gfx

We're using this function for ringbuffers and other "small" objects, so
it's worth avoiding an extra malloc()/free() cycle if the page array is
small enough to put on the stack. Here we've chosen an arbitrary cutoff
of 32 (4k) pages, which is big enough for a ringbuffer (4 pages) or a
context image (currently up to 22 pages).

v5:
    change name of local array [Chris Wilson]

Signed-off-by: Dave Gordon <david.s.gordon@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Alex Dai <yu.dai@intel.com>
---
 drivers/gpu/drm/i915/i915_gem.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 163ca78..8f70c39 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2412,7 +2412,8 @@ void *i915_gem_object_map_range(const struct drm_i915_gem_object *obj,
 	unsigned long max_pages = obj->base.size >> PAGE_SHIFT;
 	struct scatterlist *sg = obj->pages->sgl;
 	struct sg_page_iter sg_iter;
-	struct page **pages;
+	struct page *stack_pages[32];
+	struct page **pages = stack_pages;
 	unsigned long i = 0;
 	void *addr = NULL;
 
@@ -2430,10 +2431,13 @@ void *i915_gem_object_map_range(const struct drm_i915_gem_object *obj,
 	if (npages == 1)
 		return kmap(sg_page(sg));
 
-	pages = drm_malloc_gfp(npages, sizeof(*pages), GFP_TEMPORARY);
-	if (pages == NULL) {
-		DRM_DEBUG_DRIVER("Failed to get space for pages\n");
-		return NULL;
+	if (npages > ARRAY_SIZE(stack_pages)) {
+		/* Too big for stack -- allocate temporary array instead */
+		pages = drm_malloc_gfp(npages, sizeof(*pages), GFP_TEMPORARY);
+		if (pages == NULL) {
+			DRM_DEBUG_DRIVER("Failed to get space for pages\n");
+			return NULL;
+		}
 	}
 
 	for_each_sg_page(sg, &sg_iter, max_pages, first) {
@@ -2449,7 +2453,8 @@ void *i915_gem_object_map_range(const struct drm_i915_gem_object *obj,
 	if (addr == NULL)
 		DRM_DEBUG_DRIVER("Failed to vmap pages\n");
 
-	drm_free_large(pages);
+	if (pages != stack_pages)
+		drm_free_large(pages);
 
 	return addr;
 }
-- 
1.9.1

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] drm/i915: introduce and use i915_gem_object_map_range()
  2016-04-13 20:00 [PATCH 1/2] drm/i915: introduce and use i915_gem_object_map_range() Dave Gordon
  2016-04-13 20:00 ` [PATCH 2/2] drm/i915: optimise i915_gem_object_map_range() for small objects Dave Gordon
@ 2016-04-13 20:14 ` Chris Wilson
  2016-04-13 20:49   ` Dave Gordon
  2016-04-14 15:55 ` ✗ Fi.CI.BAT: failure for series starting with [1/2] " Patchwork
  2 siblings, 1 reply; 5+ messages in thread
From: Chris Wilson @ 2016-04-13 20:14 UTC (permalink / raw)
  To: Dave Gordon; +Cc: intel-gfx

On Wed, Apr 13, 2016 at 09:00:53PM +0100, Dave Gordon wrote:
> From: Alex Dai <yu.dai@intel.com>
> 
> The recent pin-and-map unification didn't include the command parser's
> own custom vmapping code, which essentially duplicates the same
> algorithm but without some of the optimisations.

No. There is no need for extra hacks for the cmdparser when the very
issue is that it takes a vmap every batch.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 1/2] drm/i915: introduce and use i915_gem_object_map_range()
  2016-04-13 20:14 ` [PATCH 1/2] drm/i915: introduce and use i915_gem_object_map_range() Chris Wilson
@ 2016-04-13 20:49   ` Dave Gordon
  0 siblings, 0 replies; 5+ messages in thread
From: Dave Gordon @ 2016-04-13 20:49 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx, Alex Dai, Tvrtko Ursulin

On 13/04/16 21:14, Chris Wilson wrote:
> On Wed, Apr 13, 2016 at 09:00:53PM +0100, Dave Gordon wrote:
>> From: Alex Dai <yu.dai@intel.com>
>>
>> The recent pin-and-map unification didn't include the command parser's
>> own custom vmapping code, which essentially duplicates the same
>> algorithm but without some of the optimisations.
>
> No. There is no need for extra hacks for the cmdparser when the very
> issue is that it takes a vmap every batch.
> -Chris

Actually, sharing your mapping code will mean that it won't use vmap() 
for "sufficiently small" batches (i.e. one page), it will take the kmap 
path instead. And then for larger batches it will take advantage of the 
stack optimisation instead; only the largest won't benefit from that.

Even without the utility of sharing the code with the command parser, 
I'd still be inclined to refactor the pin-and-map into the function that 
does the mapping, with whatever clever optimisations we can apply, and 
the outer wrapper that manages the pinning and error recovery.

BTW, I expected to find a kvunmap() which would do exactly what I moved 
into i915_gem_addr_unmap(), but there doesn't seem to be one.

+static inline void i915_gem_addr_unmap(void *mapped_addr)
+{
+	if (is_vmalloc_addr(mapped_addr))
+		vunmap(mapped_addr);
+	else
+		kunmap(kmap_to_page(mapped_addr));
+}

Do you think this would be of sufficient utility to be pushed upstream? 
The analogous kvfree() is quite widely used!

.Dave.

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

* ✗ Fi.CI.BAT: failure for series starting with [1/2] drm/i915: introduce and use i915_gem_object_map_range()
  2016-04-13 20:00 [PATCH 1/2] drm/i915: introduce and use i915_gem_object_map_range() Dave Gordon
  2016-04-13 20:00 ` [PATCH 2/2] drm/i915: optimise i915_gem_object_map_range() for small objects Dave Gordon
  2016-04-13 20:14 ` [PATCH 1/2] drm/i915: introduce and use i915_gem_object_map_range() Chris Wilson
@ 2016-04-14 15:55 ` Patchwork
  2 siblings, 0 replies; 5+ messages in thread
From: Patchwork @ 2016-04-14 15:55 UTC (permalink / raw)
  To: Dave Gordon; +Cc: intel-gfx

== Series Details ==

Series: series starting with [1/2] drm/i915: introduce and use i915_gem_object_map_range()
URL   : https://patchwork.freedesktop.org/series/5691/
State : failure

== Summary ==

Series 5691v1 Series without cover letter
http://patchwork.freedesktop.org/api/1.0/series/5691/revisions/1/mbox/

Test kms_force_connector_basic:
        Subgroup force-edid:
                skip       -> PASS       (hsw-gt2)
        Subgroup force-load-detect:
                skip       -> PASS       (ilk-hp8440p)

bdw-nuci7        total:203  pass:190  dwarn:0   dfail:0   fail:1   skip:12 
bdw-ultra        total:203  pass:179  dwarn:0   dfail:0   fail:1   skip:23 
bsw-nuc-2        total:202  pass:162  dwarn:0   dfail:0   fail:1   skip:39 
byt-nuc          total:202  pass:164  dwarn:0   dfail:0   fail:0   skip:38 
hsw-brixbox      total:203  pass:178  dwarn:0   dfail:0   fail:1   skip:24 
hsw-gt2          total:203  pass:183  dwarn:0   dfail:0   fail:1   skip:19 
ilk-hp8440p      total:203  pass:134  dwarn:0   dfail:0   fail:1   skip:68 
ivb-t430s        total:203  pass:174  dwarn:0   dfail:0   fail:1   skip:28 
skl-i7k-2        total:203  pass:177  dwarn:0   dfail:0   fail:1   skip:25 
skl-nuci5        total:203  pass:191  dwarn:0   dfail:0   fail:1   skip:11 
snb-x220t        total:203  pass:164  dwarn:0   dfail:0   fail:2   skip:37 
BOOT FAILED for snb-dellxps

Results at /archive/results/CI_IGT_test/Patchwork_1900/

c7583aec08ba04e2336bd9879a10f30d4e0cdc60 drm-intel-nightly: 2016y-04m-14d-14h-53m-34s UTC integration manifest
a7250be drm/i915: optimise i915_gem_object_map_range() for small objects
f52941e drm/i915: introduce and use i915_gem_object_map_range()

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2016-04-14 15:55 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-04-13 20:00 [PATCH 1/2] drm/i915: introduce and use i915_gem_object_map_range() Dave Gordon
2016-04-13 20:00 ` [PATCH 2/2] drm/i915: optimise i915_gem_object_map_range() for small objects Dave Gordon
2016-04-13 20:14 ` [PATCH 1/2] drm/i915: introduce and use i915_gem_object_map_range() Chris Wilson
2016-04-13 20:49   ` Dave Gordon
2016-04-14 15:55 ` ✗ Fi.CI.BAT: failure for series starting with [1/2] " Patchwork

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox