* [PATCH] drm/i915: Add ppgtt->kunmap_page_dma vfunc
@ 2016-05-17 13:34 Tvrtko Ursulin
2016-05-17 15:38 ` ✗ Ro.CI.BAT: failure for " Patchwork
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Tvrtko Ursulin @ 2016-05-17 13:34 UTC (permalink / raw)
To: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Rather than asking itself "am I a Broadwell, am I a Cherryview,
or am I neither of the two" on on low level page table operations,
like inserting and clearing PTEs; add a new vfunc kunmap_page_dma
and set it to appropriate flavour at ppgtt init time.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
---
drivers/gpu/drm/i915/i915_gem_gtt.c | 51 +++++++++++++++++++++++--------------
drivers/gpu/drm/i915/i915_gem_gtt.h | 1 +
2 files changed, 33 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7eab619a3eb2..dc7e128d7483 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -361,29 +361,30 @@ static void *kmap_page_dma(struct i915_page_dma *p)
return kmap_atomic(p->page);
}
+static void kunmap_page_dma(void *vaddr)
+{
+ kunmap_atomic(vaddr);
+}
+
/* We use the flushing unmap only with ppgtt structures:
* page directories, page tables and scratch pages.
*/
-static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
+static void kunmap_page_dma_flush(void *vaddr)
{
- /* There are only few exceptions for gen >=6. chv and bxt.
- * And we are not sure about the latter so play safe for now.
- */
- if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
- drm_clflush_virt_range(vaddr, PAGE_SIZE);
+ drm_clflush_virt_range(vaddr, PAGE_SIZE);
kunmap_atomic(vaddr);
}
#define kmap_px(px) kmap_page_dma(px_base(px))
-#define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
+#define kunmap_px(ppgtt, vaddr) (ppgtt)->kunmap_page_dma((vaddr))
#define setup_px(dev, px) setup_page_dma((dev), px_base(px))
#define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
-#define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
-#define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
+#define fill_px(ppgtt, px, v) fill_page_dma((ppgtt), px_base(px), (v))
+#define fill32_px(ppgtt, px, v) fill_page_dma_32((ppgtt), px_base(px), (v))
-static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
+static void fill_page_dma(struct i915_hw_ppgtt *ppgtt, struct i915_page_dma *p,
const uint64_t val)
{
int i;
@@ -392,17 +393,17 @@ static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
for (i = 0; i < 512; i++)
vaddr[i] = val;
- kunmap_page_dma(dev, vaddr);
+ ppgtt->kunmap_page_dma(vaddr);
}
-static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
- const uint32_t val32)
+static void fill_page_dma_32(struct i915_hw_ppgtt *ppgtt,
+ struct i915_page_dma *p, const uint32_t val32)
{
uint64_t v = val32;
v = v << 32 | val32;
- fill_page_dma(dev, p, v);
+ fill_page_dma(ppgtt, p, v);
}
static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
@@ -480,7 +481,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm,
scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
I915_CACHE_LLC, true);
- fill_px(vm->dev, pt, scratch_pte);
+ fill_px(i915_vm_to_ppgtt(vm), pt, scratch_pte);
}
static void gen6_initialize_pt(struct i915_address_space *vm,
@@ -493,7 +494,7 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
I915_CACHE_LLC, true, 0);
- fill32_px(vm->dev, pt, scratch_pte);
+ fill32_px(i915_vm_to_ppgtt(vm), pt, scratch_pte);
}
static struct i915_page_directory *alloc_pd(struct drm_device *dev)
@@ -540,7 +541,7 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
- fill_px(vm->dev, pd, scratch_pde);
+ fill_px(i915_vm_to_ppgtt(vm), pd, scratch_pde);
}
static int __pdp_init(struct drm_device *dev,
@@ -621,7 +622,7 @@ static void gen8_initialize_pdp(struct i915_address_space *vm,
scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
- fill_px(vm->dev, pdp, scratch_pdpe);
+ fill_px(i915_vm_to_ppgtt(vm), pdp, scratch_pdpe);
}
static void gen8_initialize_pml4(struct i915_address_space *vm,
@@ -632,7 +633,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
I915_CACHE_LLC);
- fill_px(vm->dev, pml4, scratch_pml4e);
+ fill_px(i915_vm_to_ppgtt(vm), pml4, scratch_pml4e);
}
static void
@@ -1512,8 +1513,17 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
*/
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
+ struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
int ret;
+ /* There are only few exceptions for gen >=6. chv and bxt.
+ * And we are not sure about the latter so play safe for now.
+ */
+ if (IS_BROADWELL(dev_priv) || IS_CHERRYVIEW(dev_priv))
+ ppgtt->kunmap_page_dma = kunmap_page_dma_flush;
+ else
+ ppgtt->kunmap_page_dma = kunmap_page_dma;
+
ret = gen8_init_scratch(&ppgtt->base);
if (ret)
return ret;
@@ -2073,6 +2083,9 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
int ret;
ppgtt->base.pte_encode = ggtt->base.pte_encode;
+
+ ppgtt->kunmap_page_dma = kunmap_page_dma;
+
if (IS_GEN6(dev)) {
ppgtt->switch_mm = gen6_mm_switch;
} else if (IS_HASWELL(dev)) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 62be77cac5cd..b36b997406c6 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -379,6 +379,7 @@ struct i915_hw_ppgtt {
gen6_pte_t __iomem *pd_addr;
+ void (*kunmap_page_dma)(void *vaddr);
int (*enable)(struct i915_hw_ppgtt *ppgtt);
int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
struct drm_i915_gem_request *req);
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 9+ messages in thread
* ✗ Ro.CI.BAT: failure for drm/i915: Add ppgtt->kunmap_page_dma vfunc
2016-05-17 13:34 [PATCH] drm/i915: Add ppgtt->kunmap_page_dma vfunc Tvrtko Ursulin
@ 2016-05-17 15:38 ` Patchwork
2016-05-18 11:53 ` [PATCH] " Daniele Ceraolo Spurio
2016-05-18 12:49 ` ✗ Ro.CI.BAT: failure for drm/i915: Add ppgtt->kunmap_page_dma vfunc (rev2) Patchwork
2 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2016-05-17 15:38 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: intel-gfx
== Series Details ==
Series: drm/i915: Add ppgtt->kunmap_page_dma vfunc
URL : https://patchwork.freedesktop.org/series/7296/
State : failure
== Summary ==
Series 7296v1 drm/i915: Add ppgtt->kunmap_page_dma vfunc
http://patchwork.freedesktop.org/api/1.0/series/7296/revisions/1/mbox
Test gem_exec_flush:
Subgroup basic-batch-kernel-default-cmd:
pass -> FAIL (fi-byt-n2820)
Test kms_flip:
Subgroup basic-flip-vs-wf_vblank:
pass -> FAIL (ro-hsw-i7-4770r)
pass -> FAIL (ro-byt-n2820)
Test kms_pipe_crc_basic:
Subgroup hang-read-crc-pipe-a:
pass -> DMESG-WARN (ro-ivb2-i7-3770)
fi-bdw-i7-5557u total:219 pass:206 dwarn:0 dfail:0 fail:0 skip:13
fi-bsw-n3050 total:218 pass:174 dwarn:0 dfail:0 fail:2 skip:42
fi-byt-n2820 total:218 pass:174 dwarn:0 dfail:0 fail:3 skip:41
fi-hsw-i7-4770k total:219 pass:198 dwarn:0 dfail:0 fail:0 skip:21
fi-hsw-i7-4770r total:219 pass:193 dwarn:0 dfail:0 fail:0 skip:26
fi-kbl-y total:219 pass:191 dwarn:1 dfail:0 fail:2 skip:25
fi-skl-i7-6700k total:219 pass:191 dwarn:0 dfail:0 fail:0 skip:28
fi-snb-i7-2600 total:219 pass:176 dwarn:0 dfail:0 fail:0 skip:43
ro-bdw-i5-5250u total:219 pass:181 dwarn:0 dfail:0 fail:0 skip:38
ro-bdw-i7-5557U total:219 pass:206 dwarn:0 dfail:0 fail:0 skip:13
ro-bdw-i7-5600u total:219 pass:187 dwarn:0 dfail:0 fail:0 skip:32
ro-bsw-n3050 total:219 pass:175 dwarn:0 dfail:0 fail:2 skip:42
ro-byt-n2820 total:218 pass:173 dwarn:0 dfail:0 fail:4 skip:41
ro-hsw-i3-4010u total:218 pass:193 dwarn:0 dfail:0 fail:0 skip:25
ro-hsw-i7-4770r total:219 pass:193 dwarn:0 dfail:0 fail:1 skip:25
ro-ilk-i7-620lm total:219 pass:151 dwarn:0 dfail:0 fail:1 skip:67
ro-ilk1-i5-650 total:214 pass:151 dwarn:0 dfail:0 fail:2 skip:61
ro-ivb-i7-3770 total:219 pass:183 dwarn:0 dfail:0 fail:0 skip:36
ro-ivb2-i7-3770 total:219 pass:186 dwarn:1 dfail:0 fail:0 skip:32
ro-skl-i7-6700hq total:214 pass:189 dwarn:0 dfail:0 fail:0 skip:25
ro-snb-i7-2620M total:219 pass:177 dwarn:0 dfail:0 fail:1 skip:41
Results at /archive/results/CI_IGT_test/RO_Patchwork_921/
68c6a6d drm-intel-nightly: 2016y-05m-17d-13h-59m-27s UTC integration manifest
3ca83ec drm/i915: Add ppgtt->kunmap_page_dma vfunc
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/i915: Add ppgtt->kunmap_page_dma vfunc
2016-05-17 13:34 [PATCH] drm/i915: Add ppgtt->kunmap_page_dma vfunc Tvrtko Ursulin
2016-05-17 15:38 ` ✗ Ro.CI.BAT: failure for " Patchwork
@ 2016-05-18 11:53 ` Daniele Ceraolo Spurio
2016-05-18 12:05 ` Tvrtko Ursulin
2016-05-18 12:06 ` [PATCH v2] " Tvrtko Ursulin
2016-05-18 12:49 ` ✗ Ro.CI.BAT: failure for drm/i915: Add ppgtt->kunmap_page_dma vfunc (rev2) Patchwork
2 siblings, 2 replies; 9+ messages in thread
From: Daniele Ceraolo Spurio @ 2016-05-18 11:53 UTC (permalink / raw)
To: Tvrtko Ursulin, Intel-gfx
On 17/05/16 14:34, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Rather than asking itself "am I a Broadwell, am I a Cherryview,
> or am I neither of the two" on on low level page table operations,
> like inserting and clearing PTEs; add a new vfunc kunmap_page_dma
> and set it to appropriate flavour at ppgtt init time.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_gtt.c | 51 +++++++++++++++++++++++--------------
> drivers/gpu/drm/i915/i915_gem_gtt.h | 1 +
> 2 files changed, 33 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 7eab619a3eb2..dc7e128d7483 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -361,29 +361,30 @@ static void *kmap_page_dma(struct i915_page_dma *p)
> return kmap_atomic(p->page);
> }
>
> +static void kunmap_page_dma(void *vaddr)
> +{
> + kunmap_atomic(vaddr);
> +}
> +
> /* We use the flushing unmap only with ppgtt structures:
> * page directories, page tables and scratch pages.
> */
> -static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
> +static void kunmap_page_dma_flush(void *vaddr)
> {
> - /* There are only few exceptions for gen >=6. chv and bxt.
> - * And we are not sure about the latter so play safe for now.
> - */
> - if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
> - drm_clflush_virt_range(vaddr, PAGE_SIZE);
> + drm_clflush_virt_range(vaddr, PAGE_SIZE);
>
> kunmap_atomic(vaddr);
> }
>
> #define kmap_px(px) kmap_page_dma(px_base(px))
> -#define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
> +#define kunmap_px(ppgtt, vaddr) (ppgtt)->kunmap_page_dma((vaddr))
>
> #define setup_px(dev, px) setup_page_dma((dev), px_base(px))
> #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
> -#define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
> -#define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
> +#define fill_px(ppgtt, px, v) fill_page_dma((ppgtt), px_base(px), (v))
> +#define fill32_px(ppgtt, px, v) fill_page_dma_32((ppgtt), px_base(px), (v))
>
This feels a bit asymmetric, because some of the functions expect the
ppgtt pointer while others expect the dev pointer. Nothing clean comes
to mind to solve this immediately without adding extra pointer
indirections, but I guess we could come back to standardize it in the
future if we start passing the ppgtt pointer around more (e.g. like in
https://patchwork.freedesktop.org/patch/83777/). Not a blocker for this
patch anyway.
> -static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
> +static void fill_page_dma(struct i915_hw_ppgtt *ppgtt, struct i915_page_dma *p,
> const uint64_t val)
> {
> int i;
> @@ -392,17 +393,17 @@ static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
> for (i = 0; i < 512; i++)
> vaddr[i] = val;
>
> - kunmap_page_dma(dev, vaddr);
> + ppgtt->kunmap_page_dma(vaddr);
> }
>
> -static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
> - const uint32_t val32)
> +static void fill_page_dma_32(struct i915_hw_ppgtt *ppgtt,
> + struct i915_page_dma *p, const uint32_t val32)
> {
> uint64_t v = val32;
>
> v = v << 32 | val32;
>
> - fill_page_dma(dev, p, v);
> + fill_page_dma(ppgtt, p, v);
> }
>
> static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
> @@ -480,7 +481,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm,
> scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
> I915_CACHE_LLC, true);
>
> - fill_px(vm->dev, pt, scratch_pte);
> + fill_px(i915_vm_to_ppgtt(vm), pt, scratch_pte);
> }
>
> static void gen6_initialize_pt(struct i915_address_space *vm,
> @@ -493,7 +494,7 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
> scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
> I915_CACHE_LLC, true, 0);
>
> - fill32_px(vm->dev, pt, scratch_pte);
> + fill32_px(i915_vm_to_ppgtt(vm), pt, scratch_pte);
> }
>
> static struct i915_page_directory *alloc_pd(struct drm_device *dev)
> @@ -540,7 +541,7 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
>
> scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
>
> - fill_px(vm->dev, pd, scratch_pde);
> + fill_px(i915_vm_to_ppgtt(vm), pd, scratch_pde);
> }
>
> static int __pdp_init(struct drm_device *dev,
> @@ -621,7 +622,7 @@ static void gen8_initialize_pdp(struct i915_address_space *vm,
>
> scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
>
> - fill_px(vm->dev, pdp, scratch_pdpe);
> + fill_px(i915_vm_to_ppgtt(vm), pdp, scratch_pdpe);
> }
>
> static void gen8_initialize_pml4(struct i915_address_space *vm,
> @@ -632,7 +633,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
> scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
> I915_CACHE_LLC);
>
> - fill_px(vm->dev, pml4, scratch_pml4e);
> + fill_px(i915_vm_to_ppgtt(vm), pml4, scratch_pml4e);
> }
>
> static void
> @@ -1512,8 +1513,17 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
> */
> static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
> {
> + struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
> int ret;
>
> + /* There are only few exceptions for gen >=6. chv and bxt.
> + * And we are not sure about the latter so play safe for now.
> + */
> + if (IS_BROADWELL(dev_priv) || IS_CHERRYVIEW(dev_priv))
This should be IS_BROXTON instead of IS_BROADWELL.
> + ppgtt->kunmap_page_dma = kunmap_page_dma_flush;
> + else
> + ppgtt->kunmap_page_dma = kunmap_page_dma;
> +
This virtual function assignment comes before the gen8_init_scratch call
while all the others are after that. To keep all of them together we
could move the call to gen8_init_scratch further down.
Regards,
Daniele
> ret = gen8_init_scratch(&ppgtt->base);
> if (ret)
> return ret;
> @@ -2073,6 +2083,9 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
> int ret;
>
> ppgtt->base.pte_encode = ggtt->base.pte_encode;
> +
> + ppgtt->kunmap_page_dma = kunmap_page_dma;
> +
> if (IS_GEN6(dev)) {
> ppgtt->switch_mm = gen6_mm_switch;
> } else if (IS_HASWELL(dev)) {
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index 62be77cac5cd..b36b997406c6 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -379,6 +379,7 @@ struct i915_hw_ppgtt {
>
> gen6_pte_t __iomem *pd_addr;
>
> + void (*kunmap_page_dma)(void *vaddr);
> int (*enable)(struct i915_hw_ppgtt *ppgtt);
> int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
> struct drm_i915_gem_request *req);
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] drm/i915: Add ppgtt->kunmap_page_dma vfunc
2016-05-18 11:53 ` [PATCH] " Daniele Ceraolo Spurio
@ 2016-05-18 12:05 ` Tvrtko Ursulin
2016-05-18 12:06 ` [PATCH v2] " Tvrtko Ursulin
1 sibling, 0 replies; 9+ messages in thread
From: Tvrtko Ursulin @ 2016-05-18 12:05 UTC (permalink / raw)
To: Daniele Ceraolo Spurio, Intel-gfx
On 18/05/16 12:53, Daniele Ceraolo Spurio wrote:
>
>
> On 17/05/16 14:34, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Rather than asking itself "am I a Broadwell, am I a Cherryview,
>> or am I neither of the two" on on low level page table operations,
>> like inserting and clearing PTEs; add a new vfunc kunmap_page_dma
>> and set it to appropriate flavour at ppgtt init time.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_gem_gtt.c | 51
>> +++++++++++++++++++++++--------------
>> drivers/gpu/drm/i915/i915_gem_gtt.h | 1 +
>> 2 files changed, 33 insertions(+), 19 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> index 7eab619a3eb2..dc7e128d7483 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> @@ -361,29 +361,30 @@ static void *kmap_page_dma(struct i915_page_dma *p)
>> return kmap_atomic(p->page);
>> }
>> +static void kunmap_page_dma(void *vaddr)
>> +{
>> + kunmap_atomic(vaddr);
>> +}
>> +
>> /* We use the flushing unmap only with ppgtt structures:
>> * page directories, page tables and scratch pages.
>> */
>> -static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
>> +static void kunmap_page_dma_flush(void *vaddr)
>> {
>> - /* There are only few exceptions for gen >=6. chv and bxt.
>> - * And we are not sure about the latter so play safe for now.
>> - */
>> - if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
>> - drm_clflush_virt_range(vaddr, PAGE_SIZE);
>> + drm_clflush_virt_range(vaddr, PAGE_SIZE);
>> kunmap_atomic(vaddr);
>> }
>> #define kmap_px(px) kmap_page_dma(px_base(px))
>> -#define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev,
>> (vaddr))
>> +#define kunmap_px(ppgtt, vaddr) (ppgtt)->kunmap_page_dma((vaddr))
>> #define setup_px(dev, px) setup_page_dma((dev), px_base(px))
>> #define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
>> -#define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
>> -#define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
>> +#define fill_px(ppgtt, px, v) fill_page_dma((ppgtt), px_base(px), (v))
>> +#define fill32_px(ppgtt, px, v) fill_page_dma_32((ppgtt),
>> px_base(px), (v))
>
> This feels a bit asymmetric, because some of the functions expect the
> ppgtt pointer while others expect the dev pointer. Nothing clean comes
> to mind to solve this immediately without adding extra pointer
> indirections, but I guess we could come back to standardize it in the
> future if we start passing the ppgtt pointer around more (e.g. like in
> https://patchwork.freedesktop.org/patch/83777/). Not a blocker for this
> patch anyway.
Yes I pretty much agree.
>> -static void fill_page_dma(struct drm_device *dev, struct
>> i915_page_dma *p,
>> +static void fill_page_dma(struct i915_hw_ppgtt *ppgtt, struct
>> i915_page_dma *p,
>> const uint64_t val)
>> {
>> int i;
>> @@ -392,17 +393,17 @@ static void fill_page_dma(struct drm_device
>> *dev, struct i915_page_dma *p,
>> for (i = 0; i < 512; i++)
>> vaddr[i] = val;
>> - kunmap_page_dma(dev, vaddr);
>> + ppgtt->kunmap_page_dma(vaddr);
>> }
>> -static void fill_page_dma_32(struct drm_device *dev, struct
>> i915_page_dma *p,
>> - const uint32_t val32)
>> +static void fill_page_dma_32(struct i915_hw_ppgtt *ppgtt,
>> + struct i915_page_dma *p, const uint32_t val32)
>> {
>> uint64_t v = val32;
>> v = v << 32 | val32;
>> - fill_page_dma(dev, p, v);
>> + fill_page_dma(ppgtt, p, v);
>> }
>> static struct i915_page_scratch *alloc_scratch_page(struct
>> drm_device *dev)
>> @@ -480,7 +481,7 @@ static void gen8_initialize_pt(struct
>> i915_address_space *vm,
>> scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
>> I915_CACHE_LLC, true);
>> - fill_px(vm->dev, pt, scratch_pte);
>> + fill_px(i915_vm_to_ppgtt(vm), pt, scratch_pte);
>> }
>> static void gen6_initialize_pt(struct i915_address_space *vm,
>> @@ -493,7 +494,7 @@ static void gen6_initialize_pt(struct
>> i915_address_space *vm,
>> scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
>> I915_CACHE_LLC, true, 0);
>> - fill32_px(vm->dev, pt, scratch_pte);
>> + fill32_px(i915_vm_to_ppgtt(vm), pt, scratch_pte);
>> }
>> static struct i915_page_directory *alloc_pd(struct drm_device *dev)
>> @@ -540,7 +541,7 @@ static void gen8_initialize_pd(struct
>> i915_address_space *vm,
>> scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt),
>> I915_CACHE_LLC);
>> - fill_px(vm->dev, pd, scratch_pde);
>> + fill_px(i915_vm_to_ppgtt(vm), pd, scratch_pde);
>> }
>> static int __pdp_init(struct drm_device *dev,
>> @@ -621,7 +622,7 @@ static void gen8_initialize_pdp(struct
>> i915_address_space *vm,
>> scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd),
>> I915_CACHE_LLC);
>> - fill_px(vm->dev, pdp, scratch_pdpe);
>> + fill_px(i915_vm_to_ppgtt(vm), pdp, scratch_pdpe);
>> }
>> static void gen8_initialize_pml4(struct i915_address_space *vm,
>> @@ -632,7 +633,7 @@ static void gen8_initialize_pml4(struct
>> i915_address_space *vm,
>> scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
>> I915_CACHE_LLC);
>> - fill_px(vm->dev, pml4, scratch_pml4e);
>> + fill_px(i915_vm_to_ppgtt(vm), pml4, scratch_pml4e);
>> }
>> static void
>> @@ -1512,8 +1513,17 @@ static int
>> gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
>> */
>> static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>> {
>> + struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
>> int ret;
>> + /* There are only few exceptions for gen >=6. chv and bxt.
>> + * And we are not sure about the latter so play safe for now.
>> + */
>> + if (IS_BROADWELL(dev_priv) || IS_CHERRYVIEW(dev_priv))
>
> This should be IS_BROXTON instead of IS_BROADWELL.
Well spotted, guess I was distracted. :)
>
>> + ppgtt->kunmap_page_dma = kunmap_page_dma_flush;
>> + else
>> + ppgtt->kunmap_page_dma = kunmap_page_dma;
>> +
>
> This virtual function assignment comes before the gen8_init_scratch call
> while all the others are after that. To keep all of them together we
> could move the call to gen8_init_scratch further down.
Yes I agree once more, I'll send a new version which will look a bit
better in this respect.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH v2] drm/i915: Add ppgtt->kunmap_page_dma vfunc
2016-05-18 11:53 ` [PATCH] " Daniele Ceraolo Spurio
2016-05-18 12:05 ` Tvrtko Ursulin
@ 2016-05-18 12:06 ` Tvrtko Ursulin
2016-05-18 12:22 ` Chris Wilson
1 sibling, 1 reply; 9+ messages in thread
From: Tvrtko Ursulin @ 2016-05-18 12:06 UTC (permalink / raw)
To: Intel-gfx
From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Rather than asking itself "am I a Broadwell, am I a Cherryview,
or am I neither of the two" on on low level page table operations,
like inserting and clearing PTEs; add a new vfunc kunmap_page_dma
and set it to appropriate flavour at ppgtt init time.
v2: Fix platfrom condition and group vfunc init more together.
(Daniele Ceraolo Spurio)
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
---
drivers/gpu/drm/i915/i915_gem_gtt.c | 59 ++++++++++++++++++++++---------------
drivers/gpu/drm/i915/i915_gem_gtt.h | 1 +
2 files changed, 37 insertions(+), 23 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 7eab619a3eb2..892f8b41dca7 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -361,29 +361,30 @@ static void *kmap_page_dma(struct i915_page_dma *p)
return kmap_atomic(p->page);
}
+static void kunmap_page_dma(void *vaddr)
+{
+ kunmap_atomic(vaddr);
+}
+
/* We use the flushing unmap only with ppgtt structures:
* page directories, page tables and scratch pages.
*/
-static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
+static void kunmap_page_dma_flush(void *vaddr)
{
- /* There are only few exceptions for gen >=6. chv and bxt.
- * And we are not sure about the latter so play safe for now.
- */
- if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
- drm_clflush_virt_range(vaddr, PAGE_SIZE);
+ drm_clflush_virt_range(vaddr, PAGE_SIZE);
kunmap_atomic(vaddr);
}
#define kmap_px(px) kmap_page_dma(px_base(px))
-#define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))
+#define kunmap_px(ppgtt, vaddr) (ppgtt)->kunmap_page_dma((vaddr))
#define setup_px(dev, px) setup_page_dma((dev), px_base(px))
#define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
-#define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
-#define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))
+#define fill_px(ppgtt, px, v) fill_page_dma((ppgtt), px_base(px), (v))
+#define fill32_px(ppgtt, px, v) fill_page_dma_32((ppgtt), px_base(px), (v))
-static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
+static void fill_page_dma(struct i915_hw_ppgtt *ppgtt, struct i915_page_dma *p,
const uint64_t val)
{
int i;
@@ -392,17 +393,17 @@ static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
for (i = 0; i < 512; i++)
vaddr[i] = val;
- kunmap_page_dma(dev, vaddr);
+ ppgtt->kunmap_page_dma(vaddr);
}
-static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
- const uint32_t val32)
+static void fill_page_dma_32(struct i915_hw_ppgtt *ppgtt,
+ struct i915_page_dma *p, const uint32_t val32)
{
uint64_t v = val32;
v = v << 32 | val32;
- fill_page_dma(dev, p, v);
+ fill_page_dma(ppgtt, p, v);
}
static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
@@ -480,7 +481,7 @@ static void gen8_initialize_pt(struct i915_address_space *vm,
scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
I915_CACHE_LLC, true);
- fill_px(vm->dev, pt, scratch_pte);
+ fill_px(i915_vm_to_ppgtt(vm), pt, scratch_pte);
}
static void gen6_initialize_pt(struct i915_address_space *vm,
@@ -493,7 +494,7 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
I915_CACHE_LLC, true, 0);
- fill32_px(vm->dev, pt, scratch_pte);
+ fill32_px(i915_vm_to_ppgtt(vm), pt, scratch_pte);
}
static struct i915_page_directory *alloc_pd(struct drm_device *dev)
@@ -540,7 +541,7 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
- fill_px(vm->dev, pd, scratch_pde);
+ fill_px(i915_vm_to_ppgtt(vm), pd, scratch_pde);
}
static int __pdp_init(struct drm_device *dev,
@@ -621,7 +622,7 @@ static void gen8_initialize_pdp(struct i915_address_space *vm,
scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
- fill_px(vm->dev, pdp, scratch_pdpe);
+ fill_px(i915_vm_to_ppgtt(vm), pdp, scratch_pdpe);
}
static void gen8_initialize_pml4(struct i915_address_space *vm,
@@ -632,7 +633,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
scratch_pml4e = gen8_pml4e_encode(px_dma(vm->scratch_pdp),
I915_CACHE_LLC);
- fill_px(vm->dev, pml4, scratch_pml4e);
+ fill_px(i915_vm_to_ppgtt(vm), pml4, scratch_pml4e);
}
static void
@@ -1512,12 +1513,9 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
*/
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
+ struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);
int ret;
- ret = gen8_init_scratch(&ppgtt->base);
- if (ret)
- return ret;
-
ppgtt->base.start = 0;
ppgtt->base.cleanup = gen8_ppgtt_cleanup;
ppgtt->base.allocate_va_range = gen8_alloc_va_range;
@@ -1527,6 +1525,18 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt->base.bind_vma = ppgtt_bind_vma;
ppgtt->debug_dump = gen8_dump_ppgtt;
+ /* There are only few exceptions for gen >=6. chv and bxt.
+ * And we are not sure about the latter so play safe for now.
+ */
+ if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
+ ppgtt->kunmap_page_dma = kunmap_page_dma_flush;
+ else
+ ppgtt->kunmap_page_dma = kunmap_page_dma;
+
+ ret = gen8_init_scratch(&ppgtt->base);
+ if (ret)
+ return ret;
+
if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
if (ret)
@@ -2073,6 +2083,9 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
int ret;
ppgtt->base.pte_encode = ggtt->base.pte_encode;
+
+ ppgtt->kunmap_page_dma = kunmap_page_dma;
+
if (IS_GEN6(dev)) {
ppgtt->switch_mm = gen6_mm_switch;
} else if (IS_HASWELL(dev)) {
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 62be77cac5cd..b36b997406c6 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -379,6 +379,7 @@ struct i915_hw_ppgtt {
gen6_pte_t __iomem *pd_addr;
+ void (*kunmap_page_dma)(void *vaddr);
int (*enable)(struct i915_hw_ppgtt *ppgtt);
int (*switch_mm)(struct i915_hw_ppgtt *ppgtt,
struct drm_i915_gem_request *req);
--
1.9.1
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 9+ messages in thread
* Re: [PATCH v2] drm/i915: Add ppgtt->kunmap_page_dma vfunc
2016-05-18 12:06 ` [PATCH v2] " Tvrtko Ursulin
@ 2016-05-18 12:22 ` Chris Wilson
2016-05-18 13:31 ` Tvrtko Ursulin
0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2016-05-18 12:22 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: Intel-gfx
On Wed, May 18, 2016 at 01:06:06PM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>
> Rather than asking itself "am I a Broadwell, am I a Cherryview,
> or am I neither of the two" on on low level page table operations,
> like inserting and clearing PTEs; add a new vfunc kunmap_page_dma
> and set it to appropriate flavour at ppgtt init time.
>
> v2: Fix platfrom condition and group vfunc init more together.
> (Daniele Ceraolo Spurio)
Or we can take a different approach and use a WC mapping for the page.
The patch is a bit messy since we need to feed not the device into the
unmap function but the vm, but the guts are:
@@ -323,19 +323,21 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr,
return pte;
}
-static int __setup_page_dma(struct drm_device *dev,
- struct i915_page_dma *p, gfp_t flags)
+static int __setup_page_dma(struct i915_address_space *vm,
+ struct i915_page_dma *p,
+ gfp_t flags)
{
- struct device *device = &dev->pdev->dev;
-
p->page = alloc_page(flags);
if (!p->page)
return -ENOMEM;
- p->daddr = dma_map_page(device,
- p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
+ if (vm->pt_kmap_wc)
+ set_pages_array_wc(&p->page, 1);
+
+ p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE,
+ PCI_DMA_BIDIRECTIONAL);
- if (dma_mapping_error(device, p->daddr)) {
+ if (dma_mapping_error(vm->dma, p->daddr)) {
__free_page(p->page);
return -EINVAL;
}
@@ -343,94 +345,89 @@ static int __setup_page_dma(struct drm_device *dev,
return 0;
}
-static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
+static int setup_page_dma(struct i915_address_space *vm,
+ struct i915_page_dma *p)
{
- return __setup_page_dma(dev, p, I915_GFP_DMA);
+ return __setup_page_dma(vm, p, I915_GFP_DMA);
}
-static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
+static void cleanup_page_dma(struct i915_address_space *vm,
+ struct i915_page_dma *p)
{
- if (WARN_ON(!p->page))
- return;
+ dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+
+ if (vm->pt_kmap_wc)
+ set_pages_array_wb(&p->page, 1);
- dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
__free_page(p->page);
- memset(p, 0, sizeof(*p));
}
@@ -1484,8 +1475,16 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt->base.bind_vma = ppgtt_bind_vma;
ppgtt->debug_dump = gen8_dump_ppgtt;
+ /* There are only few exceptions for gen >=6. chv and bxt.
+ * And we are not sure about the latter so play safe for now.
+ */
+ if (IS_CHERRYVIEW(ppgtt->base.dev) || IS_BROXTON(ppgtt->base.dev)) {
+ ppgtt->base.pt_kmap_wc = true;
+ ppgtt->base.pt_kmap_prot = pgprot_writecombine(PAGE_KERNEL_IO);
+ }
+
Advantage: we avoid the clflush after every update
Disadvantage: we invoke set_memory_*() on every page used by the ppggtt.
(To reduce that cost, I have in made keeping a pagevec cache of WC
pages.)
Sadly, we can't just use kmap_atomic_prot() as it is 32-bit only!!!
Note that I started with exactly this patch (using a kunmap vfunc) many
moons ago and switched to the pgprot_t based approach instead.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* ✗ Ro.CI.BAT: failure for drm/i915: Add ppgtt->kunmap_page_dma vfunc (rev2)
2016-05-17 13:34 [PATCH] drm/i915: Add ppgtt->kunmap_page_dma vfunc Tvrtko Ursulin
2016-05-17 15:38 ` ✗ Ro.CI.BAT: failure for " Patchwork
2016-05-18 11:53 ` [PATCH] " Daniele Ceraolo Spurio
@ 2016-05-18 12:49 ` Patchwork
2 siblings, 0 replies; 9+ messages in thread
From: Patchwork @ 2016-05-18 12:49 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: intel-gfx
== Series Details ==
Series: drm/i915: Add ppgtt->kunmap_page_dma vfunc (rev2)
URL : https://patchwork.freedesktop.org/series/7296/
State : failure
== Summary ==
Series 7296v2 drm/i915: Add ppgtt->kunmap_page_dma vfunc
http://patchwork.freedesktop.org/api/1.0/series/7296/revisions/2/mbox
Test gem_exec_flush:
Subgroup basic-batch-kernel-default-cmd:
pass -> FAIL (ro-byt-n2820)
Test kms_pipe_crc_basic:
Subgroup nonblocking-crc-pipe-c-frame-sequence:
pass -> SKIP (fi-hsw-i7-4770r)
Test kms_sink_crc_basic:
pass -> SKIP (ro-skl-i7-6700hq)
fi-bdw-i7-5557u total:219 pass:206 dwarn:0 dfail:0 fail:0 skip:13
fi-bsw-n3050 total:218 pass:174 dwarn:0 dfail:0 fail:2 skip:42
fi-byt-n2820 total:218 pass:175 dwarn:0 dfail:0 fail:2 skip:41
fi-hsw-i7-4770k total:219 pass:198 dwarn:0 dfail:0 fail:0 skip:21
fi-hsw-i7-4770r total:219 pass:192 dwarn:0 dfail:0 fail:0 skip:27
fi-skl-i7-6700k total:219 pass:191 dwarn:0 dfail:0 fail:0 skip:28
ro-bdw-i5-5250u total:219 pass:181 dwarn:0 dfail:0 fail:0 skip:38
ro-bdw-i7-5557U total:219 pass:206 dwarn:0 dfail:0 fail:0 skip:13
ro-bdw-i7-5600u total:219 pass:187 dwarn:0 dfail:0 fail:0 skip:32
ro-bsw-n3050 total:219 pass:175 dwarn:0 dfail:0 fail:2 skip:42
ro-byt-n2820 total:218 pass:174 dwarn:0 dfail:0 fail:3 skip:41
ro-hsw-i3-4010u total:218 pass:193 dwarn:0 dfail:0 fail:0 skip:25
ro-hsw-i7-4770r total:219 pass:194 dwarn:0 dfail:0 fail:0 skip:25
ro-ilk-i7-620lm total:219 pass:151 dwarn:0 dfail:0 fail:1 skip:67
ro-ilk1-i5-650 total:214 pass:151 dwarn:0 dfail:0 fail:2 skip:61
ro-ivb-i7-3770 total:219 pass:183 dwarn:0 dfail:0 fail:0 skip:36
ro-ivb2-i7-3770 total:219 pass:187 dwarn:0 dfail:0 fail:0 skip:32
ro-skl-i7-6700hq total:214 pass:189 dwarn:0 dfail:0 fail:0 skip:25
ro-snb-i7-2620M total:219 pass:177 dwarn:0 dfail:0 fail:1 skip:41
Results at /archive/results/CI_IGT_test/RO_Patchwork_928/
1f8b54c drm-intel-nightly: 2016y-05m-18d-11h-49m-33s UTC integration manifest
eda05f2 drm/i915: Add ppgtt->kunmap_page_dma vfunc
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2] drm/i915: Add ppgtt->kunmap_page_dma vfunc
2016-05-18 12:22 ` Chris Wilson
@ 2016-05-18 13:31 ` Tvrtko Ursulin
2016-05-18 13:51 ` Chris Wilson
0 siblings, 1 reply; 9+ messages in thread
From: Tvrtko Ursulin @ 2016-05-18 13:31 UTC (permalink / raw)
To: Chris Wilson, Intel-gfx
On 18/05/16 13:22, Chris Wilson wrote:
> On Wed, May 18, 2016 at 01:06:06PM +0100, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
>>
>> Rather than asking itself "am I a Broadwell, am I a Cherryview,
>> or am I neither of the two" on on low level page table operations,
>> like inserting and clearing PTEs; add a new vfunc kunmap_page_dma
>> and set it to appropriate flavour at ppgtt init time.
>>
>> v2: Fix platfrom condition and group vfunc init more together.
>> (Daniele Ceraolo Spurio)
>
> Or we can take a different approach and use a WC mapping for the page.
>
> The patch is a bit messy since we need to feed not the device into the
> unmap function but the vm, but the guts are:
>
> @@ -323,19 +323,21 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr,
> return pte;
> }
>
> -static int __setup_page_dma(struct drm_device *dev,
> - struct i915_page_dma *p, gfp_t flags)
> +static int __setup_page_dma(struct i915_address_space *vm,
> + struct i915_page_dma *p,
> + gfp_t flags)
> {
> - struct device *device = &dev->pdev->dev;
> -
> p->page = alloc_page(flags);
> if (!p->page)
> return -ENOMEM;
>
> - p->daddr = dma_map_page(device,
> - p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
> + if (vm->pt_kmap_wc)
> + set_pages_array_wc(&p->page, 1);
> +
> + p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE,
> + PCI_DMA_BIDIRECTIONAL);
>
> - if (dma_mapping_error(device, p->daddr)) {
> + if (dma_mapping_error(vm->dma, p->daddr)) {
> __free_page(p->page);
> return -EINVAL;
> }
> @@ -343,94 +345,89 @@ static int __setup_page_dma(struct drm_device *dev,
> return 0;
> }
>
> -static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
> +static int setup_page_dma(struct i915_address_space *vm,
> + struct i915_page_dma *p)
> {
> - return __setup_page_dma(dev, p, I915_GFP_DMA);
> + return __setup_page_dma(vm, p, I915_GFP_DMA);
> }
>
> -static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
> +static void cleanup_page_dma(struct i915_address_space *vm,
> + struct i915_page_dma *p)
> {
> - if (WARN_ON(!p->page))
> - return;
> + dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
> +
> + if (vm->pt_kmap_wc)
> + set_pages_array_wb(&p->page, 1);
>
> - dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
> __free_page(p->page);
> - memset(p, 0, sizeof(*p));
> }
>
>
>
> @@ -1484,8 +1475,16 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
> ppgtt->base.bind_vma = ppgtt_bind_vma;
> ppgtt->debug_dump = gen8_dump_ppgtt;
>
> + /* There are only few exceptions for gen >=6. chv and bxt.
> + * And we are not sure about the latter so play safe for now.
> + */
> + if (IS_CHERRYVIEW(ppgtt->base.dev) || IS_BROXTON(ppgtt->base.dev)) {
> + ppgtt->base.pt_kmap_wc = true;
> + ppgtt->base.pt_kmap_prot = pgprot_writecombine(PAGE_KERNEL_IO);
> + }
> +
>
> Advantage: we avoid the clflush after every update
> Disadvantage: we invoke set_memory_*() on every page used by the ppggtt.
> (To reduce that cost, I have in made keeping a pagevec cache of WC
> pages.)
>
> Sadly, we can't just use kmap_atomic_prot() as it is 32-bit only!!!
>
> Note that I started with exactly this patch (using a kunmap vfunc) many
> moons ago and switched to the pgprot_t based approach instead.
The pattern of writes is such that write-combine makes a difference here
versus an explicit flush? Taking into consideration the complexity of
the pagevec cache you mention is required for the set_memory_wc route?
Many moons ago comment makes me think we could have at least had an
inter-rim vfunc solution in place all this time.
Regards,
Tvrtko
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH v2] drm/i915: Add ppgtt->kunmap_page_dma vfunc
2016-05-18 13:31 ` Tvrtko Ursulin
@ 2016-05-18 13:51 ` Chris Wilson
0 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2016-05-18 13:51 UTC (permalink / raw)
To: Tvrtko Ursulin; +Cc: Intel-gfx
On Wed, May 18, 2016 at 02:31:28PM +0100, Tvrtko Ursulin wrote:
>
> On 18/05/16 13:22, Chris Wilson wrote:
> >Advantage: we avoid the clflush after every update
> >Disadvantage: we invoke set_memory_*() on every page used by the ppggtt.
> >(To reduce that cost, I have in made keeping a pagevec cache of WC
> >pages.)
> >
> >Sadly, we can't just use kmap_atomic_prot() as it is 32-bit only!!!
> >
> >Note that I started with exactly this patch (using a kunmap vfunc) many
> >moons ago and switched to the pgprot_t based approach instead.
>
> The pattern of writes is such that write-combine makes a difference
> here versus an explicit flush? Taking into consideration the
> complexity of the pagevec cache you mention is required for the
> set_memory_wc route?
Yes, I think the WCB is the preferred path here (versus doing a sweep
over the whole page to clflush). Most frequent updates tends to be
framebuffers being passed between clients (since we close and unbind the
VMA every time) which are ~8 whole pages each. (We can expect to have
flush about 10 pages to misalignment, and so even if we were able to
match WC we still do 25% more clflushes).
It's not required for the set_memory_wc route, just will be a nice
addition. The principle issue should be fixed already in mm/vmalloc.c -
and note that we already have the issue with set_memory_uc/_wb being
invoked per context for no good reason.
> Many moons ago comment makes me think we could have at least had an
> inter-rim vfunc solution in place all this time.
Similarly there is a queue of patches to try and tackle some of the
regressions over the last couple of year that are now lost because no one
dares review them.
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2016-05-18 13:51 UTC | newest]
Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-05-17 13:34 [PATCH] drm/i915: Add ppgtt->kunmap_page_dma vfunc Tvrtko Ursulin
2016-05-17 15:38 ` ✗ Ro.CI.BAT: failure for " Patchwork
2016-05-18 11:53 ` [PATCH] " Daniele Ceraolo Spurio
2016-05-18 12:05 ` Tvrtko Ursulin
2016-05-18 12:06 ` [PATCH v2] " Tvrtko Ursulin
2016-05-18 12:22 ` Chris Wilson
2016-05-18 13:31 ` Tvrtko Ursulin
2016-05-18 13:51 ` Chris Wilson
2016-05-18 12:49 ` ✗ Ro.CI.BAT: failure for drm/i915: Add ppgtt->kunmap_page_dma vfunc (rev2) Patchwork
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox