Intel-GFX Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] drm/i915: Use a cached mapping for the physical HWS
@ 2017-05-17 13:02 Chris Wilson
  2017-05-19  8:53 ` Joonas Lahtinen
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: Chris Wilson @ 2017-05-17 13:02 UTC (permalink / raw)
  To: intel-gfx

Older gen use a physical address for the hardware status page, for which
we use cache-coherent writes. As the writes are into the cpu cache, we use
a normal WB mapped page to read the HWS, used for our seqno tracking.

Anecdotally, I observed lost breadcrumbs writes into the HWS on i965gm,
which so far have not reoccurred with this patch. How reliable that
evidence is remains to be seen.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         |  1 -
 drivers/gpu/drm/i915/intel_ringbuffer.c | 34 ++++++++++++++++++++-------------
 drivers/gpu/drm/i915/intel_ringbuffer.h |  5 ++++-
 3 files changed, 25 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 8ec3bb2913d4..bc223471d391 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2061,7 +2061,6 @@ struct drm_i915_private {
 	struct i915_gem_context *kernel_context;
 	struct intel_engine_cs *engine[I915_NUM_ENGINES];
 
-	struct drm_dma_handle *status_page_dmah;
 	struct resource mch_res;
 
 	/* protects the irq masks */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c32a4ba9579f..3d80cbcd5d94 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -342,9 +342,10 @@ static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
 	struct drm_i915_private *dev_priv = engine->i915;
 	u32 addr;
 
-	addr = dev_priv->status_page_dmah->busaddr;
+	addr = engine->status_page.dma_addr;
 	if (INTEL_GEN(dev_priv) >= 4)
-		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
+		addr |= (engine->status_page.dma_addr >> 28) & 0xf0;
+
 	I915_WRITE(HWS_PGA, addr);
 }
 
@@ -1000,12 +1001,14 @@ i915_emit_bb_start(struct drm_i915_gem_request *req,
 
 static void cleanup_phys_status_page(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	if (!dev_priv->status_page_dmah)
+	if (!engine->status_page.page_addr)
 		return;
 
-	drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
+	dma_unmap_page(engine->i915->drm.dev,
+		       engine->status_page.dma_addr, PAGE_SIZE,
+		       PCI_DMA_BIDIRECTIONAL);
+
+	__free_page(virt_to_page(engine->status_page.page_addr));
 	engine->status_page.page_addr = NULL;
 }
 
@@ -1091,17 +1094,22 @@ static int init_status_page(struct intel_engine_cs *engine)
 
 static int init_phys_status_page(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
+	struct page *page;
 
-	GEM_BUG_ON(engine->id != RCS);
+	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (!page)
+		return -ENOMEM;
 
-	dev_priv->status_page_dmah =
-		drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
-	if (!dev_priv->status_page_dmah)
+	engine->status_page.dma_addr =
+		dma_map_page(engine->i915->drm.dev, page, 0, PAGE_SIZE,
+			     PCI_DMA_BIDIRECTIONAL);
+	if (dma_mapping_error(engine->i915->drm.dev,
+			      engine->status_page.dma_addr)) {
+		__free_page(page);
 		return -ENOMEM;
+	}
 
-	engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
-	memset(engine->status_page.page_addr, 0, PAGE_SIZE);
+	engine->status_page.page_addr = page_address(page);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 46927e9530a2..fc3a2ac8914e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -18,9 +18,12 @@
 #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))
 
 struct intel_hw_status_page {
-	struct i915_vma *vma;
 	u32 *page_addr;
 	u32 ggtt_offset;
+	union {
+		struct i915_vma *vma;
+		dma_addr_t dma_addr;
+	};
 };
 
 #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Use a cached mapping for the physical HWS
  2017-05-17 13:02 [PATCH] drm/i915: Use a cached mapping for the physical HWS Chris Wilson
@ 2017-05-19  8:53 ` Joonas Lahtinen
  2017-05-19  9:08   ` Chris Wilson
  2017-05-22  8:55 ` Daniel Vetter
  2017-05-22 11:55 ` [PATCH v3] " Chris Wilson
  2 siblings, 1 reply; 9+ messages in thread
From: Joonas Lahtinen @ 2017-05-19  8:53 UTC (permalink / raw)
  To: Chris Wilson, intel-gfx

On ke, 2017-05-17 at 14:02 +0100, Chris Wilson wrote:
> Older gen use a physical address for the hardware status page, for which
> we use cache-coherent writes. As the writes are into the cpu cache, we use
> a normal WB mapped page to read the HWS, used for our seqno tracking.
> 
> Anecdotally, I observed lost breadcrumbs writes into the HWS on i965gm,
> which so far have not reoccurred with this patch. How reliable that
> evidence is remains to be seen.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

<SNIP>

> @@ -1091,17 +1094,22 @@ static int init_status_page(struct intel_engine_cs *engine)
>  
>  static int init_phys_status_page(struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_private *dev_priv = engine->i915;
> +	struct page *page;
>  
> -	GEM_BUG_ON(engine->id != RCS);

Was this removal deliberate?

> +	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +	if (!page)
> +		return -ENOMEM;
>  
> -	dev_priv->status_page_dmah =
> -		drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
> -	if (!dev_priv->status_page_dmah)
> +	engine->status_page.dma_addr =
> +		dma_map_page(engine->i915->drm.dev, page, 0, PAGE_SIZE,
> +			     PCI_DMA_BIDIRECTIONAL);
> +	if (dma_mapping_error(engine->i915->drm.dev,
> +			      engine->status_page.dma_addr)) {
> +		__free_page(page);
>  		return -ENOMEM;

Nitpicking, but -ENOSPC would be more accurate?

Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

Regards, Joonas
-- 
Joonas Lahtinen
Open Source Technology Center
Intel Corporation
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Use a cached mapping for the physical HWS
  2017-05-19  8:53 ` Joonas Lahtinen
@ 2017-05-19  9:08   ` Chris Wilson
  0 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2017-05-19  9:08 UTC (permalink / raw)
  To: Joonas Lahtinen; +Cc: intel-gfx

On Fri, May 19, 2017 at 11:53:17AM +0300, Joonas Lahtinen wrote:
> On ke, 2017-05-17 at 14:02 +0100, Chris Wilson wrote:
> > Older gen use a physical address for the hardware status page, for which
> > we use cache-coherent writes. As the writes are into the cpu cache, we use
> > a normal WB mapped page to read the HWS, used for our seqno tracking.
> > 
> > Anecdotally, I observed lost breadcrumbs writes into the HWS on i965gm,
> > which so far have not reoccurred with this patch. How reliable that
> > evidence is remains to be seen.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> <SNIP>
> 
> > @@ -1091,17 +1094,22 @@ static int init_status_page(struct intel_engine_cs *engine)
> >  
> >  static int init_phys_status_page(struct intel_engine_cs *engine)
> >  {
> > -	struct drm_i915_private *dev_priv = engine->i915;
> > +	struct page *page;
> >  
> > -	GEM_BUG_ON(engine->id != RCS);
> 
> Was this removal deliberate?

Yes, at this point we are purely engine local and generic. It doesn't
make sense to allocate a page for a non-existent engine, but in theory
since we aren't touching HW here, it will work. It's only the application
to HW that is restricted now.
 
> > +	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> > +	if (!page)
> > +		return -ENOMEM;
> >  
> > -	dev_priv->status_page_dmah =
> > -		drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
> > -	if (!dev_priv->status_page_dmah)
> > +	engine->status_page.dma_addr =
> > +		dma_map_page(engine->i915->drm.dev, page, 0, PAGE_SIZE,
> > +			     PCI_DMA_BIDIRECTIONAL);
> > +	if (dma_mapping_error(engine->i915->drm.dev,
> > +			      engine->status_page.dma_addr)) {
> > +		__free_page(page);
> >  		return -ENOMEM;
> 
> Nitpicking, but -ENOSPC would be more accurate?

More often I have seen ENOMEM from mapping than an actual out-of-space.
ENOSPC has a special meaning for me of being out of GTT space (too long
spent in execbuf). But what can we do if an API doesn't report the true
error?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Use a cached mapping for the physical HWS
  2017-05-17 13:02 [PATCH] drm/i915: Use a cached mapping for the physical HWS Chris Wilson
  2017-05-19  8:53 ` Joonas Lahtinen
@ 2017-05-22  8:55 ` Daniel Vetter
  2017-05-22  9:13   ` Chris Wilson
  2017-05-22 11:55 ` [PATCH v3] " Chris Wilson
  2 siblings, 1 reply; 9+ messages in thread
From: Daniel Vetter @ 2017-05-22  8:55 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Wed, May 17, 2017 at 02:02:50PM +0100, Chris Wilson wrote:
> Older gen use a physical address for the hardware status page, for which
> we use cache-coherent writes. As the writes are into the cpu cache, we use
> a normal WB mapped page to read the HWS, used for our seqno tracking.
> 
> Anecdotally, I observed lost breadcrumbs writes into the HWS on i965gm,
> which so far have not reoccurred with this patch. How reliable that
> evidence is remains to be seen.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>

dma is assumed to be coherent, so this should give you the same thing in
the end. Except that dma_map_page can use bounce buffers, whereas
dma_alloc_coherent will make sure you're not doing that. So I don't think
this makes stuff better (or changes anything fwiw).

Getting rid of the drm_pci_alloc stuff otoh would be nice either way.
-Daniel
> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  1 -
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 34 ++++++++++++++++++++-------------
>  drivers/gpu/drm/i915/intel_ringbuffer.h |  5 ++++-
>  3 files changed, 25 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 8ec3bb2913d4..bc223471d391 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2061,7 +2061,6 @@ struct drm_i915_private {
>  	struct i915_gem_context *kernel_context;
>  	struct intel_engine_cs *engine[I915_NUM_ENGINES];
>  
> -	struct drm_dma_handle *status_page_dmah;
>  	struct resource mch_res;
>  
>  	/* protects the irq masks */
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index c32a4ba9579f..3d80cbcd5d94 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -342,9 +342,10 @@ static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
>  	struct drm_i915_private *dev_priv = engine->i915;
>  	u32 addr;
>  
> -	addr = dev_priv->status_page_dmah->busaddr;
> +	addr = engine->status_page.dma_addr;
>  	if (INTEL_GEN(dev_priv) >= 4)
> -		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
> +		addr |= (engine->status_page.dma_addr >> 28) & 0xf0;
> +
>  	I915_WRITE(HWS_PGA, addr);
>  }
>  
> @@ -1000,12 +1001,14 @@ i915_emit_bb_start(struct drm_i915_gem_request *req,
>  
>  static void cleanup_phys_status_page(struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_private *dev_priv = engine->i915;
> -
> -	if (!dev_priv->status_page_dmah)
> +	if (!engine->status_page.page_addr)
>  		return;
>  
> -	drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
> +	dma_unmap_page(engine->i915->drm.dev,
> +		       engine->status_page.dma_addr, PAGE_SIZE,
> +		       PCI_DMA_BIDIRECTIONAL);
> +
> +	__free_page(virt_to_page(engine->status_page.page_addr));
>  	engine->status_page.page_addr = NULL;
>  }
>  
> @@ -1091,17 +1094,22 @@ static int init_status_page(struct intel_engine_cs *engine)
>  
>  static int init_phys_status_page(struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_private *dev_priv = engine->i915;
> +	struct page *page;
>  
> -	GEM_BUG_ON(engine->id != RCS);
> +	page = alloc_page(GFP_KERNEL | __GFP_ZERO);
> +	if (!page)
> +		return -ENOMEM;
>  
> -	dev_priv->status_page_dmah =
> -		drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
> -	if (!dev_priv->status_page_dmah)
> +	engine->status_page.dma_addr =
> +		dma_map_page(engine->i915->drm.dev, page, 0, PAGE_SIZE,
> +			     PCI_DMA_BIDIRECTIONAL);
> +	if (dma_mapping_error(engine->i915->drm.dev,
> +			      engine->status_page.dma_addr)) {
> +		__free_page(page);
>  		return -ENOMEM;
> +	}
>  
> -	engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
> -	memset(engine->status_page.page_addr, 0, PAGE_SIZE);
> +	engine->status_page.page_addr = page_address(page);
>  
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 46927e9530a2..fc3a2ac8914e 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -18,9 +18,12 @@
>  #define CACHELINE_DWORDS (CACHELINE_BYTES / sizeof(uint32_t))
>  
>  struct intel_hw_status_page {
> -	struct i915_vma *vma;
>  	u32 *page_addr;
>  	u32 ggtt_offset;
> +	union {
> +		struct i915_vma *vma;
> +		dma_addr_t dma_addr;
> +	};
>  };
>  
>  #define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
> -- 
> 2.11.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gfx

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Use a cached mapping for the physical HWS
  2017-05-22  8:55 ` Daniel Vetter
@ 2017-05-22  9:13   ` Chris Wilson
  2017-05-26  8:18     ` Daniel Vetter
  0 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2017-05-22  9:13 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Mon, May 22, 2017 at 10:55:01AM +0200, Daniel Vetter wrote:
> On Wed, May 17, 2017 at 02:02:50PM +0100, Chris Wilson wrote:
> > Older gen use a physical address for the hardware status page, for which
> > we use cache-coherent writes. As the writes are into the cpu cache, we use
> > a normal WB mapped page to read the HWS, used for our seqno tracking.
> > 
> > Anecdotally, I observed lost breadcrumbs writes into the HWS on i965gm,
> > which so far have not reoccurred with this patch. How reliable that
> > evidence is remains to be seen.
> > 
> > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> 
> dma is assumed to be coherent, so this should give you the same thing in
> the end.

Hmm, DMA historically hasn't been coherent, to get coherency required
UC - being able to snoop was an ability of the GMCH, I thought. Looking
at drivers/base/dma-coherent.c it is allocating UC (WC if you are
lucky) from a device region, though I'm lost working out how the dynamic
allocaton is treated.

> Except that dma_map_page can use bounce buffers, whereas
> dma_alloc_coherent will make sure you're not doing that.

Hmm, on these machines we don't have DMAR, we really should be feeding
in physical addresses. That was probably being too polite. Fancy
without?
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v3] drm/i915: Use a cached mapping for the physical HWS
  2017-05-17 13:02 [PATCH] drm/i915: Use a cached mapping for the physical HWS Chris Wilson
  2017-05-19  8:53 ` Joonas Lahtinen
  2017-05-22  8:55 ` Daniel Vetter
@ 2017-05-22 11:55 ` Chris Wilson
  2017-05-31  9:35   ` Daniel Vetter
  2 siblings, 1 reply; 9+ messages in thread
From: Chris Wilson @ 2017-05-22 11:55 UTC (permalink / raw)
  To: intel-gfx

Older gen use a physical address for the hardware status page, for which
we use cache-coherent writes. As the writes are into the cpu cache, we use
a normal WB mapped page to read the HWS, used for our seqno tracking.

Anecdotally, I observed lost breadcrumbs writes into the HWS on i965gm,
which so far have not reoccurred with this patch. How reliable that
evidence is remains to be seen.

v2: Explicitly pass the expected physical address to the hw
v3: Also remember the wild writes we once had for HWS above 4G.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.h         |  1 -
 drivers/gpu/drm/i915/intel_ringbuffer.c | 29 +++++++++++++++--------------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 90787e0084f2..1a7961ef4399 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2109,7 +2109,6 @@ struct drm_i915_private {
 	struct i915_gem_context *kernel_context;
 	struct intel_engine_cs *engine[I915_NUM_ENGINES];
 
-	struct drm_dma_handle *status_page_dmah;
 	struct resource mch_res;
 
 	/* protects the irq masks */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c32a4ba9579f..26808f2bf748 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -340,11 +340,14 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
 static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
 {
 	struct drm_i915_private *dev_priv = engine->i915;
+	struct page *page = virt_to_page(engine->status_page.page_addr);
+	phys_addr_t phys = PFN_PHYS(page_to_pfn(page));
 	u32 addr;
 
-	addr = dev_priv->status_page_dmah->busaddr;
+	addr = lower_32_bits(phys);
 	if (INTEL_GEN(dev_priv) >= 4)
-		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
+		addr |= (phys >> 28) & 0xf0;
+
 	I915_WRITE(HWS_PGA, addr);
 }
 
@@ -1000,12 +1003,10 @@ i915_emit_bb_start(struct drm_i915_gem_request *req,
 
 static void cleanup_phys_status_page(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	if (!dev_priv->status_page_dmah)
+	if (!engine->status_page.page_addr)
 		return;
 
-	drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
+	__free_page(virt_to_page(engine->status_page.page_addr));
 	engine->status_page.page_addr = NULL;
 }
 
@@ -1091,17 +1092,17 @@ static int init_status_page(struct intel_engine_cs *engine)
 
 static int init_phys_status_page(struct intel_engine_cs *engine)
 {
-	struct drm_i915_private *dev_priv = engine->i915;
-
-	GEM_BUG_ON(engine->id != RCS);
+	struct page *page;
 
-	dev_priv->status_page_dmah =
-		drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
-	if (!dev_priv->status_page_dmah)
+	/* Though the HWS register does support 36bit addresses, historically
+	 * we have had hangs and corruption reported due to wild writes if
+	 * the HWS is placed above 4G.
+	 */
+	page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO);
+	if (!page)
 		return -ENOMEM;
 
-	engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
-	memset(engine->status_page.page_addr, 0, PAGE_SIZE);
+	engine->status_page.page_addr = page_address(page);
 
 	return 0;
 }
-- 
2.11.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Use a cached mapping for the physical HWS
  2017-05-22  9:13   ` Chris Wilson
@ 2017-05-26  8:18     ` Daniel Vetter
  2017-05-26  9:16       ` Chris Wilson
  0 siblings, 1 reply; 9+ messages in thread
From: Daniel Vetter @ 2017-05-26  8:18 UTC (permalink / raw)
  To: Chris Wilson, Daniel Vetter, intel-gfx

On Mon, May 22, 2017 at 10:13:44AM +0100, Chris Wilson wrote:
> On Mon, May 22, 2017 at 10:55:01AM +0200, Daniel Vetter wrote:
> > On Wed, May 17, 2017 at 02:02:50PM +0100, Chris Wilson wrote:
> > > Older gen use a physical address for the hardware status page, for which
> > > we use cache-coherent writes. As the writes are into the cpu cache, we use
> > > a normal WB mapped page to read the HWS, used for our seqno tracking.
> > > 
> > > Anecdotally, I observed lost breadcrumbs writes into the HWS on i965gm,
> > > which so far have not reoccurred with this patch. How reliable that
> > > evidence is remains to be seen.
> > > 
> > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > 
> > dma is assumed to be coherent, so this should give you the same thing in
> > the end.
> 
> Hmm, DMA historically hasn't been coherent, to get coherency required
> UC - being able to snoop was an ability of the GMCH, I thought. Looking
> at drivers/base/dma-coherent.c it is allocating UC (WC if you are
> lucky) from a device region, though I'm lost working out how the dynamic
> allocaton is treated.

Yes, gpu dma wasn't coherent, but x86 dma api always assumed that
everything is coherent (since for non-gpus that was the case). At least
that's how I thought things worked.

> > Except that dma_map_page can use bounce buffers, whereas
> > dma_alloc_coherent will make sure you're not doing that.
> 
> Hmm, on these machines we don't have DMAR, we really should be feeding
> in physical addresses. That was probably being too polite. Fancy
> without?

Not sure what you mean ... page_to_pfn, plus comment?
-Daniel
-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH] drm/i915: Use a cached mapping for the physical HWS
  2017-05-26  8:18     ` Daniel Vetter
@ 2017-05-26  9:16       ` Chris Wilson
  0 siblings, 0 replies; 9+ messages in thread
From: Chris Wilson @ 2017-05-26  9:16 UTC (permalink / raw)
  To: Daniel Vetter; +Cc: intel-gfx

On Fri, May 26, 2017 at 10:18:30AM +0200, Daniel Vetter wrote:
> On Mon, May 22, 2017 at 10:13:44AM +0100, Chris Wilson wrote:
> > On Mon, May 22, 2017 at 10:55:01AM +0200, Daniel Vetter wrote:
> > > On Wed, May 17, 2017 at 02:02:50PM +0100, Chris Wilson wrote:
> > > > Older gen use a physical address for the hardware status page, for which
> > > > we use cache-coherent writes. As the writes are into the cpu cache, we use
> > > > a normal WB mapped page to read the HWS, used for our seqno tracking.
> > > > 
> > > > Anecdotally, I observed lost breadcrumbs writes into the HWS on i965gm,
> > > > which so far have not reoccurred with this patch. How reliable that
> > > > evidence is remains to be seen.
> > > > 
> > > > Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> > > 
> > > dma is assumed to be coherent, so this should give you the same thing in
> > > the end.
> > 
> > Hmm, DMA historically hasn't been coherent, to get coherency required
> > UC - being able to snoop was an ability of the GMCH, I thought. Looking
> > at drivers/base/dma-coherent.c it is allocating UC (WC if you are
> > lucky) from a device region, though I'm lost working out how the dynamic
> > allocaton is treated.
> 
> Yes, gpu dma wasn't coherent, but x86 dma api always assumed that
> everything is coherent (since for non-gpus that was the case). At least
> that's how I thought things worked.

But it can only be coherent by disabling the cpu cache for that range. I
think Broadwell is the earliest that allowed pci-e devices to write into
the CPU cache (specifically to support 10G nic iirc), but I can't
remember if that came to pass.
 
> > > Except that dma_map_page can use bounce buffers, whereas
> > > dma_alloc_coherent will make sure you're not doing that.
> > 
> > Hmm, on these machines we don't have DMAR, we really should be feeding
> > in physical addresses. That was probably being too polite. Fancy
> > without?
> 
> Not sure what you mean ... page_to_pfn, plus comment?

See v3, just feed in the physical address of the page (and not the
kernel's page_address()) which is just its pfn << PAGE_SHIFT. Completely
avoid passing it a dma mapped address, just in case it does something.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v3] drm/i915: Use a cached mapping for the physical HWS
  2017-05-22 11:55 ` [PATCH v3] " Chris Wilson
@ 2017-05-31  9:35   ` Daniel Vetter
  0 siblings, 0 replies; 9+ messages in thread
From: Daniel Vetter @ 2017-05-31  9:35 UTC (permalink / raw)
  To: Chris Wilson; +Cc: intel-gfx

On Mon, May 22, 2017 at 12:55:14PM +0100, Chris Wilson wrote:
> Older gen use a physical address for the hardware status page, for which
> we use cache-coherent writes. As the writes are into the cpu cache, we use
> a normal WB mapped page to read the HWS, used for our seqno tracking.
> 
> Anecdotally, I observed lost breadcrumbs writes into the HWS on i965gm,
> which so far have not reoccurred with this patch. How reliable that
> evidence is remains to be seen.
> 
> v2: Explicitly pass the expected physical address to the hw
> v3: Also remember the wild writes we once had for HWS above 4G.
> 
> Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
> Cc: Daniel Vetter <daniel@ffwll.ch>
> Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>

I'm still not sold on the story, but as a cleanup it seems a ok idea.
Getting rid of the drm dma api wrappers is definitely good, since
drm-the-OS-abstraction is dead.

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>

> ---
>  drivers/gpu/drm/i915/i915_drv.h         |  1 -
>  drivers/gpu/drm/i915/intel_ringbuffer.c | 29 +++++++++++++++--------------
>  2 files changed, 15 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 90787e0084f2..1a7961ef4399 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2109,7 +2109,6 @@ struct drm_i915_private {
>  	struct i915_gem_context *kernel_context;
>  	struct intel_engine_cs *engine[I915_NUM_ENGINES];
>  
> -	struct drm_dma_handle *status_page_dmah;
>  	struct resource mch_res;
>  
>  	/* protects the irq masks */
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index c32a4ba9579f..26808f2bf748 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -340,11 +340,14 @@ gen7_render_ring_flush(struct drm_i915_gem_request *req, u32 mode)
>  static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
>  {
>  	struct drm_i915_private *dev_priv = engine->i915;
> +	struct page *page = virt_to_page(engine->status_page.page_addr);
> +	phys_addr_t phys = PFN_PHYS(page_to_pfn(page));
>  	u32 addr;
>  
> -	addr = dev_priv->status_page_dmah->busaddr;
> +	addr = lower_32_bits(phys);
>  	if (INTEL_GEN(dev_priv) >= 4)
> -		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
> +		addr |= (phys >> 28) & 0xf0;
> +
>  	I915_WRITE(HWS_PGA, addr);
>  }
>  
> @@ -1000,12 +1003,10 @@ i915_emit_bb_start(struct drm_i915_gem_request *req,
>  
>  static void cleanup_phys_status_page(struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_private *dev_priv = engine->i915;
> -
> -	if (!dev_priv->status_page_dmah)
> +	if (!engine->status_page.page_addr)
>  		return;
>  
> -	drm_pci_free(&dev_priv->drm, dev_priv->status_page_dmah);
> +	__free_page(virt_to_page(engine->status_page.page_addr));
>  	engine->status_page.page_addr = NULL;
>  }
>  
> @@ -1091,17 +1092,17 @@ static int init_status_page(struct intel_engine_cs *engine)
>  
>  static int init_phys_status_page(struct intel_engine_cs *engine)
>  {
> -	struct drm_i915_private *dev_priv = engine->i915;
> -
> -	GEM_BUG_ON(engine->id != RCS);
> +	struct page *page;
>  
> -	dev_priv->status_page_dmah =
> -		drm_pci_alloc(&dev_priv->drm, PAGE_SIZE, PAGE_SIZE);
> -	if (!dev_priv->status_page_dmah)
> +	/* Though the HWS register does support 36bit addresses, historically
> +	 * we have had hangs and corruption reported due to wild writes if
> +	 * the HWS is placed above 4G.
> +	 */
> +	page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO);
> +	if (!page)
>  		return -ENOMEM;
>  
> -	engine->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
> -	memset(engine->status_page.page_addr, 0, PAGE_SIZE);
> +	engine->status_page.page_addr = page_address(page);
>  
>  	return 0;
>  }
> -- 
> 2.11.0
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2017-05-31  9:35 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-05-17 13:02 [PATCH] drm/i915: Use a cached mapping for the physical HWS Chris Wilson
2017-05-19  8:53 ` Joonas Lahtinen
2017-05-19  9:08   ` Chris Wilson
2017-05-22  8:55 ` Daniel Vetter
2017-05-22  9:13   ` Chris Wilson
2017-05-26  8:18     ` Daniel Vetter
2017-05-26  9:16       ` Chris Wilson
2017-05-22 11:55 ` [PATCH v3] " Chris Wilson
2017-05-31  9:35   ` Daniel Vetter

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox