From mboxrd@z Thu Jan 1 00:00:00 1970 From: Ville =?iso-8859-1?Q?Syrj=E4l=E4?= Subject: Re: [PATCH v4] drm/i915: Replaced Blitter ring based flips with MMIO flips for VLV Date: Thu, 15 May 2014 15:27:46 +0300 Message-ID: <20140515122746.GA27580@intel.com> References: <20140509171848.GA18465@intel.com> <1400134657-2417-1-git-send-email-sourab.gupta@intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable Return-path: Received: from mga09.intel.com (mga09.intel.com [134.134.136.24]) by gabe.freedesktop.org (Postfix) with ESMTP id 13F156E16E for ; Thu, 15 May 2014 05:27:56 -0700 (PDT) Content-Disposition: inline In-Reply-To: <1400134657-2417-1-git-send-email-sourab.gupta@intel.com> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Errors-To: intel-gfx-bounces@lists.freedesktop.org Sender: "Intel-gfx" To: sourab.gupta@intel.com Cc: intel-gfx@lists.freedesktop.org, Akash Goel List-Id: intel-gfx@lists.freedesktop.org On Thu, May 15, 2014 at 11:47:37AM +0530, sourab.gupta@intel.com wrote: > From: Sourab Gupta > = > Using MMIO based flips on Gen5+ for Media power well residency optimizati= on. > The blitter ring is currently being used just for command streamer based > flip calls. For pure 3D workloads, with MMIO flips, there will be no use > of blitter ring and this will ensure the 100% residency for Media well. > = > In a subsequent patch, we can make the selection between CS vs MMIO flip > based on a module parameter to give more testing coverage. > = > v2: The MMIO flips now use the interrupt driven mechanism for issuing the > flips when target seqno is reached. (Incorporating Ville's idea) > = > v3: Rebasing on latest code. Code restructuring after incorporating > Damien's comments > = > v4: Addressing Ville's review comments > -general cleanup > -updating only base addr instead of calling update_primary_plane > -extending patch for gen5+ platforms > = > Signed-off-by: Sourab Gupta > Signed-off-by: Akash Goel > --- > drivers/gpu/drm/i915/i915_dma.c | 1 + > drivers/gpu/drm/i915/i915_drv.h | 6 ++ > drivers/gpu/drm/i915/i915_gem.c | 2 +- > drivers/gpu/drm/i915/i915_irq.c | 2 + > drivers/gpu/drm/i915/intel_display.c | 115 +++++++++++++++++++++++++++++= ++++++ > drivers/gpu/drm/i915/intel_drv.h | 6 ++ > 6 files changed, 131 insertions(+), 1 deletion(-) > = > diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_= dma.c > index 46f1dec..672c28f 100644 > --- a/drivers/gpu/drm/i915/i915_dma.c > +++ b/drivers/gpu/drm/i915/i915_dma.c > @@ -1570,6 +1570,7 @@ int i915_driver_load(struct drm_device *dev, unsign= ed long flags) > spin_lock_init(&dev_priv->backlight_lock); > spin_lock_init(&dev_priv->uncore.lock); > spin_lock_init(&dev_priv->mm.object_stat_lock); > + spin_lock_init(&dev_priv->mmio_flip_lock); > dev_priv->ring_index =3D 0; > mutex_init(&dev_priv->dpio_lock); > mutex_init(&dev_priv->modeset_restore_lock); > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_= drv.h > index 4006dfe..38c0820 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -1543,6 +1543,8 @@ struct drm_i915_private { > struct i915_ums_state ums; > /* the indicator for dispatch video commands on two BSD rings */ > int ring_index; > + /* protects the mmio flip data */ > + spinlock_t mmio_flip_lock; > }; > = > static inline struct drm_i915_private *to_i915(const struct drm_device *= dev) > @@ -2209,6 +2211,7 @@ bool i915_gem_retire_requests(struct drm_device *de= v); > void i915_gem_retire_requests_ring(struct intel_ring_buffer *ring); > int __must_check i915_gem_check_wedge(struct i915_gpu_error *error, > bool interruptible); > +int i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno); > static inline bool i915_reset_in_progress(struct i915_gpu_error *error) > { > return unlikely(atomic_read(&error->reset_counter) > @@ -2580,6 +2583,9 @@ int i915_reg_read_ioctl(struct drm_device *dev, voi= d *data, > int i915_get_reset_stats_ioctl(struct drm_device *dev, void *data, > struct drm_file *file); > = > +void intel_notify_mmio_flip(struct drm_device *dev, > + struct intel_ring_buffer *ring); > + > /* overlay */ > extern struct intel_overlay_error_state *intel_overlay_capture_error_sta= te(struct drm_device *dev); > extern void intel_overlay_print_error_state(struct drm_i915_error_state_= buf *e, > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_= gem.c > index fa5b5ab..5b4e953 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -975,7 +975,7 @@ i915_gem_check_wedge(struct i915_gpu_error *error, > * Compare seqno against outstanding lazy request. Emit a request if the= y are > * equal. > */ > -static int > +int > i915_gem_check_olr(struct intel_ring_buffer *ring, u32 seqno) > { > int ret; > diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_= irq.c > index b10fbde..a353693 100644 > --- a/drivers/gpu/drm/i915/i915_irq.c > +++ b/drivers/gpu/drm/i915/i915_irq.c > @@ -1084,6 +1084,8 @@ static void notify_ring(struct drm_device *dev, > = > trace_i915_gem_request_complete(ring); > = > + intel_notify_mmio_flip(dev, ring); > + Hmm. How badly is this going to explode with UMS? > wake_up_all(&ring->irq_queue); > i915_queue_hangcheck(dev); > } > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/= intel_display.c > index 0f8f9bc..9d190c2 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -9037,6 +9037,110 @@ err: > return ret; > } > = > +static void intel_do_mmio_flip(struct drm_device *dev, > + struct drm_crtc *crtc) nit: no need to pass dev and crtc. You can dig out dev_priv from the crtc in the function. Passing intel_crtc instead of drm_crtc could also avoid some extra variables. > +{ > + struct drm_i915_private *dev_priv =3D dev->dev_private; > + struct drm_i915_gem_object *obj; > + struct intel_framebuffer *intel_fb; > + struct intel_crtc *intel_crtc =3D to_intel_crtc(crtc); > + int plane =3D intel_crtc->plane; > + > + intel_mark_page_flip_active(intel_crtc); > + > + intel_fb =3D to_intel_framebuffer(crtc->primary->fb); > + obj =3D intel_fb->obj; nit: could be done as part of the declaration > + > + /* Update the base address reg for the plane */ Useless comment. > + I915_WRITE(DSPSURF(plane), i915_gem_obj_ggtt_offset(obj) + > + intel_crtc->dspaddr_offset); Should probably have a POSTING_READ() here. > +} > + > +static bool intel_postpone_flip(struct drm_i915_gem_object *obj) > +{ > + if (!obj->ring) > + return false; > + > + if (i915_seqno_passed(obj->ring->get_seqno(obj->ring, false), Still not using lazy_coherency. > + obj->last_write_seqno)) > + return false; > + > + i915_gem_check_olr(obj->ring, obj->last_write_seqno); Error handling is missing. In fact if we get an error from this I guess we should fail .queue_flip(). A bool return isn't sufficient to convey both errors and whether we need to wait for the GPU or not. I guess you could encode that into an int with >0,=3D=3D0,<0 meaning different things, or you may just need to inline this stuff into intel_queue_mmio_flip(). > + > + if (WARN_ON(!obj->ring->irq_get(obj->ring))) > + return false; > + > + return true; > +} > + > +void intel_notify_mmio_flip(struct drm_device *dev, > + struct intel_ring_buffer *ring) Again could just pass ring and dig out dev from it. > +{ > + struct drm_i915_private *dev_priv =3D dev->dev_private; > + struct drm_crtc *crtc; > + unsigned long irq_flags; > + u32 seqno; > + > + seqno =3D ring->get_seqno(ring, false); > + > + spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags); > + for_each_crtc(dev, crtc) { > + struct intel_crtc *intel_crtc; Could use for_each_intel_crtc() to avoid some extra variables. > + struct intel_mmio_flip *mmio_flip_data; > + > + intel_crtc =3D to_intel_crtc(crtc); > + mmio_flip_data =3D &intel_crtc->mmio_flip_data; > + > + if (mmio_flip_data->seqno =3D=3D 0) > + continue; > + if (ring->id !=3D mmio_flip_data->ring_id) > + continue; > + > + if (i915_seqno_passed(seqno, mmio_flip_data->seqno)) { > + intel_do_mmio_flip(dev, crtc); > + mmio_flip_data->seqno =3D 0; > + ring->irq_put(ring); > + } > + } > + > + spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags); > +} > + > +static int intel_queue_mmio_flip(struct drm_device *dev, > + struct drm_crtc *crtc, > + struct drm_framebuffer *fb, > + struct drm_i915_gem_object *obj, > + uint32_t flags) > +{ > + struct drm_i915_private *dev_priv =3D dev->dev_private; > + struct intel_crtc *intel_crtc =3D to_intel_crtc(crtc); > + unsigned long irq_flags; > + int ret; > + > + ret =3D intel_pin_and_fence_fb_obj(dev, obj, obj->ring); > + if (ret) > + goto err; > + > + if (!intel_postpone_flip(obj)) { > + intel_do_mmio_flip(dev, crtc); > + return 0; > + } > + > + spin_lock_irqsave(&dev_priv->mmio_flip_lock, irq_flags); > + intel_crtc->mmio_flip_data.seqno =3D obj->last_write_seqno; > + intel_crtc->mmio_flip_data.ring_id =3D obj->ring->id; > + spin_unlock_irqrestore(&dev_priv->mmio_flip_lock, irq_flags); > + > + /* Double check to catch cases where irq fired before > + * mmio flip data was ready > + */ > + intel_notify_mmio_flip(dev, obj->ring); > + return 0; > + > +err: > + return ret; > +} > + > static int intel_gen7_queue_flip(struct drm_device *dev, > struct drm_crtc *crtc, > struct drm_framebuffer *fb, > @@ -11377,7 +11481,18 @@ static void intel_init_display(struct drm_device= *dev) > break; > } > = > + /* Using MMIO based flips starting from Gen5, for Media power well > + * residency optimization. This is not currently being used for > + * older platforms because of non-availability of flip done interrupt. > + * The other alternative of having Render ring based flip calls is > + * not being used, as the performance(FPS) of certain 3D Apps gets > + * severly affected. > + */ > + if (INTEL_INFO(dev)->gen >=3D 5) > + dev_priv->display.queue_flip =3D intel_queue_mmio_flip; I still think we need a module param for this, and default to CS for now (except maybe for VLV). > + > intel_panel_init_backlight_funcs(dev); > + > } > = > /* > diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/inte= l_drv.h > index 32a74e1..7953ed6 100644 > --- a/drivers/gpu/drm/i915/intel_drv.h > +++ b/drivers/gpu/drm/i915/intel_drv.h > @@ -351,6 +351,11 @@ struct intel_pipe_wm { > bool sprites_scaled; > }; > = > +struct intel_mmio_flip { > + u32 seqno; > + u32 ring_id; > +}; > + > struct intel_crtc { > struct drm_crtc base; > enum pipe pipe; > @@ -403,6 +408,7 @@ struct intel_crtc { > } wm; > = > wait_queue_head_t vbl_wait; > + struct intel_mmio_flip mmio_flip_data; ^ Should be a space, not tab. > }; > = > struct intel_plane_wm_parameters { > -- = > 1.8.5.1 -- = Ville Syrj=E4l=E4 Intel OTC