From mboxrd@z Thu Jan 1 00:00:00 1970 From: Ville =?iso-8859-1?Q?Syrj=E4l=E4?= Subject: Re: [PATCH 4/5] drm/i915: properly set HSW WM_LP watermarks Date: Wed, 29 May 2013 19:24:49 +0300 Message-ID: <20130529162449.GT5004@intel.com> References: <20130524161134.GI5004@intel.com> <1369433112-4476-1-git-send-email-przanoni@gmail.com> <20130529160615.GR5004@intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable Return-path: Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by gabe.freedesktop.org (Postfix) with ESMTP id 7E43DE64AF for ; Wed, 29 May 2013 09:25:00 -0700 (PDT) Content-Disposition: inline In-Reply-To: <20130529160615.GR5004@intel.com> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: intel-gfx-bounces+gcfxdi-intel-gfx=m.gmane.org@lists.freedesktop.org Errors-To: intel-gfx-bounces+gcfxdi-intel-gfx=m.gmane.org@lists.freedesktop.org To: Paulo Zanoni Cc: intel-gfx@lists.freedesktop.org, Paulo Zanoni List-Id: intel-gfx@lists.freedesktop.org On Wed, May 29, 2013 at 07:06:15PM +0300, Ville Syrj=E4l=E4 wrote: > On Fri, May 24, 2013 at 07:05:12PM -0300, Paulo Zanoni wrote: > > From: Paulo Zanoni > > = > > We were previously only setting the WM_PIPE registers, now we are > > setting the LP watermark registers. This should allow deeper PC > > states, resulting in power savings. > > = > > We're only using 1/2 data buffer partitioning for now. > > = > > v2: Merge both hsw_compute_pri_wm_* functions (Ville) > > = > > Signed-off-by: Paulo Zanoni > > --- > > drivers/gpu/drm/i915/i915_reg.h | 4 + > > drivers/gpu/drm/i915/intel_pm.c | 201 ++++++++++++++++++++++++++++++++= ++++---- > > 2 files changed, 187 insertions(+), 18 deletions(-) > > = > > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i91= 5_reg.h > > index e86606c..58230ea 100644 > > --- a/drivers/gpu/drm/i915/i915_reg.h > > +++ b/drivers/gpu/drm/i915/i915_reg.h > > @@ -3057,6 +3057,10 @@ > > #define WM3S_LP_IVB 0x45128 > > #define WM1S_LP_EN (1<<31) > > = > > +#define HSW_WM_LP_VAL(lat, fbc, pri, cur) \ > > + (WM3_LP_EN | ((lat) << WM1_LP_LATENCY_SHIFT) | \ > > + ((fbc) << WM1_LP_FBC_SHIFT) | ((pri) << WM1_LP_SR_SHIFT) | (cur)) > > + > > /* Memory latency timer register */ > > #define MLTR_ILK 0x11222 > > #define MLTR_WM1_SHIFT 0 > > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/int= el_pm.c > > index ef58a1a..872e2a8 100644 > > --- a/drivers/gpu/drm/i915/intel_pm.c > > +++ b/drivers/gpu/drm/i915/intel_pm.c > > @@ -2129,6 +2129,12 @@ static uint32_t hsw_wm_method2(uint32_t pixel_ra= te, uint32_t pipe_htotal, > > return ret; > > } > > = > > +static uint32_t hsw_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels, > > + uint8_t bytes_per_pixel) > > +{ > > + return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2; > > +} > > + > > struct hsw_pipe_wm_parameters { > > bool active; > > bool sprite_enabled; > > @@ -2142,11 +2148,28 @@ struct hsw_pipe_wm_parameters { > > uint32_t pixel_rate; > > }; > > = > > +struct hsw_wm_maximums { > > + uint16_t pri; > > + uint16_t spr; > > + uint16_t cur; > > + uint16_t fbc; > > +}; > > + > > +struct hsw_lp_wm_result { > > + bool enable; > > + bool fbc_enable; > > + uint32_t pri_val; > > + uint32_t spr_val; > > + uint32_t cur_val; > > + uint32_t fbc_val; > > +}; > > + > > struct hsw_wm_values { > > uint32_t wm_pipe[3]; > > uint32_t wm_lp[3]; > > uint32_t wm_lp_spr[3]; > > uint32_t wm_linetime[3]; > > + bool enable_fbc_wm; > > }; > > = > > enum hsw_data_buf_partitioning { > > @@ -2154,17 +2177,31 @@ enum hsw_data_buf_partitioning { > > HSW_DATA_BUF_PART_5_6, > > }; > > = > > -/* Only for WM_PIPE. */ > > -static uint32_t hsw_compute_pri_wm_pipe(struct hsw_pipe_wm_parameters = *params, > > - uint32_t mem_value) > > +/* For both WM_PIPE and WM_LP. */ > > +static uint32_t hsw_compute_pri_wm(struct hsw_pipe_wm_parameters *para= ms, > > + uint32_t mem_value, > > + bool is_lp) > > { > > + uint32_t method1, method2; > > + > > /* TODO: for now, assume the primary plane is always enabled. */ > > if (!params->active) > > return 0; > > = > > - return hsw_wm_method1(params->pixel_rate, > > - params->pri_bytes_per_pixel, > > - mem_value); > > + method1 =3D hsw_wm_method1(params->pixel_rate, > > + params->pri_bytes_per_pixel, > > + mem_value); > > + > > + if (!is_lp) > > + return method1; > > + > > + method2 =3D hsw_wm_method2(params->pixel_rate, > > + params->pipe_htotal, > > + params->pri_horiz_pixels, > > + params->pri_bytes_per_pixel, > > + mem_value); > > + > > + return min(method1, method2); > > } > > = > > /* For both WM_PIPE and WM_LP. */ > > @@ -2201,13 +2238,59 @@ static uint32_t hsw_compute_cur_wm(struct hsw_p= ipe_wm_parameters *params, > > mem_value); > > } > > = > > +/* Only for WM_LP. */ > > +static uint32_t hsw_compute_fbc_wm(struct hsw_pipe_wm_parameters *para= ms, > > + uint32_t pri_val, > > + uint32_t mem_value) > > +{ > > + if (!params->active) > > + return 0; > > + > > + return hsw_wm_fbc(pri_val, > > + params->pri_horiz_pixels, > > + params->pri_bytes_per_pixel); > > +} > > + > > +static void hsw_compute_lp_wm(uint32_t mem_value, struct hsw_wm_maximu= ms *max, > > + struct hsw_pipe_wm_parameters *params, > > + struct hsw_lp_wm_result *result) > > +{ > > + enum pipe pipe; > > + uint32_t pri_val[3], spr_val[3], cur_val[3], fbc_val[3]; > > + > > + for (pipe =3D PIPE_A; pipe <=3D PIPE_C; pipe++) { > > + struct hsw_pipe_wm_parameters *p =3D ¶ms[pipe]; > > + > > + pri_val[pipe] =3D hsw_compute_pri_wm(p, mem_value, true); > > + spr_val[pipe] =3D hsw_compute_spr_wm(p, mem_value); > > + cur_val[pipe] =3D hsw_compute_cur_wm(p, mem_value); > > + fbc_val[pipe] =3D hsw_compute_fbc_wm(p, pri_val[pipe], mem_value); > > + } > > + > > + result->pri_val =3D max3(pri_val[0], pri_val[1], pri_val[2]); > > + result->spr_val =3D max3(spr_val[0], spr_val[1], spr_val[2]); > > + result->cur_val =3D max3(cur_val[0], cur_val[1], cur_val[2]); > > + result->fbc_val =3D max3(fbc_val[0], fbc_val[1], fbc_val[2]); > > + > > + if (result->fbc_val > max->fbc) { > > + result->fbc_enable =3D false; > > + result->fbc_val =3D 0; > > + } else { > > + result->fbc_enable =3D true; > > + } > > + > > + result->enable =3D result->pri_val <=3D max->pri && > > + result->spr_val <=3D max->spr && > > + result->cur_val <=3D max->cur; > > +} > > + > > static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv, > > uint32_t mem_value, enum pipe pipe, > > struct hsw_pipe_wm_parameters *params) > > { > > uint32_t pri_val, cur_val, spr_val; > > = > > - pri_val =3D hsw_compute_pri_wm_pipe(params, mem_value); > > + pri_val =3D hsw_compute_pri_wm(params, mem_value, false); > > spr_val =3D hsw_compute_spr_wm(params, mem_value); > > cur_val =3D hsw_compute_cur_wm(params, mem_value); > > = > > @@ -2250,13 +2333,15 @@ hsw_compute_linetime_wm(struct drm_device *dev,= struct drm_crtc *crtc) > > = > > static void hsw_compute_wm_parameters(struct drm_device *dev, > > struct hsw_pipe_wm_parameters *params, > > - uint32_t *wm) > > + uint32_t *wm, > > + struct hsw_wm_maximums *lp_max_1_2) > > { > > struct drm_i915_private *dev_priv =3D dev->dev_private; > > struct drm_crtc *crtc; > > struct drm_plane *plane; > > uint64_t sskpd =3D I915_READ64(MCH_SSKPD); > > enum pipe pipe; > > + int pipes_active =3D 0, sprites_enabled =3D 0; > > = > > if ((sskpd >> 56) & 0xFF) > > wm[0] =3D (sskpd >> 56) & 0xFF; > > @@ -2278,6 +2363,8 @@ static void hsw_compute_wm_parameters(struct drm_= device *dev, > > if (!p->active) > > continue; > > = > > + pipes_active++; > > + > > p->pipe_htotal =3D intel_crtc->config.adjusted_mode.htotal; > > p->pixel_rate =3D hsw_wm_get_pixel_rate(dev, crtc); > > p->pri_bytes_per_pixel =3D crtc->fb->bits_per_pixel / 8; > > @@ -2297,25 +2384,89 @@ static void hsw_compute_wm_parameters(struct dr= m_device *dev, > > p->sprite_enabled =3D intel_plane->wm.enable; > > p->spr_bytes_per_pixel =3D intel_plane->wm.bytes_per_pixel; > > p->spr_horiz_pixels =3D intel_plane->wm.horiz_pixels; > > + > > + if (p->sprite_enabled) > > + sprites_enabled++; > > + } > > + > > + if (pipes_active > 1) { > > + lp_max_1_2->pri =3D sprites_enabled ? 128 : 256; > > + lp_max_1_2->spr =3D 128; > > + lp_max_1_2->cur =3D 64; > > + } else { > > + lp_max_1_2->pri =3D sprites_enabled ? 384 : 768; > > + lp_max_1_2->spr =3D 384; > > + lp_max_1_2->cur =3D 255; > > } > > + lp_max_1_2->fbc =3D 15; > > } > > = > > static void hsw_compute_wm_results(struct drm_device *dev, > > struct hsw_pipe_wm_parameters *params, > > uint32_t *wm, > > + struct hsw_wm_maximums *lp_maximums, > > struct hsw_wm_values *results) > > { > > struct drm_i915_private *dev_priv =3D dev->dev_private; > > struct drm_crtc *crtc; > > + struct hsw_lp_wm_result lp_results[4]; > > enum pipe pipe; > > + int i; > > = > > - /* No support for LP WMs yet. */ > > - results->wm_lp[2] =3D 0; > > - results->wm_lp[1] =3D 0; > > - results->wm_lp[0] =3D 0; > > - results->wm_lp_spr[2] =3D 0; > > - results->wm_lp_spr[1] =3D 0; > > - results->wm_lp_spr[0] =3D 0; > > + hsw_compute_lp_wm(wm[1], lp_maximums, params, &lp_results[0]); > > + hsw_compute_lp_wm(wm[2], lp_maximums, params, &lp_results[1]); > > + hsw_compute_lp_wm(wm[3], lp_maximums, params, &lp_results[2]); > > + hsw_compute_lp_wm(wm[4], lp_maximums, params, &lp_results[3]); > > + > > + /* The spec says it is preferred to disable FBC WMs instead of disabl= ing > > + * a WM level. */ > > + results->enable_fbc_wm =3D true; > > + for (i =3D 0; i < 4; i++) { > > + if (lp_results[i].enable && !lp_results[i].fbc_enable) { > > + results->enable_fbc_wm =3D false; > > + break; > > + } > > + } > > + > > + if (lp_results[3].enable) { > = > Here you seem to assume that if lp[3] is valid, that also lp[2] and lp[0] > are valid... > = > > + results->wm_lp[2] =3D HSW_WM_LP_VAL(8, lp_results[3].fbc_val, > > + lp_results[3].pri_val, > > + lp_results[3].cur_val); > > + results->wm_lp_spr[2] =3D lp_results[3].spr_val; > > + } else if (lp_results[2].enable) { > > + results->wm_lp[2] =3D HSW_WM_LP_VAL(6, lp_results[2].fbc_val, > > + lp_results[2].pri_val, > > + lp_results[2].cur_val); > > + results->wm_lp_spr[2] =3D lp_results[2].spr_val; > > + } else { > > + results->wm_lp[2] =3D 0; > > + results->wm_lp_spr[2] =3D 0; > > + } > > + > > + if (lp_results[3].enable && lp_results[2].enable) { > = > ... but here you check if lp[2] is actually valid. > = > So it seems that in theory (if the latency values are really weird) the c= ode > could enable LP3 and leave LP2 or LP1 disabled, which is illegal. > = > I think this could be cleared up a bit like so: > struct hsw_lp_wm_result lp_results[4] =3D {}; > = > for (level =3D 1; level <=3D 4; level++) > if (!hsw_compute_lp_wm(wm[level], lp_maximums, params, &lp_results[l= evel-1])) > break; > = > and obviously have hsw_compute_lp_wm() return result->enable. It could > also save a few cycles by not computing watermark levels that will never > be used. > = > = > To simplify the results handling you could take another page out of my > WM patch. Something like this: > = > memset(results, 0, sizeof *results); > = > for (wm_lp =3D 1; wm_lp <=3D 3; wm_lp++) { > int level =3D wm_lp + (wm_lp >=3D 2 && lp_results[3].enable) > const struct hsw_lp_wm_result *r =3D &lp_results[level-1]; > = > if (!r->enable) > break; > = > results->wm_lp[wm_lp] =3D HSW_WM_LP_VAL(level << 1, r->fbc_val, r->pri= _val, r->cur_val); > results->wm_lp_spr[wm_lp] =3D r->spr_val; > } And I forgot the -1 from the results->foo[wm_lp] indexing. That's one = reason I don't quite like this split between LP0/level=3D0 and and LP1+/level>=3D1; The indexes no longer match the LP/level. But I think we can ignore that for now and clear it up later. > = > Quite a bit less code, and avoids all those awkward checks for which > levels are valid. > = > > + results->wm_lp[1] =3D HSW_WM_LP_VAL(6, lp_results[2].fbc_val, > > + lp_results[2].pri_val, > > + lp_results[2].cur_val); > > + results->wm_lp_spr[1] =3D lp_results[2].spr_val; > > + } else if (!lp_results[3].enable && lp_results[1].enable) { > > + results->wm_lp[1] =3D HSW_WM_LP_VAL(4, lp_results[1].fbc_val, > > + lp_results[1].pri_val, > > + lp_results[1].cur_val); > > + results->wm_lp_spr[1] =3D lp_results[1].spr_val; > > + } else { > > + results->wm_lp[1] =3D 0; > > + results->wm_lp_spr[1] =3D 0; > > + } > > + > > + if (lp_results[0].enable) { > > + results->wm_lp[0] =3D HSW_WM_LP_VAL(2, lp_results[0].fbc_val, > > + lp_results[0].pri_val, > > + lp_results[0].cur_val); > > + results->wm_lp_spr[0] =3D lp_results[0].spr_val; > > + } else { > > + results->wm_lp[0] =3D 0; > > + results->wm_lp_spr[0] =3D 0; > > + } > > = > > for_each_pipe(pipe) > > results->wm_pipe[pipe] =3D hsw_compute_wm_pipe(dev_priv, wm[0], > > @@ -2339,6 +2490,7 @@ static void hsw_write_wm_values(struct drm_i915_p= rivate *dev_priv, > > struct hsw_wm_values previous; > > uint32_t val; > > enum hsw_data_buf_partitioning prev_partitioning; > > + bool prev_enable_fbc_wm; > > = > > previous.wm_pipe[0] =3D I915_READ(WM0_PIPEA_ILK); > > previous.wm_pipe[1] =3D I915_READ(WM0_PIPEB_ILK); > > @@ -2356,11 +2508,14 @@ static void hsw_write_wm_values(struct drm_i915= _private *dev_priv, > > prev_partitioning =3D (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_= 6) ? > > HSW_DATA_BUF_PART_5_6 : HSW_DATA_BUF_PART_1_2; > > = > > + prev_enable_fbc_wm =3D !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS); > > + > > if (memcmp(results->wm_pipe, previous.wm_pipe, 3) =3D=3D 0 && > > memcmp(results->wm_lp, previous.wm_lp, 3) =3D=3D 0 && > > memcmp(results->wm_lp_spr, previous.wm_lp_spr, 3) =3D=3D 0 && > > memcmp(results->wm_linetime, previous.wm_linetime, 3) =3D=3D 0 && > > - partitioning =3D=3D prev_partitioning) > > + partitioning =3D=3D prev_partitioning && > > + results->enable_fbc_wm =3D=3D prev_enable_fbc_wm) > > return; > > = > > if (previous.wm_lp[2] !=3D 0) > > @@ -2393,6 +2548,15 @@ static void hsw_write_wm_values(struct drm_i915_= private *dev_priv, > > I915_WRITE(WM_MISC, val); > > } > > = > > + if (prev_enable_fbc_wm !=3D results->enable_fbc_wm) { > > + val =3D I915_READ(DISP_ARB_CTL); > > + if (results->enable_fbc_wm) > > + val &=3D ~DISP_FBC_WM_DIS; > > + else > > + val |=3D DISP_FBC_WM_DIS; > > + I915_WRITE(DISP_ARB_CTL, val); > > + } > > + > > if (previous.wm_lp_spr[0] !=3D results->wm_lp_spr[0]) > > I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]); > > if (previous.wm_lp_spr[1] !=3D results->wm_lp_spr[1]) > > @@ -2411,12 +2575,13 @@ static void hsw_write_wm_values(struct drm_i915= _private *dev_priv, > > static void haswell_update_wm(struct drm_device *dev) > > { > > struct drm_i915_private *dev_priv =3D dev->dev_private; > > + struct hsw_wm_maximums lp_max_1_2; > > struct hsw_pipe_wm_parameters params[3]; > > struct hsw_wm_values results; > > uint32_t wm[5]; > > = > > - hsw_compute_wm_parameters(dev, params, wm); > > - hsw_compute_wm_results(dev, params, wm, &results); > > + hsw_compute_wm_parameters(dev, params, wm, &lp_max_1_2); > > + hsw_compute_wm_results(dev, params, wm, &lp_max_1_2, &results); > > hsw_write_wm_values(dev_priv, &results, HSW_DATA_BUF_PART_1_2); > > } > > = > > -- = > > 1.8.1.2 > > = > > _______________________________________________ > > Intel-gfx mailing list > > Intel-gfx@lists.freedesktop.org > > http://lists.freedesktop.org/mailman/listinfo/intel-gfx > = > -- = > Ville Syrj=E4l=E4 > Intel OTC > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- = Ville Syrj=E4l=E4 Intel OTC