From: "Ville Syrjälä" <ville.syrjala@linux.intel.com>
To: Paulo Zanoni <przanoni@gmail.com>
Cc: intel-gfx@lists.freedesktop.org, Paulo Zanoni <paulo.r.zanoni@intel.com>
Subject: Re: [PATCH 4/5] drm/i915: properly set HSW WM_LP watermarks
Date: Wed, 29 May 2013 19:24:49 +0300 [thread overview]
Message-ID: <20130529162449.GT5004@intel.com> (raw)
In-Reply-To: <20130529160615.GR5004@intel.com>
On Wed, May 29, 2013 at 07:06:15PM +0300, Ville Syrjälä wrote:
> On Fri, May 24, 2013 at 07:05:12PM -0300, Paulo Zanoni wrote:
> > From: Paulo Zanoni <paulo.r.zanoni@intel.com>
> >
> > We were previously only setting the WM_PIPE registers, now we are
> > setting the LP watermark registers. This should allow deeper PC
> > states, resulting in power savings.
> >
> > We're only using 1/2 data buffer partitioning for now.
> >
> > v2: Merge both hsw_compute_pri_wm_* functions (Ville)
> >
> > Signed-off-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_reg.h | 4 +
> > drivers/gpu/drm/i915/intel_pm.c | 201 ++++++++++++++++++++++++++++++++++++----
> > 2 files changed, 187 insertions(+), 18 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index e86606c..58230ea 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -3057,6 +3057,10 @@
> > #define WM3S_LP_IVB 0x45128
> > #define WM1S_LP_EN (1<<31)
> >
> > +#define HSW_WM_LP_VAL(lat, fbc, pri, cur) \
> > + (WM3_LP_EN | ((lat) << WM1_LP_LATENCY_SHIFT) | \
> > + ((fbc) << WM1_LP_FBC_SHIFT) | ((pri) << WM1_LP_SR_SHIFT) | (cur))
> > +
> > /* Memory latency timer register */
> > #define MLTR_ILK 0x11222
> > #define MLTR_WM1_SHIFT 0
> > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> > index ef58a1a..872e2a8 100644
> > --- a/drivers/gpu/drm/i915/intel_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > @@ -2129,6 +2129,12 @@ static uint32_t hsw_wm_method2(uint32_t pixel_rate, uint32_t pipe_htotal,
> > return ret;
> > }
> >
> > +static uint32_t hsw_wm_fbc(uint32_t pri_val, uint32_t horiz_pixels,
> > + uint8_t bytes_per_pixel)
> > +{
> > + return DIV_ROUND_UP(pri_val * 64, horiz_pixels * bytes_per_pixel) + 2;
> > +}
> > +
> > struct hsw_pipe_wm_parameters {
> > bool active;
> > bool sprite_enabled;
> > @@ -2142,11 +2148,28 @@ struct hsw_pipe_wm_parameters {
> > uint32_t pixel_rate;
> > };
> >
> > +struct hsw_wm_maximums {
> > + uint16_t pri;
> > + uint16_t spr;
> > + uint16_t cur;
> > + uint16_t fbc;
> > +};
> > +
> > +struct hsw_lp_wm_result {
> > + bool enable;
> > + bool fbc_enable;
> > + uint32_t pri_val;
> > + uint32_t spr_val;
> > + uint32_t cur_val;
> > + uint32_t fbc_val;
> > +};
> > +
> > struct hsw_wm_values {
> > uint32_t wm_pipe[3];
> > uint32_t wm_lp[3];
> > uint32_t wm_lp_spr[3];
> > uint32_t wm_linetime[3];
> > + bool enable_fbc_wm;
> > };
> >
> > enum hsw_data_buf_partitioning {
> > @@ -2154,17 +2177,31 @@ enum hsw_data_buf_partitioning {
> > HSW_DATA_BUF_PART_5_6,
> > };
> >
> > -/* Only for WM_PIPE. */
> > -static uint32_t hsw_compute_pri_wm_pipe(struct hsw_pipe_wm_parameters *params,
> > - uint32_t mem_value)
> > +/* For both WM_PIPE and WM_LP. */
> > +static uint32_t hsw_compute_pri_wm(struct hsw_pipe_wm_parameters *params,
> > + uint32_t mem_value,
> > + bool is_lp)
> > {
> > + uint32_t method1, method2;
> > +
> > /* TODO: for now, assume the primary plane is always enabled. */
> > if (!params->active)
> > return 0;
> >
> > - return hsw_wm_method1(params->pixel_rate,
> > - params->pri_bytes_per_pixel,
> > - mem_value);
> > + method1 = hsw_wm_method1(params->pixel_rate,
> > + params->pri_bytes_per_pixel,
> > + mem_value);
> > +
> > + if (!is_lp)
> > + return method1;
> > +
> > + method2 = hsw_wm_method2(params->pixel_rate,
> > + params->pipe_htotal,
> > + params->pri_horiz_pixels,
> > + params->pri_bytes_per_pixel,
> > + mem_value);
> > +
> > + return min(method1, method2);
> > }
> >
> > /* For both WM_PIPE and WM_LP. */
> > @@ -2201,13 +2238,59 @@ static uint32_t hsw_compute_cur_wm(struct hsw_pipe_wm_parameters *params,
> > mem_value);
> > }
> >
> > +/* Only for WM_LP. */
> > +static uint32_t hsw_compute_fbc_wm(struct hsw_pipe_wm_parameters *params,
> > + uint32_t pri_val,
> > + uint32_t mem_value)
> > +{
> > + if (!params->active)
> > + return 0;
> > +
> > + return hsw_wm_fbc(pri_val,
> > + params->pri_horiz_pixels,
> > + params->pri_bytes_per_pixel);
> > +}
> > +
> > +static void hsw_compute_lp_wm(uint32_t mem_value, struct hsw_wm_maximums *max,
> > + struct hsw_pipe_wm_parameters *params,
> > + struct hsw_lp_wm_result *result)
> > +{
> > + enum pipe pipe;
> > + uint32_t pri_val[3], spr_val[3], cur_val[3], fbc_val[3];
> > +
> > + for (pipe = PIPE_A; pipe <= PIPE_C; pipe++) {
> > + struct hsw_pipe_wm_parameters *p = ¶ms[pipe];
> > +
> > + pri_val[pipe] = hsw_compute_pri_wm(p, mem_value, true);
> > + spr_val[pipe] = hsw_compute_spr_wm(p, mem_value);
> > + cur_val[pipe] = hsw_compute_cur_wm(p, mem_value);
> > + fbc_val[pipe] = hsw_compute_fbc_wm(p, pri_val[pipe], mem_value);
> > + }
> > +
> > + result->pri_val = max3(pri_val[0], pri_val[1], pri_val[2]);
> > + result->spr_val = max3(spr_val[0], spr_val[1], spr_val[2]);
> > + result->cur_val = max3(cur_val[0], cur_val[1], cur_val[2]);
> > + result->fbc_val = max3(fbc_val[0], fbc_val[1], fbc_val[2]);
> > +
> > + if (result->fbc_val > max->fbc) {
> > + result->fbc_enable = false;
> > + result->fbc_val = 0;
> > + } else {
> > + result->fbc_enable = true;
> > + }
> > +
> > + result->enable = result->pri_val <= max->pri &&
> > + result->spr_val <= max->spr &&
> > + result->cur_val <= max->cur;
> > +}
> > +
> > static uint32_t hsw_compute_wm_pipe(struct drm_i915_private *dev_priv,
> > uint32_t mem_value, enum pipe pipe,
> > struct hsw_pipe_wm_parameters *params)
> > {
> > uint32_t pri_val, cur_val, spr_val;
> >
> > - pri_val = hsw_compute_pri_wm_pipe(params, mem_value);
> > + pri_val = hsw_compute_pri_wm(params, mem_value, false);
> > spr_val = hsw_compute_spr_wm(params, mem_value);
> > cur_val = hsw_compute_cur_wm(params, mem_value);
> >
> > @@ -2250,13 +2333,15 @@ hsw_compute_linetime_wm(struct drm_device *dev, struct drm_crtc *crtc)
> >
> > static void hsw_compute_wm_parameters(struct drm_device *dev,
> > struct hsw_pipe_wm_parameters *params,
> > - uint32_t *wm)
> > + uint32_t *wm,
> > + struct hsw_wm_maximums *lp_max_1_2)
> > {
> > struct drm_i915_private *dev_priv = dev->dev_private;
> > struct drm_crtc *crtc;
> > struct drm_plane *plane;
> > uint64_t sskpd = I915_READ64(MCH_SSKPD);
> > enum pipe pipe;
> > + int pipes_active = 0, sprites_enabled = 0;
> >
> > if ((sskpd >> 56) & 0xFF)
> > wm[0] = (sskpd >> 56) & 0xFF;
> > @@ -2278,6 +2363,8 @@ static void hsw_compute_wm_parameters(struct drm_device *dev,
> > if (!p->active)
> > continue;
> >
> > + pipes_active++;
> > +
> > p->pipe_htotal = intel_crtc->config.adjusted_mode.htotal;
> > p->pixel_rate = hsw_wm_get_pixel_rate(dev, crtc);
> > p->pri_bytes_per_pixel = crtc->fb->bits_per_pixel / 8;
> > @@ -2297,25 +2384,89 @@ static void hsw_compute_wm_parameters(struct drm_device *dev,
> > p->sprite_enabled = intel_plane->wm.enable;
> > p->spr_bytes_per_pixel = intel_plane->wm.bytes_per_pixel;
> > p->spr_horiz_pixels = intel_plane->wm.horiz_pixels;
> > +
> > + if (p->sprite_enabled)
> > + sprites_enabled++;
> > + }
> > +
> > + if (pipes_active > 1) {
> > + lp_max_1_2->pri = sprites_enabled ? 128 : 256;
> > + lp_max_1_2->spr = 128;
> > + lp_max_1_2->cur = 64;
> > + } else {
> > + lp_max_1_2->pri = sprites_enabled ? 384 : 768;
> > + lp_max_1_2->spr = 384;
> > + lp_max_1_2->cur = 255;
> > }
> > + lp_max_1_2->fbc = 15;
> > }
> >
> > static void hsw_compute_wm_results(struct drm_device *dev,
> > struct hsw_pipe_wm_parameters *params,
> > uint32_t *wm,
> > + struct hsw_wm_maximums *lp_maximums,
> > struct hsw_wm_values *results)
> > {
> > struct drm_i915_private *dev_priv = dev->dev_private;
> > struct drm_crtc *crtc;
> > + struct hsw_lp_wm_result lp_results[4];
> > enum pipe pipe;
> > + int i;
> >
> > - /* No support for LP WMs yet. */
> > - results->wm_lp[2] = 0;
> > - results->wm_lp[1] = 0;
> > - results->wm_lp[0] = 0;
> > - results->wm_lp_spr[2] = 0;
> > - results->wm_lp_spr[1] = 0;
> > - results->wm_lp_spr[0] = 0;
> > + hsw_compute_lp_wm(wm[1], lp_maximums, params, &lp_results[0]);
> > + hsw_compute_lp_wm(wm[2], lp_maximums, params, &lp_results[1]);
> > + hsw_compute_lp_wm(wm[3], lp_maximums, params, &lp_results[2]);
> > + hsw_compute_lp_wm(wm[4], lp_maximums, params, &lp_results[3]);
> > +
> > + /* The spec says it is preferred to disable FBC WMs instead of disabling
> > + * a WM level. */
> > + results->enable_fbc_wm = true;
> > + for (i = 0; i < 4; i++) {
> > + if (lp_results[i].enable && !lp_results[i].fbc_enable) {
> > + results->enable_fbc_wm = false;
> > + break;
> > + }
> > + }
> > +
> > + if (lp_results[3].enable) {
>
> Here you seem to assume that if lp[3] is valid, that also lp[2] and lp[0]
> are valid...
>
> > + results->wm_lp[2] = HSW_WM_LP_VAL(8, lp_results[3].fbc_val,
> > + lp_results[3].pri_val,
> > + lp_results[3].cur_val);
> > + results->wm_lp_spr[2] = lp_results[3].spr_val;
> > + } else if (lp_results[2].enable) {
> > + results->wm_lp[2] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
> > + lp_results[2].pri_val,
> > + lp_results[2].cur_val);
> > + results->wm_lp_spr[2] = lp_results[2].spr_val;
> > + } else {
> > + results->wm_lp[2] = 0;
> > + results->wm_lp_spr[2] = 0;
> > + }
> > +
> > + if (lp_results[3].enable && lp_results[2].enable) {
>
> ... but here you check if lp[2] is actually valid.
>
> So it seems that in theory (if the latency values are really weird) the code
> could enable LP3 and leave LP2 or LP1 disabled, which is illegal.
>
> I think this could be cleared up a bit like so:
> struct hsw_lp_wm_result lp_results[4] = {};
>
> for (level = 1; level <= 4; level++)
> if (!hsw_compute_lp_wm(wm[level], lp_maximums, params, &lp_results[level-1]))
> break;
>
> and obviously have hsw_compute_lp_wm() return result->enable. It could
> also save a few cycles by not computing watermark levels that will never
> be used.
>
>
> To simplify the results handling you could take another page out of my
> WM patch. Something like this:
>
> memset(results, 0, sizeof *results);
>
> for (wm_lp = 1; wm_lp <= 3; wm_lp++) {
> int level = wm_lp + (wm_lp >= 2 && lp_results[3].enable)
> const struct hsw_lp_wm_result *r = &lp_results[level-1];
>
> if (!r->enable)
> break;
>
> results->wm_lp[wm_lp] = HSW_WM_LP_VAL(level << 1, r->fbc_val, r->pri_val, r->cur_val);
> results->wm_lp_spr[wm_lp] = r->spr_val;
> }
And I forgot the -1 from the results->foo[wm_lp] indexing. That's one
reason I don't quite like this split between LP0/level=0 and and
LP1+/level>=1; The indexes no longer match the LP/level. But I think
we can ignore that for now and clear it up later.
>
> Quite a bit less code, and avoids all those awkward checks for which
> levels are valid.
>
> > + results->wm_lp[1] = HSW_WM_LP_VAL(6, lp_results[2].fbc_val,
> > + lp_results[2].pri_val,
> > + lp_results[2].cur_val);
> > + results->wm_lp_spr[1] = lp_results[2].spr_val;
> > + } else if (!lp_results[3].enable && lp_results[1].enable) {
> > + results->wm_lp[1] = HSW_WM_LP_VAL(4, lp_results[1].fbc_val,
> > + lp_results[1].pri_val,
> > + lp_results[1].cur_val);
> > + results->wm_lp_spr[1] = lp_results[1].spr_val;
> > + } else {
> > + results->wm_lp[1] = 0;
> > + results->wm_lp_spr[1] = 0;
> > + }
> > +
> > + if (lp_results[0].enable) {
> > + results->wm_lp[0] = HSW_WM_LP_VAL(2, lp_results[0].fbc_val,
> > + lp_results[0].pri_val,
> > + lp_results[0].cur_val);
> > + results->wm_lp_spr[0] = lp_results[0].spr_val;
> > + } else {
> > + results->wm_lp[0] = 0;
> > + results->wm_lp_spr[0] = 0;
> > + }
> >
> > for_each_pipe(pipe)
> > results->wm_pipe[pipe] = hsw_compute_wm_pipe(dev_priv, wm[0],
> > @@ -2339,6 +2490,7 @@ static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
> > struct hsw_wm_values previous;
> > uint32_t val;
> > enum hsw_data_buf_partitioning prev_partitioning;
> > + bool prev_enable_fbc_wm;
> >
> > previous.wm_pipe[0] = I915_READ(WM0_PIPEA_ILK);
> > previous.wm_pipe[1] = I915_READ(WM0_PIPEB_ILK);
> > @@ -2356,11 +2508,14 @@ static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
> > prev_partitioning = (I915_READ(WM_MISC) & WM_MISC_DATA_PARTITION_5_6) ?
> > HSW_DATA_BUF_PART_5_6 : HSW_DATA_BUF_PART_1_2;
> >
> > + prev_enable_fbc_wm = !(I915_READ(DISP_ARB_CTL) & DISP_FBC_WM_DIS);
> > +
> > if (memcmp(results->wm_pipe, previous.wm_pipe, 3) == 0 &&
> > memcmp(results->wm_lp, previous.wm_lp, 3) == 0 &&
> > memcmp(results->wm_lp_spr, previous.wm_lp_spr, 3) == 0 &&
> > memcmp(results->wm_linetime, previous.wm_linetime, 3) == 0 &&
> > - partitioning == prev_partitioning)
> > + partitioning == prev_partitioning &&
> > + results->enable_fbc_wm == prev_enable_fbc_wm)
> > return;
> >
> > if (previous.wm_lp[2] != 0)
> > @@ -2393,6 +2548,15 @@ static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
> > I915_WRITE(WM_MISC, val);
> > }
> >
> > + if (prev_enable_fbc_wm != results->enable_fbc_wm) {
> > + val = I915_READ(DISP_ARB_CTL);
> > + if (results->enable_fbc_wm)
> > + val &= ~DISP_FBC_WM_DIS;
> > + else
> > + val |= DISP_FBC_WM_DIS;
> > + I915_WRITE(DISP_ARB_CTL, val);
> > + }
> > +
> > if (previous.wm_lp_spr[0] != results->wm_lp_spr[0])
> > I915_WRITE(WM1S_LP_ILK, results->wm_lp_spr[0]);
> > if (previous.wm_lp_spr[1] != results->wm_lp_spr[1])
> > @@ -2411,12 +2575,13 @@ static void hsw_write_wm_values(struct drm_i915_private *dev_priv,
> > static void haswell_update_wm(struct drm_device *dev)
> > {
> > struct drm_i915_private *dev_priv = dev->dev_private;
> > + struct hsw_wm_maximums lp_max_1_2;
> > struct hsw_pipe_wm_parameters params[3];
> > struct hsw_wm_values results;
> > uint32_t wm[5];
> >
> > - hsw_compute_wm_parameters(dev, params, wm);
> > - hsw_compute_wm_results(dev, params, wm, &results);
> > + hsw_compute_wm_parameters(dev, params, wm, &lp_max_1_2);
> > + hsw_compute_wm_results(dev, params, wm, &lp_max_1_2, &results);
> > hsw_write_wm_values(dev_priv, &results, HSW_DATA_BUF_PART_1_2);
> > }
> >
> > --
> > 1.8.1.2
> >
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> --
> Ville Syrjälä
> Intel OTC
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Ville Syrjälä
Intel OTC
next prev parent reply other threads:[~2013-05-29 16:25 UTC|newest]
Thread overview: 29+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-05-24 14:59 [PATCH 0/5] Haswell watermarks Paulo Zanoni
2013-05-24 14:59 ` [PATCH 1/5] drm/i915: add "enable" argument to intel_update_sprite_watermarks Paulo Zanoni
2013-05-24 16:22 ` Ville Syrjälä
2013-05-24 14:59 ` [PATCH 2/5] drm/i915: add haswell_update_sprite_wm Paulo Zanoni
2013-05-24 17:00 ` Ville Syrjälä
2013-05-24 19:35 ` Daniel Vetter
2013-05-24 14:59 ` [PATCH 3/5] drm/i915: properly set HSW WM_PIPE registers Paulo Zanoni
2013-05-24 16:07 ` Ville Syrjälä
2013-05-24 22:00 ` Paulo Zanoni
2013-05-24 22:02 ` Paulo Zanoni
2013-05-27 11:07 ` Ville Syrjälä
2013-05-27 19:21 ` Paulo Zanoni
2013-05-29 15:39 ` Ville Syrjälä
2013-05-31 13:08 ` [PATCH 1/3] " Paulo Zanoni
2013-05-31 15:03 ` Ville Syrjälä
2013-05-24 14:59 ` [PATCH 4/5] drm/i915: properly set HSW WM_LP watermarks Paulo Zanoni
2013-05-24 16:11 ` Ville Syrjälä
2013-05-24 22:05 ` Paulo Zanoni
2013-05-29 16:06 ` Ville Syrjälä
2013-05-29 16:24 ` Ville Syrjälä [this message]
2013-05-31 13:12 ` [PATCH 2/3] " Paulo Zanoni
2013-05-31 13:58 ` Ville Syrjälä
2013-05-31 14:45 ` Paulo Zanoni
2013-05-31 15:05 ` Ville Syrjälä
2013-05-24 14:59 ` [PATCH 5/5] drm/i915: add support for 5/6 data buffer partitioning on Haswell Paulo Zanoni
2013-05-29 16:17 ` Ville Syrjälä
2013-05-31 13:19 ` [PATCH 3/3] " Paulo Zanoni
2013-05-31 13:44 ` Ville Syrjälä
2013-05-31 15:19 ` Daniel Vetter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130529162449.GT5004@intel.com \
--to=ville.syrjala@linux.intel.com \
--cc=intel-gfx@lists.freedesktop.org \
--cc=paulo.r.zanoni@intel.com \
--cc=przanoni@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.