From mboxrd@z Thu Jan 1 00:00:00 1970 From: Daniel Vetter Subject: Re: [PATCH] drm/i915: Rewrite vlv_find_best_dpll() Date: Tue, 24 Sep 2013 11:23:31 +0200 Message-ID: <20130924092331.GH13668@phenom.ffwll.local> References: <1379959390-2686-1-git-send-email-ville.syrjala@linux.intel.com> Mime-Version: 1.0 Content-Type: text/plain; charset="iso-8859-1" Content-Transfer-Encoding: quoted-printable Return-path: Received: from mail-ee0-f53.google.com (mail-ee0-f53.google.com [74.125.83.53]) by gabe.freedesktop.org (Postfix) with ESMTP id 9A654E63D6 for ; Tue, 24 Sep 2013 02:23:14 -0700 (PDT) Received: by mail-ee0-f53.google.com with SMTP id b15so2355072eek.12 for ; Tue, 24 Sep 2013 02:23:13 -0700 (PDT) Content-Disposition: inline In-Reply-To: <1379959390-2686-1-git-send-email-ville.syrjala@linux.intel.com> List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Sender: intel-gfx-bounces+gcfxdi-intel-gfx=m.gmane.org@lists.freedesktop.org Errors-To: intel-gfx-bounces+gcfxdi-intel-gfx=m.gmane.org@lists.freedesktop.org To: ville.syrjala@linux.intel.com Cc: intel-gfx@lists.freedesktop.org List-Id: intel-gfx@lists.freedesktop.org On Mon, Sep 23, 2013 at 09:03:10PM +0300, ville.syrjala@linux.intel.com wrote: > From: Ville Syrjälä > > vlv_find_best_dpll() has several integer over/underflow issues, > includes a hand rolled DIV_ROUND_CLOSEST(), has a boat load of > variables, some slightly weird math, and it doesn't look very > nice either. > > Rather than try to deal with each issue separately I just decided > to rewrite the function a bit. 
> = > WARNING: Entirely untested > = > Signed-off-by: Ville Syrj=E4l=E4 > --- > drivers/gpu/drm/i915/intel_display.c | 94 +++++++++++++++---------------= ------ > 1 file changed, 40 insertions(+), 54 deletions(-) > = > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/= intel_display.c > index 3b06250..f89fb12 100644 > --- a/drivers/gpu/drm/i915/intel_display.c > +++ b/drivers/gpu/drm/i915/intel_display.c > @@ -670,65 +670,51 @@ vlv_find_best_dpll(const intel_limit_t *limit, stru= ct drm_crtc *crtc, > int target, int refclk, intel_clock_t *match_clock, > intel_clock_t *best_clock) > { > - u32 p1, p2, m1, m2, vco, bestn, bestm1, bestm2, bestp1, bestp2; > - u32 m, n, fastclk; > - u32 updrate, minupdate, p; > - unsigned long bestppm, ppm, absppm; > - int dotclk, flag; > - > - flag =3D 0; > - dotclk =3D target * 1000; > - bestppm =3D 1000000; > - ppm =3D absppm =3D 0; > - fastclk =3D dotclk / (2*100); > - updrate =3D 0; > - minupdate =3D 19200; > - n =3D p =3D p1 =3D p2 =3D m =3D m1 =3D m2 =3D vco =3D bestn =3D 0; > - bestm1 =3D bestm2 =3D bestp1 =3D bestp2 =3D 0; > + intel_clock_t clock =3D { > + .dot =3D target * 5, /* fast clock */ > + }; > + unsigned int bestppm =3D 1000000; > + /* min update 19.2 MHz */ > + int max_n =3D min(limit->n.max, refclk / 19200); > = > /* based on hardware requirement, prefer smaller n to precision */ > - for (n =3D limit->n.min; n <=3D ((refclk) / minupdate); n++) { > - updrate =3D refclk / n; > - for (p1 =3D limit->p1.max; p1 > limit->p1.min; p1--) { > - for (p2 =3D limit->p2.p2_fast+1; p2 > 0; p2--) { > - if (p2 > 10) > - p2 =3D p2 - 1; > - p =3D p1 * p2; > - /* based on hardware requirement, prefer bigger m1,m2 values */ > - for (m1 =3D limit->m1.min; m1 <=3D limit->m1.max; m1++) { > - m2 =3D (((2*(fastclk * p * n / m1 )) + > - refclk) / (2*refclk)); > - m =3D m1 * m2; > - vco =3D updrate * m; > - if (vco >=3D limit->vco.min && vco < limit->vco.max) { > - ppm =3D 1000000 * ((vco / p) - fastclk) / fastclk; > - 
absppm =3D (ppm > 0) ? ppm : (-ppm); > - if (absppm < 100 && ((p1 * p2) > (bestp1 * bestp2))) { > - bestppm =3D 0; > - flag =3D 1; > - } > - if (absppm < bestppm - 10) { > - bestppm =3D absppm; > - flag =3D 1; > - } > - if (flag) { > - bestn =3D n; > - bestm1 =3D m1; > - bestm2 =3D m2; > - bestp1 =3D p1; > - bestp2 =3D p2; > - flag =3D 0; > - } > - } > - } > + for (clock.n =3D limit->n.min; clock.n <=3D max_n; clock.n++) { > + for (clock.p1 =3D limit->p1.max; clock.p1 > limit->p1.min; clock.p1--) { > + for (clock.p2 =3D limit->p2.p2_fast+1; clock.p2 > 0; clock.p2--) { I think that's going to upset the coding style police ;-) I guess it would be simple to extract a vlv_compute_clock like we have for pnv/i9xx that's both used here and in the get_clock code from Jesse. -Daniel > + if (clock.p2 > 10) > + clock.p2--; > + clock.p =3D clock.p1 * clock.p2; > + > + /* based on hardware requirement, prefer bigger m1,m2 values */ > + for (clock.m1 =3D limit->m1.min; clock.m1 <=3D limit->m1.max; clock.m1= ++) { > + unsigned int ppm, diff; > + > + clock.m2 =3D DIV_ROUND_CLOSEST(clock.dot * clock.p * clock.n, > + clock.m1 * refclk); > + clock.m =3D clock.m1 * clock.m2; > + > + clock.vco =3D refclk * clock.m / clock.n; > + > + if (clock.vco < limit->vco.min || > + clock.vco >=3D limit->vco.max) > + continue; > + > + diff =3D abs(clock.vco / clock.p - clock.dot); > + ppm =3D div_u64(1000000ULL * diff, clock.dot); > + > + if (ppm < 100 && clock.p > best_clock->p) { > + bestppm =3D 0; > + *best_clock =3D clock; > + } > + > + if (ppm + 10 < bestppm) { > + bestppm =3D ppm; > + *best_clock =3D clock; > } > } > } > - best_clock->n =3D bestn; > - best_clock->m1 =3D bestm1; > - best_clock->m2 =3D bestm2; > - best_clock->p1 =3D bestp1; > - best_clock->p2 =3D bestp2; > + } > + } > = > return true; > } > -- = > 1.8.1.5 > = > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/intel-gfx -- = 
Daniel Vetter Software Engineer, Intel Corporation +41 (0) 79 365 57 48 - http://blog.ffwll.ch