* [PATCH] drm/i915: Rewrite vlv_find_best_dpll()
@ 2013-09-23 18:03 ville.syrjala
2013-09-24 9:23 ` Daniel Vetter
0 siblings, 1 reply; 3+ messages in thread
From: ville.syrjala @ 2013-09-23 18:03 UTC (permalink / raw)
To: intel-gfx
From: Ville Syrjälä <ville.syrjala@linux.intel.com>
vlv_find_best_dpll() has several integer over/underflow issues,
includes a hand rolled DIV_ROUND_CLOSEST(), has a boat load of
variables, some slightly weird math, and it doesn't look very
nice either.
Rather than try to deal with each issue separately I just decided
to rewrite the function a bit.
WARNING: Entirely untested
Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
---
drivers/gpu/drm/i915/intel_display.c | 94 +++++++++++++++---------------------
1 file changed, 40 insertions(+), 54 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3b06250..f89fb12 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -670,65 +670,51 @@ vlv_find_best_dpll(const intel_limit_t *limit, struct drm_crtc *crtc,
int target, int refclk, intel_clock_t *match_clock,
intel_clock_t *best_clock)
{
- u32 p1, p2, m1, m2, vco, bestn, bestm1, bestm2, bestp1, bestp2;
- u32 m, n, fastclk;
- u32 updrate, minupdate, p;
- unsigned long bestppm, ppm, absppm;
- int dotclk, flag;
-
- flag = 0;
- dotclk = target * 1000;
- bestppm = 1000000;
- ppm = absppm = 0;
- fastclk = dotclk / (2*100);
- updrate = 0;
- minupdate = 19200;
- n = p = p1 = p2 = m = m1 = m2 = vco = bestn = 0;
- bestm1 = bestm2 = bestp1 = bestp2 = 0;
+ intel_clock_t clock = {
+ .dot = target * 5, /* fast clock */
+ };
+ unsigned int bestppm = 1000000;
+ /* min update 19.2 MHz */
+ int max_n = min(limit->n.max, refclk / 19200);
/* based on hardware requirement, prefer smaller n to precision */
- for (n = limit->n.min; n <= ((refclk) / minupdate); n++) {
- updrate = refclk / n;
- for (p1 = limit->p1.max; p1 > limit->p1.min; p1--) {
- for (p2 = limit->p2.p2_fast+1; p2 > 0; p2--) {
- if (p2 > 10)
- p2 = p2 - 1;
- p = p1 * p2;
- /* based on hardware requirement, prefer bigger m1,m2 values */
- for (m1 = limit->m1.min; m1 <= limit->m1.max; m1++) {
- m2 = (((2*(fastclk * p * n / m1 )) +
- refclk) / (2*refclk));
- m = m1 * m2;
- vco = updrate * m;
- if (vco >= limit->vco.min && vco < limit->vco.max) {
- ppm = 1000000 * ((vco / p) - fastclk) / fastclk;
- absppm = (ppm > 0) ? ppm : (-ppm);
- if (absppm < 100 && ((p1 * p2) > (bestp1 * bestp2))) {
- bestppm = 0;
- flag = 1;
- }
- if (absppm < bestppm - 10) {
- bestppm = absppm;
- flag = 1;
- }
- if (flag) {
- bestn = n;
- bestm1 = m1;
- bestm2 = m2;
- bestp1 = p1;
- bestp2 = p2;
- flag = 0;
- }
- }
- }
+ for (clock.n = limit->n.min; clock.n <= max_n; clock.n++) {
+ for (clock.p1 = limit->p1.max; clock.p1 > limit->p1.min; clock.p1--) {
+ for (clock.p2 = limit->p2.p2_fast+1; clock.p2 > 0; clock.p2--) {
+ if (clock.p2 > 10)
+ clock.p2--;
+ clock.p = clock.p1 * clock.p2;
+
+ /* based on hardware requirement, prefer bigger m1,m2 values */
+ for (clock.m1 = limit->m1.min; clock.m1 <= limit->m1.max; clock.m1++) {
+ unsigned int ppm, diff;
+
+ clock.m2 = DIV_ROUND_CLOSEST(clock.dot * clock.p * clock.n,
+ clock.m1 * refclk);
+ clock.m = clock.m1 * clock.m2;
+
+ clock.vco = refclk * clock.m / clock.n;
+
+ if (clock.vco < limit->vco.min ||
+ clock.vco >= limit->vco.max)
+ continue;
+
+ diff = abs(clock.vco / clock.p - clock.dot);
+ ppm = div_u64(1000000ULL * diff, clock.dot);
+
+ if (ppm < 100 && clock.p > best_clock->p) {
+ bestppm = 0;
+ *best_clock = clock;
+ }
+
+ if (ppm + 10 < bestppm) {
+ bestppm = ppm;
+ *best_clock = clock;
}
}
}
- best_clock->n = bestn;
- best_clock->m1 = bestm1;
- best_clock->m2 = bestm2;
- best_clock->p1 = bestp1;
- best_clock->p2 = bestp2;
+ }
+ }
return true;
}
--
1.8.1.5
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] drm/i915: Rewrite vlv_find_best_dpll()
2013-09-23 18:03 [PATCH] drm/i915: Rewrite vlv_find_best_dpll() ville.syrjala
@ 2013-09-24 9:23 ` Daniel Vetter
2013-09-24 11:15 ` Ville Syrjälä
0 siblings, 1 reply; 3+ messages in thread
From: Daniel Vetter @ 2013-09-24 9:23 UTC (permalink / raw)
To: ville.syrjala; +Cc: intel-gfx
On Mon, Sep 23, 2013 at 09:03:10PM +0300, ville.syrjala@linux.intel.com wrote:
> From: Ville Syrjälä <ville.syrjala@linux.intel.com>
>
> vlv_find_best_dpll() has several integer over/underflow issues,
> includes a hand rolled DIV_ROUND_CLOSEST(), has a boat load of
> variables, some slightly weird math, and it doesn't look very
> nice either.
>
> Rather than try to deal with each issue separately I just decided
> to rewrite the function a bit.
>
> WARNING: Entirely untested
>
> Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> ---
> drivers/gpu/drm/i915/intel_display.c | 94 +++++++++++++++---------------------
> 1 file changed, 40 insertions(+), 54 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> index 3b06250..f89fb12 100644
> --- a/drivers/gpu/drm/i915/intel_display.c
> +++ b/drivers/gpu/drm/i915/intel_display.c
> @@ -670,65 +670,51 @@ vlv_find_best_dpll(const intel_limit_t *limit, struct drm_crtc *crtc,
> int target, int refclk, intel_clock_t *match_clock,
> intel_clock_t *best_clock)
> {
> - u32 p1, p2, m1, m2, vco, bestn, bestm1, bestm2, bestp1, bestp2;
> - u32 m, n, fastclk;
> - u32 updrate, minupdate, p;
> - unsigned long bestppm, ppm, absppm;
> - int dotclk, flag;
> -
> - flag = 0;
> - dotclk = target * 1000;
> - bestppm = 1000000;
> - ppm = absppm = 0;
> - fastclk = dotclk / (2*100);
> - updrate = 0;
> - minupdate = 19200;
> - n = p = p1 = p2 = m = m1 = m2 = vco = bestn = 0;
> - bestm1 = bestm2 = bestp1 = bestp2 = 0;
> + intel_clock_t clock = {
> + .dot = target * 5, /* fast clock */
> + };
> + unsigned int bestppm = 1000000;
> + /* min update 19.2 MHz */
> + int max_n = min(limit->n.max, refclk / 19200);
>
> /* based on hardware requirement, prefer smaller n to precision */
> - for (n = limit->n.min; n <= ((refclk) / minupdate); n++) {
> - updrate = refclk / n;
> - for (p1 = limit->p1.max; p1 > limit->p1.min; p1--) {
> - for (p2 = limit->p2.p2_fast+1; p2 > 0; p2--) {
> - if (p2 > 10)
> - p2 = p2 - 1;
> - p = p1 * p2;
> - /* based on hardware requirement, prefer bigger m1,m2 values */
> - for (m1 = limit->m1.min; m1 <= limit->m1.max; m1++) {
> - m2 = (((2*(fastclk * p * n / m1 )) +
> - refclk) / (2*refclk));
> - m = m1 * m2;
> - vco = updrate * m;
> - if (vco >= limit->vco.min && vco < limit->vco.max) {
> - ppm = 1000000 * ((vco / p) - fastclk) / fastclk;
> - absppm = (ppm > 0) ? ppm : (-ppm);
> - if (absppm < 100 && ((p1 * p2) > (bestp1 * bestp2))) {
> - bestppm = 0;
> - flag = 1;
> - }
> - if (absppm < bestppm - 10) {
> - bestppm = absppm;
> - flag = 1;
> - }
> - if (flag) {
> - bestn = n;
> - bestm1 = m1;
> - bestm2 = m2;
> - bestp1 = p1;
> - bestp2 = p2;
> - flag = 0;
> - }
> - }
> - }
> + for (clock.n = limit->n.min; clock.n <= max_n; clock.n++) {
> + for (clock.p1 = limit->p1.max; clock.p1 > limit->p1.min; clock.p1--) {
> + for (clock.p2 = limit->p2.p2_fast+1; clock.p2 > 0; clock.p2--) {
I think that's going to upset the coding style police ;-) I guess it would
be simple to extract a vlv_compute_clock like we have for pnv/i9xx that's
both used here and in the get_clock code from Jesse.
-Daniel
> + if (clock.p2 > 10)
> + clock.p2--;
> + clock.p = clock.p1 * clock.p2;
> +
> + /* based on hardware requirement, prefer bigger m1,m2 values */
> + for (clock.m1 = limit->m1.min; clock.m1 <= limit->m1.max; clock.m1++) {
> + unsigned int ppm, diff;
> +
> + clock.m2 = DIV_ROUND_CLOSEST(clock.dot * clock.p * clock.n,
> + clock.m1 * refclk);
> + clock.m = clock.m1 * clock.m2;
> +
> + clock.vco = refclk * clock.m / clock.n;
> +
> + if (clock.vco < limit->vco.min ||
> + clock.vco >= limit->vco.max)
> + continue;
> +
> + diff = abs(clock.vco / clock.p - clock.dot);
> + ppm = div_u64(1000000ULL * diff, clock.dot);
> +
> + if (ppm < 100 && clock.p > best_clock->p) {
> + bestppm = 0;
> + *best_clock = clock;
> + }
> +
> + if (ppm + 10 < bestppm) {
> + bestppm = ppm;
> + *best_clock = clock;
> }
> }
> }
> - best_clock->n = bestn;
> - best_clock->m1 = bestm1;
> - best_clock->m2 = bestm2;
> - best_clock->p1 = bestp1;
> - best_clock->p2 = bestp2;
> + }
> + }
>
> return true;
> }
> --
> 1.8.1.5
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
+41 (0) 79 365 57 48 - http://blog.ffwll.ch
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [PATCH] drm/i915: Rewrite vlv_find_best_dpll()
2013-09-24 9:23 ` Daniel Vetter
@ 2013-09-24 11:15 ` Ville Syrjälä
0 siblings, 0 replies; 3+ messages in thread
From: Ville Syrjälä @ 2013-09-24 11:15 UTC (permalink / raw)
To: Daniel Vetter; +Cc: intel-gfx
On Tue, Sep 24, 2013 at 11:23:31AM +0200, Daniel Vetter wrote:
> On Mon, Sep 23, 2013 at 09:03:10PM +0300, ville.syrjala@linux.intel.com wrote:
> > From: Ville Syrjälä <ville.syrjala@linux.intel.com>
> >
> > vlv_find_best_dpll() has several integer over/underflow issues,
> > includes a hand rolled DIV_ROUND_CLOSEST(), has a boat load of
> > variables, some slightly weird math, and it doesn't look very
> > nice either.
> >
> > Rather than try to deal with each issue separately I just decided
> > to rewrite the function a bit.
> >
> > WARNING: Entirely untested
> >
> > Signed-off-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
> > ---
> > drivers/gpu/drm/i915/intel_display.c | 94 +++++++++++++++---------------------
> > 1 file changed, 40 insertions(+), 54 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
> > index 3b06250..f89fb12 100644
> > --- a/drivers/gpu/drm/i915/intel_display.c
> > +++ b/drivers/gpu/drm/i915/intel_display.c
> > @@ -670,65 +670,51 @@ vlv_find_best_dpll(const intel_limit_t *limit, struct drm_crtc *crtc,
> > int target, int refclk, intel_clock_t *match_clock,
> > intel_clock_t *best_clock)
> > {
> > - u32 p1, p2, m1, m2, vco, bestn, bestm1, bestm2, bestp1, bestp2;
> > - u32 m, n, fastclk;
> > - u32 updrate, minupdate, p;
> > - unsigned long bestppm, ppm, absppm;
> > - int dotclk, flag;
> > -
> > - flag = 0;
> > - dotclk = target * 1000;
> > - bestppm = 1000000;
> > - ppm = absppm = 0;
> > - fastclk = dotclk / (2*100);
> > - updrate = 0;
> > - minupdate = 19200;
> > - n = p = p1 = p2 = m = m1 = m2 = vco = bestn = 0;
> > - bestm1 = bestm2 = bestp1 = bestp2 = 0;
> > + intel_clock_t clock = {
> > + .dot = target * 5, /* fast clock */
> > + };
> > + unsigned int bestppm = 1000000;
> > + /* min update 19.2 MHz */
> > + int max_n = min(limit->n.max, refclk / 19200);
> >
> > /* based on hardware requirement, prefer smaller n to precision */
> > - for (n = limit->n.min; n <= ((refclk) / minupdate); n++) {
> > - updrate = refclk / n;
> > - for (p1 = limit->p1.max; p1 > limit->p1.min; p1--) {
> > - for (p2 = limit->p2.p2_fast+1; p2 > 0; p2--) {
> > - if (p2 > 10)
> > - p2 = p2 - 1;
> > - p = p1 * p2;
> > - /* based on hardware requirement, prefer bigger m1,m2 values */
> > - for (m1 = limit->m1.min; m1 <= limit->m1.max; m1++) {
> > - m2 = (((2*(fastclk * p * n / m1 )) +
> > - refclk) / (2*refclk));
> > - m = m1 * m2;
> > - vco = updrate * m;
> > - if (vco >= limit->vco.min && vco < limit->vco.max) {
> > - ppm = 1000000 * ((vco / p) - fastclk) / fastclk;
> > - absppm = (ppm > 0) ? ppm : (-ppm);
> > - if (absppm < 100 && ((p1 * p2) > (bestp1 * bestp2))) {
> > - bestppm = 0;
> > - flag = 1;
> > - }
> > - if (absppm < bestppm - 10) {
> > - bestppm = absppm;
> > - flag = 1;
> > - }
> > - if (flag) {
> > - bestn = n;
> > - bestm1 = m1;
> > - bestm2 = m2;
> > - bestp1 = p1;
> > - bestp2 = p2;
> > - flag = 0;
> > - }
> > - }
> > - }
> > + for (clock.n = limit->n.min; clock.n <= max_n; clock.n++) {
> > + for (clock.p1 = limit->p1.max; clock.p1 > limit->p1.min; clock.p1--) {
> > + for (clock.p2 = limit->p2.p2_fast+1; clock.p2 > 0; clock.p2--) {
>
> I think that's going to upset the coding style police ;-)
I suppose. But I think it's a semi-decent way of avoiding deep nesting
in these loops-within-loops situations. Obviously if there's any code
other than the internal loop contained in the outer loop, I would not
use it. But I'm not really attached to this approach, so I'm fine with
indenting each loop if that's what people prefer.
BTW now that I look at the code again, I'm wondering why we're checking
'p1 > p1.min' instead of 'p1 >= p1.min'?
> I guess it would
> be simple to extract a vlv_compute_clock like we have for pnv/i9xx that's
> both used here and in the get_clock code from Jesse.
Right. I can do that.
> -Daniel
>
>
> > + if (clock.p2 > 10)
> > + clock.p2--;
> > + clock.p = clock.p1 * clock.p2;
> > +
> > + /* based on hardware requirement, prefer bigger m1,m2 values */
> > + for (clock.m1 = limit->m1.min; clock.m1 <= limit->m1.max; clock.m1++) {
> > + unsigned int ppm, diff;
> > +
> > + clock.m2 = DIV_ROUND_CLOSEST(clock.dot * clock.p * clock.n,
> > + clock.m1 * refclk);
> > + clock.m = clock.m1 * clock.m2;
> > +
> > + clock.vco = refclk * clock.m / clock.n;
> > +
> > + if (clock.vco < limit->vco.min ||
> > + clock.vco >= limit->vco.max)
> > + continue;
> > +
> > + diff = abs(clock.vco / clock.p - clock.dot);
> > + ppm = div_u64(1000000ULL * diff, clock.dot);
> > +
> > + if (ppm < 100 && clock.p > best_clock->p) {
> > + bestppm = 0;
> > + *best_clock = clock;
> > + }
> > +
> > + if (ppm + 10 < bestppm) {
> > + bestppm = ppm;
> > + *best_clock = clock;
> > }
> > }
> > }
> > - best_clock->n = bestn;
> > - best_clock->m1 = bestm1;
> > - best_clock->m2 = bestm2;
> > - best_clock->p1 = bestp1;
> > - best_clock->p2 = bestp2;
> > + }
> > + }
> >
> > return true;
> > }
> > --
> > 1.8.1.5
> >
> > _______________________________________________
> > Intel-gfx mailing list
> > Intel-gfx@lists.freedesktop.org
> > http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch
--
Ville Syrjälä
Intel OTC
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2013-09-24 11:15 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-09-23 18:03 [PATCH] drm/i915: Rewrite vlv_find_best_dpll() ville.syrjala
2013-09-24 9:23 ` Daniel Vetter
2013-09-24 11:15 ` Ville Syrjälä
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox