linux-pm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] cpuidle: menu: find the typical interval by a heuristic classification method
@ 2025-07-17 10:11 朱恺乾
  2025-07-17 18:23 ` Rafael J. Wysocki
  2025-07-28 10:04 ` Christian Loehle
  0 siblings, 2 replies; 5+ messages in thread
From: 朱恺乾 @ 2025-07-17 10:11 UTC (permalink / raw)
  To: rafael@kernel.org
  Cc: Daniel Lezcano, christian.loehle@arm.com,
	quic_zhonhan@quicinc.com, linux-pm@vger.kernel.org,
	linux-kernel@vger.kernel.org

The iterations of deviation calculation gives too less predictions on
the idle interval by trying to find a single repeating pattern from the
whole history. This is not always the case when the workload is flowing.

This algorithm assumes there're multiple repeating patterns heuristically,
and tries to determine which is the most promising one from the averages
of different idle states. It also takes the occurrence sequence into
consideration, and gives the prediction close to the recent idle.

This increased the shallow idle states detected, but the difference in deep
sleep time didn't change a lot. The performance on my platform, as
expected, has improved.

Before:
Multi-Core Score              7279
Overall    above   under
  34107    0.00    2.75
   8200   59.90    7.02
  29881   57.06    0.00

After:
Multi-Core Score              7365
Overall    above   under
  49913    0.00    6.43
   7881   44.51   18.08
  23108   52.38    0.00

There's another re-classification method, which, instead of looking for the
repeating-interval, tends to find the repeating state. It gives a better result
on performance gain, but may hurt the power consumption.

if (best_state == drv->state_count - 1 || state_avg[best_state] == 0) {
adj_weight[best_state] += weights[i];
adj_avg[best_state] += value;
adj_hit[best_state]++;
} else if (best_diff < state_avg[best_state] * 2) {
adj_weight[best_state] += weights[i];
adj_avg[best_state] += value;
adj_hit[best_state]++;
} else {
adj_weight[best_state + 1] += weights[i];
adj_avg[best_state + 1] += value;
adj_hit[best_state + 1]++;
}

Repeating State:
Multi-Core Score              7421
Overall    above   under
  60857    0.00    8.30
   3838   29.88   18.42
  15318   39.05    0.00


Signed-off-by: Kaiqian Zhu <zhukaiqian@xiaomi.com>
---
 drivers/cpuidle/governors/menu.c | 174 ++++++++++++++++---------------
 1 file changed, 89 insertions(+), 85 deletions(-)

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 52d5d26fc7c6..52723ec1a0a6 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -99,109 +99,113 @@ static DEFINE_PER_CPU(struct menu_device, menu_devices);

 static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);

-/*
- * Try detecting repeating patterns by keeping track of the last 8
- * intervals, and checking if the standard deviation of that set
- * of points is below a threshold. If it is... then use the
- * average of these 8 points as the estimated value.
- */
-static unsigned int get_typical_interval(struct menu_device *data)
+static int get_actual_state(struct cpuidle_driver *drv,
+    struct cpuidle_device *dev,
+    int duration_us)
 {
-s64 value, min_thresh = -1, max_thresh = UINT_MAX;
-unsigned int max, min, divisor;
-u64 avg, variance, avg_sq;
-int i;
+int actual;

-again:
-/* Compute the average and variance of past intervals. */
-max = 0;
-min = UINT_MAX;
-avg = 0;
-variance = 0;
-divisor = 0;
-for (i = 0; i < INTERVALS; i++) {
-value = data->intervals[i];
-/*
- * Discard the samples outside the interval between the min and
- * max thresholds.
- */
-if (value <= min_thresh || value >= max_thresh)
-continue;
+for (int i = 0; i < drv->state_count; i++) {
+if (duration_us < drv->states[i].target_residency)
+break;
+
+actual = i;
+}
+
+return actual;
+}
+
+static unsigned int get_typical_interval(struct cpuidle_driver *drv,
+ struct cpuidle_device *dev,
+ struct menu_device *data)
+{
+int cnt = 0;
+
+int state_hit[CPUIDLE_STATE_MAX];
+int state_avg[CPUIDLE_STATE_MAX];
+int adj_weight[CPUIDLE_STATE_MAX];
+int adj_avg[CPUIDLE_STATE_MAX];
+int adj_hit[CPUIDLE_STATE_MAX];
+int hit_thres = max(2, INTERVALS / drv->state_count);
+int weights[INTERVALS] = {5, 3, 2, 1};
+int weight = 0;
+int high_state = -1;

-divisor++;

-avg += value;
-variance += value * value;
+/* Going through the history, and divide them by the actual state */
+for (int i = 0; i < INTERVALS; i++) {
+int actual = get_actual_state(drv, dev, data->intervals[i]);

-if (value > max)
-max = value;
+/* Count the idle states hit in the history */
+state_avg[actual] += data->intervals[i];
+state_hit[actual]++;

-if (value < min)
-min = value;
+cnt++;
 }

-if (!max)
+if (cnt < hit_thres)
 return UINT_MAX;

-if (divisor == INTERVALS) {
-avg >>= INTERVAL_SHIFT;
-variance >>= INTERVAL_SHIFT;
-} else {
-do_div(avg, divisor);
-do_div(variance, divisor);
+/* calculate the average of each state */
+for (int i = 0; i < drv->state_count; i++) {
+if (state_hit[i] > 1)
+state_avg[i] /= state_hit[i];
 }

-avg_sq = avg * avg;
-variance -= avg_sq;
+/* try to re-assign the data points by the closeness */
+for (int i = 0; i < INTERVALS; i++) {
+/* Starting from the recent history */
+int idx = ((data->interval_ptr - i - 1) + INTERVALS) % INTERVALS;
+unsigned int diff;
+unsigned int best_diff = UINT_MAX;
+unsigned int best_state, next_state;
+unsigned int value = data->intervals[idx];
+
+for (int state = 0; state < drv->state_count; state++) {
+diff = abs(state_avg[state] - value);
+if (diff < best_diff) {
+best_diff = diff;
+best_state = state;
+}
+}

-/*
- * The typical interval is obtained when standard deviation is
- * small (stddev <= 20 us, variance <= 400 us^2) or standard
- * deviation is small compared to the average interval (avg >
- * 6*stddev, avg^2 > 36*variance). The average is smaller than
- * UINT_MAX aka U32_MAX, so computing its square does not
- * overflow a u64. We simply reject this candidate average if
- * the standard deviation is greater than 715 s (which is
- * rather unlikely).
- *
- * Use this result only if there is no timer to wake us up sooner.
- */
-if (likely(variance <= U64_MAX/36)) {
-if ((avg_sq > variance * 36 && divisor * 4 >= INTERVALS * 3) ||
-    variance <= 400)
-return avg;
+if (best_diff < (state_avg[best_state] >> 2)) {
+adj_avg[best_state] += value;
+adj_hit[best_state]++;
+adj_weight[best_state] += weights[i];
+} else if (best_state < drv->state_count - 1) {
+next_state = best_state + 1;
+diff = abs(state_avg[next_state] - value);
+if (diff < (state_avg[next_state] >> 2)) {
+adj_avg[next_state] += value;
+adj_hit[next_state]++;
+adj_weight[next_state] += weights[i];
+}
+}
 }

-/*
- * If there are outliers, discard them by setting thresholds to exclude
- * data points at a large enough distance from the average, then
- * calculate the average and standard deviation again. Once we get
- * down to the last 3/4 of our samples, stop excluding samples.
- *
- * This can deal with workloads that have long pauses interspersed
- * with sporadic activity with a bunch of short pauses.
+/* We've adjusted the hit status by the closeness, if one state is still
+ * hit more often and selected recently, we can assume that state is more
+ * likely to happen in the future
  */
-if (divisor * 4 <= INTERVALS * 3) {
-/*
- * If there are sufficiently many data points still under
- * consideration after the outliers have been eliminated,
- * returning without a prediction would be a mistake because it
- * is likely that the next interval will not exceed the current
- * maximum, so return the latter in that case.
- */
-if (divisor >= INTERVALS / 2)
-return max;
-
-return UINT_MAX;
+for (int state = 0; state < drv->state_count; state++) {
+if (adj_weight[state] > 1 && adj_hit[state] >= hit_thres) {
+adj_avg[state] /= adj_hit[state];
+
+if (adj_weight[state] > weight) {
+weight = adj_weight[state];
+high_state = state;
+} else if (adj_weight[state] == weight) {
+if (adj_hit[state] > adj_hit[high_state])
+high_state = state;
+}
+}
 }

-/* Update the thresholds for the next round. */
-if (avg - min > max - avg)
-min_thresh = min;
-else
-max_thresh = max;
+if (weight)
+return adj_avg[high_state];

-goto again;
+return UINT_MAX;
 }

 /**
@@ -225,7 +229,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 }

 /* Find the shortest expected idle interval. */
-predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
+predicted_ns = get_typical_interval(drv, dev, data) * NSEC_PER_USEC;
 if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
 unsigned int timer_us;

--
2.34.1

#/******本邮件及其附件含有小米公司的保密信息,仅限于发送给上面地址中列出的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制、或散发)本邮件中的信息。如果您错收了本邮件,请您立即电话或邮件通知发件人并删除本邮件! This e-mail and its attachments contain confidential information from XIAOMI, which is intended only for the person or entity whose address is listed above. Any use of the information contained herein in any way (including, but not limited to, total or partial disclosure, reproduction, or dissemination) by persons other than the intended recipient(s) is prohibited. If you receive this e-mail in error, please notify the sender by phone or email immediately and delete it!******/#

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH] cpuidle: menu: find the typical interval by a heuristic classification method
  2025-07-17 10:11 [PATCH] cpuidle: menu: find the typical interval by a heuristic classification method 朱恺乾
@ 2025-07-17 18:23 ` Rafael J. Wysocki
  2025-07-28 10:04 ` Christian Loehle
  1 sibling, 0 replies; 5+ messages in thread
From: Rafael J. Wysocki @ 2025-07-17 18:23 UTC (permalink / raw)
  To: 朱恺乾
  Cc: rafael@kernel.org, Daniel Lezcano, christian.loehle@arm.com,
	quic_zhonhan@quicinc.com, linux-pm@vger.kernel.org,
	linux-kernel@vger.kernel.org

On Thu, Jul 17, 2025 at 12:12 PM 朱恺乾 <zhukaiqian@xiaomi.com> wrote:
>
> The iterations of deviation calculation gives too less predictions on
> the idle interval by trying to find a single repeating pattern from the
> whole history. This is not always the case when the workload is flowing.
>
> This algorithm assumes there're multiple repeating patterns heuristically,
> and tries to determine which is the most promising one from the averages
> of different idle states. It also takes the occurrence sequence into
> consideration, and gives the prediction close to the recent idle.

Instead of saying "this algorithm", please describe it or give a
reference to a description of it that is not going to vanish from the
Web at one point.

> This increased the shallow idle states detected, but the difference in deep
> sleep time didn't change a lot. The performance on my platform, as
> expected, has improved.
>
> Before:
> Multi-Core Score              7279
> Overall    above   under
>   34107    0.00    2.75
>    8200   59.90    7.02
>   29881   57.06    0.00
>
> After:
> Multi-Core Score              7365
> Overall    above   under
>   49913    0.00    6.43
>    7881   44.51   18.08
>   23108   52.38    0.00

It is unclear what the columns above mean and what's represented by
the numbers, so qualifying the improvement is rather hard.

> There's another re-classification method, which, instead of looking for the
> repeating-interval, tends to find the repeating state. It gives a better result
> on performance gain, but may hurt the power consumption.
>
> if (best_state == drv->state_count - 1 || state_avg[best_state] == 0) {
> adj_weight[best_state] += weights[i];
> adj_avg[best_state] += value;
> adj_hit[best_state]++;
> } else if (best_diff < state_avg[best_state] * 2) {
> adj_weight[best_state] += weights[i];
> adj_avg[best_state] += value;
> adj_hit[best_state]++;
> } else {
> adj_weight[best_state + 1] += weights[i];
> adj_avg[best_state + 1] += value;
> adj_hit[best_state + 1]++;
> }
>
> Repeating State:
> Multi-Core Score              7421
> Overall    above   under
>   60857    0.00    8.30
>    3838   29.88   18.42
>   15318   39.05    0.00

I'm not sure what the above part of the changelog means at all.

> Signed-off-by: Kaiqian Zhu <zhukaiqian@xiaomi.com>
> ---
>  drivers/cpuidle/governors/menu.c | 174 ++++++++++++++++---------------
>  1 file changed, 89 insertions(+), 85 deletions(-)
>
> diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
> index 52d5d26fc7c6..52723ec1a0a6 100644
> --- a/drivers/cpuidle/governors/menu.c
> +++ b/drivers/cpuidle/governors/menu.c
> @@ -99,109 +99,113 @@ static DEFINE_PER_CPU(struct menu_device, menu_devices);
>
>  static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
>
> -/*
> - * Try detecting repeating patterns by keeping track of the last 8
> - * intervals, and checking if the standard deviation of that set
> - * of points is below a threshold. If it is... then use the
> - * average of these 8 points as the estimated value.
> - */
> -static unsigned int get_typical_interval(struct menu_device *data)
> +static int get_actual_state(struct cpuidle_driver *drv,
> +    struct cpuidle_device *dev,
> +    int duration_us)
>  {
> -s64 value, min_thresh = -1, max_thresh = UINT_MAX;
> -unsigned int max, min, divisor;
> -u64 avg, variance, avg_sq;
> -int i;
> +int actual;
>
> -again:
> -/* Compute the average and variance of past intervals. */
> -max = 0;
> -min = UINT_MAX;
> -avg = 0;
> -variance = 0;
> -divisor = 0;
> -for (i = 0; i < INTERVALS; i++) {
> -value = data->intervals[i];
> -/*
> - * Discard the samples outside the interval between the min and
> - * max thresholds.
> - */
> -if (value <= min_thresh || value >= max_thresh)
> -continue;
> +for (int i = 0; i < drv->state_count; i++) {
> +if (duration_us < drv->states[i].target_residency)
> +break;
> +
> +actual = i;
> +}
> +
> +return actual;
> +}
> +
> +static unsigned int get_typical_interval(struct cpuidle_driver *drv,
> + struct cpuidle_device *dev,
> + struct menu_device *data)
> +{
> +int cnt = 0;
> +
> +int state_hit[CPUIDLE_STATE_MAX];
> +int state_avg[CPUIDLE_STATE_MAX];
> +int adj_weight[CPUIDLE_STATE_MAX];
> +int adj_avg[CPUIDLE_STATE_MAX];
> +int adj_hit[CPUIDLE_STATE_MAX];
> +int hit_thres = max(2, INTERVALS / drv->state_count);
> +int weights[INTERVALS] = {5, 3, 2, 1};
> +int weight = 0;
> +int high_state = -1;
>
> -divisor++;
>
> -avg += value;
> -variance += value * value;
> +/* Going through the history, and divide them by the actual state */
> +for (int i = 0; i < INTERVALS; i++) {
> +int actual = get_actual_state(drv, dev, data->intervals[i]);
>
> -if (value > max)
> -max = value;
> +/* Count the idle states hit in the history */
> +state_avg[actual] += data->intervals[i];
> +state_hit[actual]++;
>
> -if (value < min)
> -min = value;
> +cnt++;
>  }
>
> -if (!max)
> +if (cnt < hit_thres)
>  return UINT_MAX;
>
> -if (divisor == INTERVALS) {
> -avg >>= INTERVAL_SHIFT;
> -variance >>= INTERVAL_SHIFT;
> -} else {
> -do_div(avg, divisor);
> -do_div(variance, divisor);
> +/* calculate the average of each state */
> +for (int i = 0; i < drv->state_count; i++) {
> +if (state_hit[i] > 1)
> +state_avg[i] /= state_hit[i];
>  }
>
> -avg_sq = avg * avg;
> -variance -= avg_sq;
> +/* try to re-assign the data points by the closeness */
> +for (int i = 0; i < INTERVALS; i++) {
> +/* Starting from the recent history */
> +int idx = ((data->interval_ptr - i - 1) + INTERVALS) % INTERVALS;
> +unsigned int diff;
> +unsigned int best_diff = UINT_MAX;
> +unsigned int best_state, next_state;
> +unsigned int value = data->intervals[idx];
> +
> +for (int state = 0; state < drv->state_count; state++) {
> +diff = abs(state_avg[state] - value);
> +if (diff < best_diff) {
> +best_diff = diff;
> +best_state = state;
> +}
> +}
>
> -/*
> - * The typical interval is obtained when standard deviation is
> - * small (stddev <= 20 us, variance <= 400 us^2) or standard
> - * deviation is small compared to the average interval (avg >
> - * 6*stddev, avg^2 > 36*variance). The average is smaller than
> - * UINT_MAX aka U32_MAX, so computing its square does not
> - * overflow a u64. We simply reject this candidate average if
> - * the standard deviation is greater than 715 s (which is
> - * rather unlikely).
> - *
> - * Use this result only if there is no timer to wake us up sooner.
> - */
> -if (likely(variance <= U64_MAX/36)) {
> -if ((avg_sq > variance * 36 && divisor * 4 >= INTERVALS * 3) ||
> -    variance <= 400)
> -return avg;
> +if (best_diff < (state_avg[best_state] >> 2)) {
> +adj_avg[best_state] += value;
> +adj_hit[best_state]++;
> +adj_weight[best_state] += weights[i];
> +} else if (best_state < drv->state_count - 1) {
> +next_state = best_state + 1;
> +diff = abs(state_avg[next_state] - value);
> +if (diff < (state_avg[next_state] >> 2)) {
> +adj_avg[next_state] += value;
> +adj_hit[next_state]++;
> +adj_weight[next_state] += weights[i];
> +}
> +}
>  }
>
> -/*
> - * If there are outliers, discard them by setting thresholds to exclude
> - * data points at a large enough distance from the average, then
> - * calculate the average and standard deviation again. Once we get
> - * down to the last 3/4 of our samples, stop excluding samples.
> - *
> - * This can deal with workloads that have long pauses interspersed
> - * with sporadic activity with a bunch of short pauses.
> +/* We've adjusted the hit status by the closeness, if one state is still
> + * hit more often and selected recently, we can assume that state is more
> + * likely to happen in the future
>   */
> -if (divisor * 4 <= INTERVALS * 3) {
> -/*
> - * If there are sufficiently many data points still under
> - * consideration after the outliers have been eliminated,
> - * returning without a prediction would be a mistake because it
> - * is likely that the next interval will not exceed the current
> - * maximum, so return the latter in that case.
> - */
> -if (divisor >= INTERVALS / 2)
> -return max;
> -
> -return UINT_MAX;
> +for (int state = 0; state < drv->state_count; state++) {
> +if (adj_weight[state] > 1 && adj_hit[state] >= hit_thres) {
> +adj_avg[state] /= adj_hit[state];
> +
> +if (adj_weight[state] > weight) {
> +weight = adj_weight[state];
> +high_state = state;
> +} else if (adj_weight[state] == weight) {
> +if (adj_hit[state] > adj_hit[high_state])
> +high_state = state;
> +}
> +}
>  }
>
> -/* Update the thresholds for the next round. */
> -if (avg - min > max - avg)
> -min_thresh = min;
> -else
> -max_thresh = max;
> +if (weight)
> +return adj_avg[high_state];
>
> -goto again;
> +return UINT_MAX;
>  }
>
>  /**
> @@ -225,7 +229,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
>  }
>
>  /* Find the shortest expected idle interval. */
> -predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
> +predicted_ns = get_typical_interval(drv, dev, data) * NSEC_PER_USEC;
>  if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
>  unsigned int timer_us;
>
> --

If you want to change the get_typical_interval() algorithm, it needs
to be justified much better than in the changelog of this patch
because you are likely to affect at least some workloads adversely.

Moreover, there are obvious whitespace issues in the patch which need
to be addressed even before it can be reviewed.

Also this absolutely is not 6.17 material, so if you decide to pursue
it, please come back with it after 6.17-rc1 is out.

Thanks!

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] cpuidle: menu: find the typical interval by a heuristic classification method
@ 2025-07-18 10:52 朱恺乾
  0 siblings, 0 replies; 5+ messages in thread
From: 朱恺乾 @ 2025-07-18 10:52 UTC (permalink / raw)
  To: Rafael J. Wysocki
  Cc: Daniel Lezcano, christian.loehle@arm.com,
	quic_zhonhan@quicinc.com, linux-pm@vger.kernel.org,
	linux-kernel@vger.kernel.org

> >
> > The iterations of deviation calculation gives too less predictions on
> > the idle interval by trying to find a single repeating pattern from the
> > whole history. This is not always the case when the workload is flowing.
> >
> > This algorithm assumes there're multiple repeating patterns heuristically,
> > and tries to determine which is the most promising one from the averages
> > of different idle states. It also takes the occurrence sequence into
> > consideration, and gives the prediction close to the recent idle.
>
> Instead of saying "this algorithm", please describe it or give a
> reference to a description of it that is not going to vanish from the
> Web at one point.
>

It could be taken as the K-means but of 2 iterations only. The group number K
here is defined as the number of idle states.

In the first iteration, instead of creating the groups randomly, all the history
data are divided by the deepest state possible for the actual sleep time. Then
the average sleep time of each state is calculated.

In the second iteration, it tries to re-assign each of the history point by the
closeness. It's the difference between the actual sleep time and the averages.
By comparing the outputs, the state with the smallest closeness value absorbs
the history data. After iterating through all the history points, a new average
value is calculated for each state.

For example, assume there're 3 idle states, and the residency of each state is
(1, 1000, 3000),
the idle history collected are
(100, 800, 1100, 110, 150, 1200, 1500, 3500) by the time order.

After the 1st iteration, they're divided into
(100, 110, 150, 800), (1100, 1200, 1500), (3500),
and the averages are (290, 1266, 3500).

In the 2nd interation, since the history point 800 is more close to the average of
the 2nd state, 510 vs. 466. It would be re-assigned, and the new groups would look
like
(100, 110, 150), (800, 1100, 1200, 1500), (3500),
and the averages would be (120, 1150, 3500) finally.

To choose from the different groups, weights are added to the data by how far they
occur in the history. The most recent idle has the highest weight, while the oldest
data has the smallest. The state having the highest weight accumalted would be
selected.

In the code, it only weights the recent four data points by (5, 3, 2, 1), so it can respect
to the most recent idle history.

For the example above, since the total weight of the 1st state is 6, and the weight of
the 2nd is 5. The next interval predicted would be 120.

If the weights are same, it will also count the number of data in each state, and select
the average of the most common.

Let's change the history series a little bit,
(100, 800, 1100, 3500, 110, 150, 1200, 1500)

Then the weight of the 1st and 2nd are same now, since the 2nd state has 1 more data
point than the 1st state. The next interval predicted this time would be 1150.

This is the general idea of the algorithm, but in the implementation, data are not simply
re-assigned by the closeness. First because the residency gap between the states are
usually not even. Second, some data points can be too far away to the average, including
them will pollute the outcome, leading to an in-accurate prediction.

So, the method to re-assigned the data points could be platform specific, and there're
some other parameter could be tuned for the algorithm, like the history length and the hit
threshold.

> > This increased the shallow idle states detected, but the difference in deep
> > sleep time didn't change a lot. The performance on my platform, as
> > expected, has improved.
> >
> > Before:
> > Multi-Core Score              7279
> > Overall    above   under
> >   34107    0.00    2.75
> >    8200   59.90    7.02
> >   29881   57.06    0.00
> >
> > After:
> > Multi-Core Score              7365
> > Overall    above   under
> >   49913    0.00    6.43
> >    7881   44.51   18.08
> >   23108   52.38    0.00
>
> It is unclear what the columns above mean and what's represented by
> the numbers, so qualifying the improvement is rather hard.
>

The "Multi-Core Score" is the outcome by running the geekbench multi-core test.
For the table, "Overall" is the sum of all the idle happened on the CPUs, "above"
and "under" are the idle statistics collected by kernel of the same name presented
in percentage. Each row stands for an idle state. So, on my platform, there're 3 idle
states, and the shallowest idle is chosen more often after applying the algorithm,
but the underestimated number also increased by 3.7%

> > There's another re-classification method, which, instead of looking for the
> > repeating-interval, tends to find the repeating state. It gives a better result
> > on performance gain, but may hurt the power consumption.
> >
> > if (best_state == drv->state_count - 1 || state_avg[best_state] == 0) {
> > adj_weight[best_state] += weights[i];
> > adj_avg[best_state] += value;
> > adj_hit[best_state]++;
> > } else if (best_diff < state_avg[best_state] * 2) {
> > adj_weight[best_state] += weights[i];
> > adj_avg[best_state] += value;
> > adj_hit[best_state]++;
> > } else {
> > adj_weight[best_state + 1] += weights[i];
> > adj_avg[best_state + 1] += value;
> > adj_hit[best_state + 1]++;
> > }
> >
> > Repeating State:
> > Multi-Core Score              7421
> > Overall    above   under
> >   60857    0.00    8.30
> >    3838   29.88   18.42
> >   15318   39.05    0.00
>
> I'm not sure what the above part of the changelog means at all.
>
> > Signed-off-by: Kaiqian Zhu <zhukaiqian@xiaomi.com>
> > ---
> >  drivers/cpuidle/governors/menu.c | 174 ++++++++++++++++---------------
> >  1 file changed, 89 insertions(+), 85 deletions(-)
> >
> > diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
> > index 52d5d26fc7c6..52723ec1a0a6 100644
> > --- a/drivers/cpuidle/governors/menu.c
> > +++ b/drivers/cpuidle/governors/menu.c
> > @@ -99,109 +99,113 @@ static DEFINE_PER_CPU(struct menu_device, menu_devices);
> >
> >  static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
> >
> > -/*
> > - * Try detecting repeating patterns by keeping track of the last 8
> > - * intervals, and checking if the standard deviation of that set
> > - * of points is below a threshold. If it is... then use the
> > - * average of these 8 points as the estimated value.
> > - */
> > -static unsigned int get_typical_interval(struct menu_device *data)
> > +static int get_actual_state(struct cpuidle_driver *drv,
> > +    struct cpuidle_device *dev,
> > +    int duration_us)
> >  {
> > -s64 value, min_thresh = -1, max_thresh = UINT_MAX;
> > -unsigned int max, min, divisor;
> > -u64 avg, variance, avg_sq;
> > -int i;
> > +int actual;
> >
> > -again:
> > -/* Compute the average and variance of past intervals. */
> > -max = 0;
> > -min = UINT_MAX;
> > -avg = 0;
> > -variance = 0;
> > -divisor = 0;
> > -for (i = 0; i < INTERVALS; i++) {
> > -value = data->intervals[i];
> > -/*
> > - * Discard the samples outside the interval between the min and
> > - * max thresholds.
> > - */
> > -if (value <= min_thresh || value >= max_thresh)
> > -continue;
> > +for (int i = 0; i < drv->state_count; i++) {
> > +if (duration_us < drv->states[i].target_residency)
> > +break;
> > +
> > +actual = i;
> > +}
> > +
> > +return actual;
> > +}
> > +
> > +static unsigned int get_typical_interval(struct cpuidle_driver *drv,
> > + struct cpuidle_device *dev,
> > + struct menu_device *data)
> > +{
> > +int cnt = 0;
> > +
> > +int state_hit[CPUIDLE_STATE_MAX];
> > +int state_avg[CPUIDLE_STATE_MAX];
> > +int adj_weight[CPUIDLE_STATE_MAX];
> > +int adj_avg[CPUIDLE_STATE_MAX];
> > +int adj_hit[CPUIDLE_STATE_MAX];
> > +int hit_thres = max(2, INTERVALS / drv->state_count);
> > +int weights[INTERVALS] = {5, 3, 2, 1};
> > +int weight = 0;
> > +int high_state = -1;
> >
> > -divisor++;
> >
> > -avg += value;
> > -variance += value * value;
> > +/* Going through the history, and divide them by the actual state */
> > +for (int i = 0; i < INTERVALS; i++) {
> > +int actual = get_actual_state(drv, dev, data->intervals[i]);
> >
> > -if (value > max)
> > -max = value;
> > +/* Count the idle states hit in the history */
> > +state_avg[actual] += data->intervals[i];
> > +state_hit[actual]++;
> >
> > -if (value < min)
> > -min = value;
> > +cnt++;
> >  }
> >
> > -if (!max)
> > +if (cnt < hit_thres)
> >  return UINT_MAX;
> >
> > -if (divisor == INTERVALS) {
> > -avg >>= INTERVAL_SHIFT;
> > -variance >>= INTERVAL_SHIFT;
> > -} else {
> > -do_div(avg, divisor);
> > -do_div(variance, divisor);
> > +/* calculate the average of each state */
> > +for (int i = 0; i < drv->state_count; i++) {
> > +if (state_hit[i] > 1)
> > +state_avg[i] /= state_hit[i];
> >  }
> >
> > -avg_sq = avg * avg;
> > -variance -= avg_sq;
> > +/* try to re-assign the data points by the closeness */
> > +for (int i = 0; i < INTERVALS; i++) {
> > +/* Starting from the recent history */
> > +int idx = ((data->interval_ptr - i - 1) + INTERVALS) % INTERVALS;
> > +unsigned int diff;
> > +unsigned int best_diff = UINT_MAX;
> > +unsigned int best_state, next_state;
> > +unsigned int value = data->intervals[idx];
> > +
> > +for (int state = 0; state < drv->state_count; state++) {
> > +diff = abs(state_avg[state] - value);
> > +if (diff < best_diff) {
> > +best_diff = diff;
> > +best_state = state;
> > +}
> > +}
> >
> > -/*
> > - * The typical interval is obtained when standard deviation is
> > - * small (stddev <= 20 us, variance <= 400 us^2) or standard
> > - * deviation is small compared to the average interval (avg >
> > - * 6*stddev, avg^2 > 36*variance). The average is smaller than
> > - * UINT_MAX aka U32_MAX, so computing its square does not
> > - * overflow a u64. We simply reject this candidate average if
> > - * the standard deviation is greater than 715 s (which is
> > - * rather unlikely).
> > - *
> > - * Use this result only if there is no timer to wake us up sooner.
> > - */
> > -if (likely(variance <= U64_MAX/36)) {
> > -if ((avg_sq > variance * 36 && divisor * 4 >= INTERVALS * 3) ||
> > -    variance <= 400)
> > -return avg;
> > +if (best_diff < (state_avg[best_state] >> 2)) {
> > +adj_avg[best_state] += value;
> > +adj_hit[best_state]++;
> > +adj_weight[best_state] += weights[i];
> > +} else if (best_state < drv->state_count - 1) {
> > +next_state = best_state + 1;
> > +diff = abs(state_avg[next_state] - value);
> > +if (diff < (state_avg[next_state] >> 2)) {
> > +adj_avg[next_state] += value;
> > +adj_hit[next_state]++;
> > +adj_weight[next_state] += weights[i];
> > +}
> > +}
> >  }
> >
> > -/*
> > - * If there are outliers, discard them by setting thresholds to exclude
> > - * data points at a large enough distance from the average, then
> > - * calculate the average and standard deviation again. Once we get
> > - * down to the last 3/4 of our samples, stop excluding samples.
> > - *
> > - * This can deal with workloads that have long pauses interspersed
> > - * with sporadic activity with a bunch of short pauses.
> > +/* We've adjusted the hit status by the closeness, if one state is still
> > + * hit more often and selected recently, we can assume that state is more
> > + * likely to happen in the future
> >   */
> > -if (divisor * 4 <= INTERVALS * 3) {
> > -/*
> > - * If there are sufficiently many data points still under
> > - * consideration after the outliers have been eliminated,
> > - * returning without a prediction would be a mistake because it
> > - * is likely that the next interval will not exceed the current
> > - * maximum, so return the latter in that case.
> > - */
> > -if (divisor >= INTERVALS / 2)
> > -return max;
> > -
> > -return UINT_MAX;
> > +for (int state = 0; state < drv->state_count; state++) {
> > +if (adj_weight[state] > 1 && adj_hit[state] >= hit_thres) {
> > +adj_avg[state] /= adj_hit[state];
> > +
> > +if (adj_weight[state] > weight) {
> > +weight = adj_weight[state];
> > +high_state = state;
> > +} else if (adj_weight[state] == weight) {
> > +if (adj_hit[state] > adj_hit[high_state])
> > +high_state = state;
> > +}
> > +}
> >  }
> >
> > -/* Update the thresholds for the next round. */
> > -if (avg - min > max - avg)
> > -min_thresh = min;
> > -else
> > -max_thresh = max;
> > +if (weight)
> > +return adj_avg[high_state];
> >
> > -goto again;
> > +return UINT_MAX;
> >  }
> >
> >  /**
> > @@ -225,7 +229,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
> >  }
> >
> >  /* Find the shortest expected idle interval. */
> > -predicted_ns = get_typical_interval(data) * NSEC_PER_USEC;
> > +predicted_ns = get_typical_interval(drv, dev, data) * NSEC_PER_USEC;
> >  if (predicted_ns > RESIDENCY_THRESHOLD_NS) {
> >  unsigned int timer_us;
> >
> > --
>
> If you want to change the get_typical_interval() algorithm, it needs
> to be justified much better than in the changelog of this patch
> because you are likely to affect at least some workloads adversely.
>

Yes, but I'm not sure which would be affected. Here I tested with benchmarks,
some user applications, like games, and workloads like fio. Do you have
some suggestion?

> Moreover, there are obvious whitespace issues in the patch which need
> to be addressed even before it can be reviewed.
>
> Also this absolutely is not 6.17 material, so if you decide to pursue
> it, please come back with it after 6.17-rc1 is out.
>

I know it could be tough to change an algorithm like this. There're too many arches,
and workload types. So, the initiative is to share the idea. If you think it worth a try,
I will.

> Thanks!
>
#/******本邮件及其附件含有小米公司的保密信息,仅限于发送给上面地址中列出的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制、或散发)本邮件中的信息。如果您错收了本邮件,请您立即电话或邮件通知发件人并删除本邮件! This e-mail and its attachments contain confidential information from XIAOMI, which is intended only for the person or entity whose address is listed above. Any use of the information contained herein in any way (including, but not limited to, total or partial disclosure, reproduction, or dissemination) by persons other than the intended recipient(s) is prohibited. If you receive this e-mail in error, please notify the sender by phone or email immediately and delete it!******/#

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] cpuidle: menu: find the typical interval by a heuristic classification method
  2025-07-17 10:11 [PATCH] cpuidle: menu: find the typical interval by a heuristic classification method 朱恺乾
  2025-07-17 18:23 ` Rafael J. Wysocki
@ 2025-07-28 10:04 ` Christian Loehle
  2025-08-18  9:09   ` [External Mail]Re: " 朱恺乾
  1 sibling, 1 reply; 5+ messages in thread
From: Christian Loehle @ 2025-07-28 10:04 UTC (permalink / raw)
  To: 朱恺乾, rafael@kernel.org
  Cc: Daniel Lezcano, quic_zhonhan@quicinc.com,
	linux-pm@vger.kernel.org, linux-kernel@vger.kernel.org

On 7/17/25 11:11, 朱恺乾 wrote:
> The iterations of deviation calculation gives too less predictions on
> the idle interval by trying to find a single repeating pattern from the
> whole history. This is not always the case when the workload is flowing.
> 
> This algorithm assumes there're multiple repeating patterns heuristically,
> and tries to determine which is the most promising one from the averages
> of different idle states. It also takes the occurrence sequence into
> consideration, and gives the prediction close to the recent idle.
> 
> This increased the shallow idle states detected, but the difference in deep
> sleep time didn't change a lot. The performance on my platform, as
> expected, has improved.
> 
> Before:
> Multi-Core Score              7279
> Overall    above   under
>   34107    0.00    2.75
>    8200   59.90    7.02
>   29881   57.06    0.00
> 
> After:
> Multi-Core Score              7365
> Overall    above   under
>   49913    0.00    6.43
>    7881   44.51   18.08
>   23108   52.38    0.00
> 
> There's another re-classification method, which, instead of looking for the
> repeating-interval, tends to find the repeating state. It gives a better result
> on performance gain, but may hurt the power consumption.
> 
> if (best_state == drv->state_count - 1 || state_avg[best_state] == 0) {
> adj_weight[best_state] += weights[i];
> adj_avg[best_state] += value;
> adj_hit[best_state]++;
> } else if (best_diff < state_avg[best_state] * 2) {
> adj_weight[best_state] += weights[i];
> adj_avg[best_state] += value;
> adj_hit[best_state]++;
> } else {
> adj_weight[best_state + 1] += weights[i];
> adj_avg[best_state + 1] += value;
> adj_hit[best_state + 1]++;
> }
> 
> Repeating State:
> Multi-Core Score              7421
> Overall    above   under
>   60857    0.00    8.30
>    3838   29.88   18.42
>   15318   39.05    0.00
> 
> 

Would be nice to have statistical significance on those (I'm assuming Geekbench)
scores and a description of the systems idle state and maybe a comparison to
shallowest (only WFI enabled) and deepest (only max idle state enabled).


^ permalink raw reply	[flat|nested] 5+ messages in thread

* RE: [External Mail]Re: [PATCH] cpuidle: menu: find the typical interval by a heuristic classification method
  2025-07-28 10:04 ` Christian Loehle
@ 2025-08-18  9:09   ` 朱恺乾
  0 siblings, 0 replies; 5+ messages in thread
From: 朱恺乾 @ 2025-08-18  9:09 UTC (permalink / raw)
  To: Christian Loehle, rafael@kernel.org
  Cc: Daniel Lezcano, quic_zhonhan@quicinc.com,
	linux-pm@vger.kernel.org, linux-kernel@vger.kernel.org

> Would be nice to have statistical significance on those (I'm assuming Geekbench)
> scores and a description of the systems idle state and maybe a comparison to
> shallowest (only WFI enabled) and deepest (only max idle state enabled).
Here's an overview of the geekbench score and the power consumption.
The results below are the average of three consecutive tests. In
general, menu costs the least power, but the performance also is the
worst.

           |      dvfs on       |       dvfs off
           |  gkb    cpu   sply |   gkb    cpu   sply
menu v6.6  | 7469   2326   5490 |  8179   2554   5611
menu v6.17 | 7690   2363   5495 |  8414   2590   5844
proposed   | 7679   2363   5525 |  8500   2587   5814
teo        | 7877   2420   5563 |  8648   2594   5914
deepest    | 7246               |  7954
wfi        | 7955               |  8663

By compairing the states selected, we can see the governor is selecting
too much deep idle states, and most of them are above the ideal state.
It can waste too much time & power on entering/exiting the deep state.

selected    state0   state1   state2
menu v6.6   136342    12255    48784
menu v6.17  151045     8317    25435
proposed    160869     5934    23462
teo         207094    12280    12011

above       state0   state1   state2
menu v6.6        0     8006    30737
menu v6.17       0     3494    10489
proposed         0     2221     8700
teo              0     4606     3344

The situation is much better in v6.17. Less deep states are selected, and better
the performance becomes. A more pleasing result is the overall idle time as
well as the deep idle time doesn't change. Which means the power efficiency of
the governor has increased.

Below is the percentage of idle time and each of the states for the different core
types in the system. Since they're all running the same benchmark,
we can assume the total time spent is identical across the tests.

From it, we can see that TEO tends to choose more shallow states, while all
the variants of menu governors will keep the CPUs sleep deep. The proposed
here is an update of the patch uploaded, as I see the change in v6.17 allows
the governor to use more power. This updated version behaves similar with
v6.17 menu in the benchmark score and power consumption, but if we look
at the states selected and the idle time, it could be a little bit shallower and
more accurate on the deep states than the v6.17.

type1         idle   state0   state1   state2
menu v6.6   50.69%   24.04%    1.09%   25.55%
menu v6.17  55.28%   29.99%    1.06%   24.22%
proposed    55.72%   30.70%    0.55%   24.46%
teo         50.68%   25.98%   12.34%   12.36%

type2         idle   state0   state1   state2
menu v6.6   71.08%    1.84%    2.61%   66.62%
menu v6.17  73.05%    3.43%    1.88%   67.74%
proposed    73.25%    4.89%    1.99%   66.36%
teo         71.93%   10.54%    8.68%   52.70%

type3         idle   state0   state1   state2
menu v6.6   78.78%    0.18%    0.44%   78.16%
menu v6.17  78.60%    0.71%    0.56%   77.32%
proposed    78.89%    0.99%    0.42%   77.48%
teo         78.45%    1.60%   10.15%   66.69%

type4         idle   state0   state1   state2
menu v6.6   76.26%    0.04%    0.50%   75.72%
menu v6.17  75.76%    0.33%    0.83%   74.61%
proposed    75.91%    0.54%    0.51%   74.85%
teo         74.90%    1.13%   13.82%   59.94%

About the idle states, the difference of the power consumption between the
states on type1 and type2 are negligible. As to type3 and type4, the largest
drop on power is seen between state0 and state1.

#/******本邮件及其附件含有小米公司的保密信息,仅限于发送给上面地址中列出的个人或群组。禁止任何其他人以任何形式使用(包括但不限于全部或部分地泄露、复制、或散发)本邮件中的信息。如果您错收了本邮件,请您立即电话或邮件通知发件人并删除本邮件! This e-mail and its attachments contain confidential information from XIAOMI, which is intended only for the person or entity whose address is listed above. Any use of the information contained herein in any way (including, but not limited to, total or partial disclosure, reproduction, or dissemination) by persons other than the intended recipient(s) is prohibited. If you receive this e-mail in error, please notify the sender by phone or email immediately and delete it!******/#

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2025-08-18  9:09 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2025-07-17 10:11 [PATCH] cpuidle: menu: find the typical interval by a heuristic classification method 朱恺乾
2025-07-17 18:23 ` Rafael J. Wysocki
2025-07-28 10:04 ` Christian Loehle
2025-08-18  9:09   ` [External Mail]Re: " 朱恺乾
  -- strict thread matches above, loose matches on Subject: below --
2025-07-18 10:52 朱恺乾

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).