* [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value
@ 2017-08-10 13:06 Katarzyna Dec
2017-08-10 13:20 ` Chris Wilson
2017-08-10 13:29 ` ✗ Fi.CI.BAT: failure for " Patchwork
0 siblings, 2 replies; 15+ messages in thread
From: Katarzyna Dec @ 2017-08-10 13:06 UTC (permalink / raw)
To: intel-gfx
In addition to checking whether the frequency is in correct range
for certain scenario, we can also verify whether PM interrupts are
masked correctly. Few test cases were extended with such checks.
While I'm here, let'a apply some minor coding style cleanup to the rest
of the code. We can also become DRM master, to make sure that the test
isn't ran along Xorg (which will interfere with our mesurements)
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Jeff Mcgee <jeff.mcgee@intel.com>
Cc: Radoslaw Szwichtenberg <radoslaw.szwichtenberg@intel.com>
Signed-off-by: Katarzyna Dec <katarzyna.dec@intel.com>
---
tests/pm_rps.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 83 insertions(+), 9 deletions(-)
diff --git a/tests/pm_rps.c b/tests/pm_rps.c
index f0455e78..a4d525f0 100644
--- a/tests/pm_rps.c
+++ b/tests/pm_rps.c
@@ -23,6 +23,7 @@
* Authors:
* Ben Widawsky <ben@bwidawsk.net>
* Jeff McGee <jeff.mcgee@intel.com>
+ * Katarzyna Dec <katarzyna.dec@intel.com>
*
*/
@@ -39,6 +40,7 @@
#include <sys/wait.h>
#include "intel_bufmgr.h"
+#include "i915_reg.h"
static int drm_fd;
@@ -50,6 +52,7 @@ enum {
RP0,
RP1,
RPn,
+ BOOST,
NUMFREQ
};
@@ -60,7 +63,9 @@ struct junk {
const char *mode;
FILE *filp;
} stuff[] = {
- { "cur", "r", NULL }, { "min", "rb+", NULL }, { "max", "rb+", NULL }, { "RP0", "r", NULL }, { "RP1", "r", NULL }, { "RPn", "r", NULL }, { NULL, NULL, NULL }
+ { "cur", "r", NULL }, { "min", "rb+", NULL }, { "max", "rb+", NULL },
+ { "RP0", "r", NULL }, { "RP1", "r", NULL }, { "RPn", "r", NULL },
+ { "boost", "r", NULL }, { NULL, NULL, NULL }
};
static int readval(FILE *filp)
@@ -276,13 +281,14 @@ static void load_helper_run(enum load load)
emit_store_dword_imm(val);
intel_batchbuffer_flush_on_ring(lh.batch,
- I915_EXEC_BLT);
+ I915_EXEC_BLT);
val++;
gem_execbuf(drm_fd, &execbuf);
/* Lower the load by pausing after every submitted
- * write. */
+ * write.
+ */
if (lh.load == LOW)
usleep(LOAD_HELPER_PAUSE_USEC);
}
@@ -312,7 +318,8 @@ static void load_helper_init(void)
/* MI_STORE_DATA can only use GTT address on gen4+/g33 and needs
* snoopable mem on pre-gen6. Hence load-helper only works on gen6+, but
- * that's also all we care about for the rps testcase*/
+ * that's also all we care about for the rps testcase
+ */
igt_assert(intel_gen(lh.devid) >= 6);
lh.bufmgr = drm_intel_bufmgr_gem_init(drm_fd, 4096);
igt_assert(lh.bufmgr);
@@ -384,6 +391,54 @@ static int get_hw_rounded_freq(int target)
return ret;
}
+/* Checks if GPU is idle reading debugfs */
+static bool is_in_idle(void)
+{
+ char buf[1024];
+
+ igt_debugfs_read(drm_fd, "i915_runtime_pm_status", buf);
+ return strstr(buf, "GPU idle: yes\n");
+}
+
+#define UP_MASK (1<<5)
+#define DOWN_MASK (1<<4)
+/** Checks PMINTRMSK register for changes in up mask and down mask bits.
+ * Depending on current frequency down and up interrupts can be enabled or
+ * disabled.
+ * e.g. if current frequency is at MIN we expect down interrupts should be
+ * disabled.
+ * There is a special case when both down and up interrupts should be disabled
+ * - GPU is idle or MIN and MAX frequencies are equal.
+ */
+static void check_pmintrmsk(int *freqs)
+{
+ uint32_t pmintrmsk;
+ bool down_disabled, up_disabled;
+
+ read_freqs(freqs);
+ dump(freqs);
+ pmintrmsk = intel_register_read(GEN6_PMINTRMSK);
+
+ up_disabled = pmintrmsk & UP_MASK;
+ down_disabled = pmintrmsk & DOWN_MASK;
+
+ if ((freqs[CUR] == freqs[MIN] && freqs[CUR] == freqs[MAX])
+ || is_in_idle()) {
+ igt_assert_eq(up_disabled, 1);
+ igt_assert_eq(down_disabled, 1);
+ } else if (freqs[CUR] == freqs[MAX] || freqs[CUR] == freqs[BOOST]) {
+ igt_assert_eq(up_disabled, 1);
+ igt_assert_eq(down_disabled, 0);
+ } else if (freqs[CUR] == freqs[MIN]) {
+ igt_assert_eq(up_disabled, 0);
+ igt_assert_eq(down_disabled, 1);
+ } else {
+ igt_assert_eq(up_disabled, 0);
+ igt_assert_eq(down_disabled, 0);
+ }
+
+}
+
static void min_max_config(void (*check)(void), bool load_gpu)
{
int fmid = (origfreqs[RPn] + origfreqs[RP0]) / 2;
@@ -481,6 +536,7 @@ static void basic_check(void)
read_freqs(freqs);
dump(freqs);
checkit(freqs);
+ check_pmintrmsk(freqs);
}
#define IDLE_WAIT_TIMESTEP_MSEC 250
@@ -491,7 +547,8 @@ static void idle_check(void)
int wait = 0;
/* Monitor frequencies until cur settles down to min, which should
- * happen within the allotted time */
+ * happen within the allotted time
+ */
do {
read_freqs(freqs);
dump(freqs);
@@ -502,6 +559,7 @@ static void idle_check(void)
wait += IDLE_WAIT_TIMESTEP_MSEC;
} while (wait < IDLE_WAIT_TIMEOUT_MSEC);
+ check_pmintrmsk(freqs);
igt_assert_eq(freqs[CUR], freqs[RPn]);
igt_debug("Required %d msec to reach cur=idle\n", wait);
}
@@ -514,7 +572,8 @@ static void loaded_check(void)
int wait = 0;
/* Monitor frequencies until cur increases to max, which should
- * happen within the allotted time */
+ * happen within the allotted time
+ */
do {
read_freqs(freqs);
dump(freqs);
@@ -525,6 +584,7 @@ static void loaded_check(void)
wait += LOADED_WAIT_TIMESTEP_MSEC;
} while (wait < LOADED_WAIT_TIMEOUT_MSEC);
+ check_pmintrmsk(freqs);
igt_assert_lte(freqs[MAX], freqs[CUR]);
igt_debug("Required %d msec to reach cur=max\n", wait);
}
@@ -556,6 +616,7 @@ static void stabilize_check(int *out)
static void reset_gpu(void)
{
int fd = drm_open_driver(DRIVER_INTEL);
+
igt_post_hang_ring(fd, igt_hang_ring(fd, I915_EXEC_DEFAULT));
close(fd);
}
@@ -566,6 +627,7 @@ static void boost_freq(int fd, int *boost_freqs)
int ring = -1;
igt_spin_t *load;
+ igt_debug("Trying to boost freq\n");
load = igt_spin_batch_new(fd, ring, 0);
/* Waiting will grant us a boost to maximum */
@@ -573,6 +635,7 @@ static void boost_freq(int fd, int *boost_freqs)
read_freqs(boost_freqs);
dump(boost_freqs);
+ check_pmintrmsk(boost_freqs);
igt_spin_batch_free(fd, load);
}
@@ -639,15 +702,23 @@ igt_main
const int device = drm_get_card();
struct junk *junk = stuff;
int ret;
+ struct pci_device *pci_dev;
- /* Use drm_open_driver to verify device existence */
- drm_fd = drm_open_driver(DRIVER_INTEL);
+ /* Use drm_open_driver_master to force running tests as drm
+ * master
+ */
+ drm_fd = drm_open_driver_master(DRIVER_INTEL);
igt_require_gem(drm_fd);
igt_require(gem_can_store_dword(drm_fd, 0));
+ pci_dev = intel_get_pci_device();
+ igt_require(pci_dev);
+ intel_register_access_init(pci_dev, 0, drm_fd);
+
do {
int val = -1;
char *path;
+
ret = asprintf(&path, sysfs_base_path, device, junk->name);
igt_assert(ret != -1);
junk->filp = fopen(path, junk->mode);
@@ -657,7 +728,7 @@ igt_main
val = readval(junk->filp);
igt_assert(val >= 0);
junk++;
- } while(junk->name != NULL);
+ } while (junk->name != NULL);
read_freqs(origfreqs);
@@ -684,4 +755,7 @@ igt_main
igt_subtest("reset")
waitboost(true);
+ igt_fixture {
+ intel_register_access_fini();
+ }
}
--
2.13.4
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value
2017-08-10 13:06 [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value Katarzyna Dec
@ 2017-08-10 13:20 ` Chris Wilson
2017-08-11 8:00 ` Daniel Vetter
2017-08-10 13:29 ` ✗ Fi.CI.BAT: failure for " Patchwork
1 sibling, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2017-08-10 13:20 UTC (permalink / raw)
To: Katarzyna Dec, intel-gfx
Quoting Katarzyna Dec (2017-08-10 14:06:15)
> In addition to checking whether the frequency is in correct range
> for certain scenario, we can also verify whether PM interrupts are
> masked correctly.
What does correctly mean? It was a recommendation not a requirement.
You are checking internal kernel implementation details, just add the
check to our runtime suspend that we have applied the mask (which is a
reasonable check as the rps idling is decoupled from the rps suspend).
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* ✗ Fi.CI.BAT: failure for pm_rps: Extended testcases with checking PMINTRMSK register value
2017-08-10 13:06 [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value Katarzyna Dec
2017-08-10 13:20 ` Chris Wilson
@ 2017-08-10 13:29 ` Patchwork
1 sibling, 0 replies; 15+ messages in thread
From: Patchwork @ 2017-08-10 13:29 UTC (permalink / raw)
To: Katarzyna Dec; +Cc: intel-gfx
== Series Details ==
Series: pm_rps: Extended testcases with checking PMINTRMSK register value
URL : https://patchwork.freedesktop.org/series/28625/
State : failure
== Summary ==
IGT patchset tested on top of latest successful build
c129026622accef6f54c0cfb0dc55e930cfa60b5 igt: add syncobj_basic.
with latest DRM-Tip kernel build CI_DRM_2944
0f3edcc5d407 drm-tip: 2017y-08m-10d-11h-21m-35s UTC integration manifest
Test gem_exec_parallel:
Subgroup basic:
pass -> FAIL (fi-ilk-650) fdo#101735
Test kms_cursor_legacy:
Subgroup basic-busy-flip-before-cursor-atomic:
fail -> PASS (fi-snb-2600) fdo#100215
Test kms_flip:
Subgroup basic-flip-vs-modeset:
skip -> PASS (fi-skl-x1585l) fdo#101781
Test kms_pipe_crc_basic:
Subgroup suspend-read-crc-pipe-b:
dmesg-warn -> PASS (fi-byt-j1900) fdo#101705
Test pm_rps:
Subgroup basic-api:
pass -> FAIL (fi-byt-j1900)
fdo#101735 https://bugs.freedesktop.org/show_bug.cgi?id=101735
fdo#100215 https://bugs.freedesktop.org/show_bug.cgi?id=100215
fdo#101781 https://bugs.freedesktop.org/show_bug.cgi?id=101781
fdo#101705 https://bugs.freedesktop.org/show_bug.cgi?id=101705
fi-bdw-5557u total:279 pass:268 dwarn:0 dfail:0 fail:0 skip:11 time:435s
fi-bdw-gvtdvm total:279 pass:265 dwarn:0 dfail:0 fail:0 skip:14 time:420s
fi-blb-e6850 total:279 pass:224 dwarn:1 dfail:0 fail:0 skip:54 time:357s
fi-bsw-n3050 total:279 pass:243 dwarn:0 dfail:0 fail:0 skip:36 time:483s
fi-bxt-j4205 total:279 pass:260 dwarn:0 dfail:0 fail:0 skip:19 time:500s
fi-byt-j1900 total:279 pass:254 dwarn:0 dfail:0 fail:1 skip:24 time:520s
fi-byt-n2820 total:279 pass:250 dwarn:0 dfail:0 fail:1 skip:28 time:510s
fi-glk-2a total:279 pass:260 dwarn:0 dfail:0 fail:0 skip:19 time:589s
fi-hsw-4770 total:279 pass:263 dwarn:0 dfail:0 fail:0 skip:16 time:427s
fi-hsw-4770r total:279 pass:263 dwarn:0 dfail:0 fail:0 skip:16 time:403s
fi-ilk-650 total:279 pass:228 dwarn:0 dfail:0 fail:1 skip:50 time:422s
fi-ivb-3520m total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:497s
fi-ivb-3770 total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:480s
fi-kbl-7500u total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:459s
fi-kbl-7560u total:279 pass:269 dwarn:0 dfail:0 fail:0 skip:10 time:567s
fi-kbl-r total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:575s
fi-pnv-d510 total:279 pass:223 dwarn:1 dfail:0 fail:0 skip:55 time:525s
fi-skl-6260u total:279 pass:269 dwarn:0 dfail:0 fail:0 skip:10 time:451s
fi-skl-6700k total:279 pass:261 dwarn:0 dfail:0 fail:0 skip:18 time:646s
fi-skl-6770hq total:279 pass:269 dwarn:0 dfail:0 fail:0 skip:10 time:461s
fi-skl-gvtdvm total:279 pass:266 dwarn:0 dfail:0 fail:0 skip:13 time:427s
fi-skl-x1585l total:279 pass:269 dwarn:0 dfail:0 fail:0 skip:10 time:485s
fi-snb-2600 total:279 pass:250 dwarn:0 dfail:0 fail:0 skip:29 time:411s
== Logs ==
For more details see: https://intel-gfx-ci.01.org/tree/drm-tip/IGTPW_54/
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value
2017-08-10 13:20 ` Chris Wilson
@ 2017-08-11 8:00 ` Daniel Vetter
2017-08-21 8:05 ` Dec, Katarzyna
0 siblings, 1 reply; 15+ messages in thread
From: Daniel Vetter @ 2017-08-11 8:00 UTC (permalink / raw)
To: Chris Wilson; +Cc: Katarzyna Dec, intel-gfx
On Thu, Aug 10, 2017 at 02:20:27PM +0100, Chris Wilson wrote:
> Quoting Katarzyna Dec (2017-08-10 14:06:15)
> > In addition to checking whether the frequency is in correct range
> > for certain scenario, we can also verify whether PM interrupts are
> > masked correctly.
>
> What does correctly mean? It was a recommendation not a requirement.
>
> You are checking internal kernel implementation details, just add the
> check to our runtime suspend that we have applied the mask (which is a
> reasonable check as the rps idling is decoupled from the rps suspend).
Yes, igt testcase cannot (well, only under very specific exceptions) make
assumptions about the kernel implementations. Doing so unecessarily ties
the test to a given implementation/platform, which destroy all the value
it provides for regression testing from one platform to the next.
But while you look into this, the testcase itself does sometimes fail in
our CI:
https://intel-gfx-ci.01.org/tree/drm-tip/shards.html
Both pm_rps@reset and pm_rps@waitboost seem to randomly fail on all
platforms we're currently testing.
Please take a look at these failures and try to fix them (either test or
kerenl), that would be an actual substantial improvement of igt.
Thanks, Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value
2017-08-11 8:00 ` Daniel Vetter
@ 2017-08-21 8:05 ` Dec, Katarzyna
2017-08-21 9:42 ` Arkadiusz Hiler
0 siblings, 1 reply; 15+ messages in thread
From: Dec, Katarzyna @ 2017-08-21 8:05 UTC (permalink / raw)
To: Daniel Vetter, Chris Wilson; +Cc: intel-gfx@lists.freedesktop.org
I understand we do not want to check registers in IGT tests. What about reading interrupt masks from debugfs (i915_frequency_info). Would that be better approach?
You guys suggested to get interested in kselftests for having such checks, but I am afraid that it could be too much job and we have too few hands to work.
Kasia
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value
2017-08-21 8:05 ` Dec, Katarzyna
@ 2017-08-21 9:42 ` Arkadiusz Hiler
2017-08-21 9:53 ` Chris Wilson
0 siblings, 1 reply; 15+ messages in thread
From: Arkadiusz Hiler @ 2017-08-21 9:42 UTC (permalink / raw)
To: Dec, Katarzyna; +Cc: intel-gfx@lists.freedesktop.org
On Mon, Aug 21, 2017 at 08:05:58AM +0000, Dec, Katarzyna wrote:
> I understand we do not want to check registers in IGT tests. What
> about reading interrupt masks from debugfs (i915_frequency_info).
Hey Kasia
It would be pretty much the same thing, but instead of us reading the
PMINTRMASK directly we would ask the kernel to do that on our behalf.
That would just hide register read, not get rid of it.
I think you are missing the point. The idea is that we do not want to
test details of in-kernel implementation, not ban the register reads
completely.
Reading register directly, especially just to make sure that the kernel
set something correctly is a good indicator that we are trying to do
just that - test the internal details.
> Would that be better approach? You guys suggested to get interested in
> kselftests for having such checks, but I am afraid that it could be
> too much job and we have too few hands to work.
How much of an effort would the kselftest be, since it seems that you did some
investigation already?
--
Cheers,
Arek
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value
2017-08-21 9:42 ` Arkadiusz Hiler
@ 2017-08-21 9:53 ` Chris Wilson
2017-08-21 10:21 ` Chris Wilson
0 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2017-08-21 9:53 UTC (permalink / raw)
To: Arkadiusz Hiler, Dec, Katarzyna; +Cc: intel-gfx@lists.freedesktop.org
Quoting Arkadiusz Hiler (2017-08-21 10:42:25)
> On Mon, Aug 21, 2017 at 08:05:58AM +0000, Dec, Katarzyna wrote:
> > I understand we do not want to check registers in IGT tests. What
> > about reading interrupt masks from debugfs (i915_frequency_info).
>
> Hey Kasia
>
> It would be pretty much the same thing, but instead of us reading the
> PMINTRMASK directly we would ask the kernel to do that on our behalf.
>
> That would just hide register read, not get rid of it.
>
>
> I think you are missing the point. The idea is that we do not want to
> test details of in-kernel implementation, not ban the register reads
> completely.
>
> Reading register directly, especially just to make sure that the kernel
> set something correctly is a good indicator that we are trying to do
> just that - test the internal details.
>
> > Would that be better approach? You guys suggested to get interested in
> > kselftests for having such checks, but I am afraid that it could be
> > too much job and we have too few hands to work.
>
> How much of an effort would the kselftest be, since it seems that you did some
> investigation already?
It doesn't even require a whole selftest, just something like
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 448e71af4772..e83b67fe0354 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -7733,7 +7733,8 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work))
intel_runtime_pm_put(dev_priv);
- /* gen6_rps_idle() will be called later to disable interrupts */
+ WARN_ON(I915_READ(GEN6_PMINTRMSK) !=
+ gen6_sanitize_rps_pm_mask(dev_priv, ~0));
}
void intel_sanitize_gt_powersave(struct drm_i915_private *dev_priv)
(I would say an equivalent to GEM_WARN_ON for the driver, DRV_WARN_ON, or
PM_WARN_ON?)
By testing every time we do suspend the GT (i.e. after all requests are
idle and gen6_rps_idle() should have been called, that all the
interrupts are masked.) You can make a similar assertion before waking
up, but that isn't our concern.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value
2017-08-21 9:53 ` Chris Wilson
@ 2017-08-21 10:21 ` Chris Wilson
2017-08-21 19:39 ` Daniel Vetter
0 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2017-08-21 10:21 UTC (permalink / raw)
To: Arkadiusz Hiler, Dec, Katarzyna; +Cc: intel-gfx@lists.freedesktop.org
Quoting Chris Wilson (2017-08-21 10:53:36)
> Quoting Arkadiusz Hiler (2017-08-21 10:42:25)
> > On Mon, Aug 21, 2017 at 08:05:58AM +0000, Dec, Katarzyna wrote:
> > > I understand we do not want to check registers in IGT tests. What
> > > about reading interrupt masks from debugfs (i915_frequency_info).
> >
> > Hey Kasia
> >
> > It would be pretty much the same thing, but instead of us reading the
> > PMINTRMASK directly we would ask the kernel to do that on our behalf.
> >
> > That would just hide register read, not get rid of it.
> >
> >
> > I think you are missing the point. The idea is that we do not want to
> > test details of in-kernel implementation, not ban the register reads
> > completely.
> >
> > Reading register directly, especially just to make sure that the kernel
> > set something correctly is a good indicator that we are trying to do
> > just that - test the internal details.
> >
> > > Would that be better approach? You guys suggested to get interested in
> > > kselftests for having such checks, but I am afraid that it could be
> > > too much job and we have too few hands to work.
> >
> > How much of an effort would the kselftest be, since it seems that you did some
> > investigation already?
>
> It doesn't even require a whole selftest, just something like
>
> diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> index 448e71af4772..e83b67fe0354 100644
> --- a/drivers/gpu/drm/i915/intel_pm.c
> +++ b/drivers/gpu/drm/i915/intel_pm.c
> @@ -7733,7 +7733,8 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
> if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work))
> intel_runtime_pm_put(dev_priv);
>
> - /* gen6_rps_idle() will be called later to disable interrupts */
> + WARN_ON(I915_READ(GEN6_PMINTRMSK) !=
> + gen6_sanitize_rps_pm_mask(dev_priv, ~0));
> }
Wrong spot. We actually need a call from
intel_runtime_pm_disable_interrupts.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value
2017-08-21 10:21 ` Chris Wilson
@ 2017-08-21 19:39 ` Daniel Vetter
2017-08-21 22:31 ` Arkadiusz Hiler
0 siblings, 1 reply; 15+ messages in thread
From: Daniel Vetter @ 2017-08-21 19:39 UTC (permalink / raw)
To: Chris Wilson; +Cc: Dec, Katarzyna, intel-gfx@lists.freedesktop.org
On Mon, Aug 21, 2017 at 11:21:49AM +0100, Chris Wilson wrote:
> Quoting Chris Wilson (2017-08-21 10:53:36)
> > Quoting Arkadiusz Hiler (2017-08-21 10:42:25)
> > > On Mon, Aug 21, 2017 at 08:05:58AM +0000, Dec, Katarzyna wrote:
> > > > I understand we do not want to check registers in IGT tests. What
> > > > about reading interrupt masks from debugfs (i915_frequency_info).
> > >
> > > Hey Kasia
> > >
> > > It would be pretty much the same thing, but instead of us reading the
> > > PMINTRMASK directly we would ask the kernel to do that on our behalf.
> > >
> > > That would just hide register read, not get rid of it.
> > >
> > >
> > > I think you are missing the point. The idea is that we do not want to
> > > test details of in-kernel implementation, not ban the register reads
> > > completely.
> > >
> > > Reading register directly, especially just to make sure that the kernel
> > > set something correctly is a good indicator that we are trying to do
> > > just that - test the internal details.
> > >
> > > > Would that be better approach? You guys suggested to get interested in
> > > > kselftests for having such checks, but I am afraid that it could be
> > > > too much job and we have too few hands to work.
> > >
> > > How much of an effort would the kselftest be, since it seems that you did some
> > > investigation already?
> >
> > It doesn't even require a whole selftest, just something like
> >
> > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> > index 448e71af4772..e83b67fe0354 100644
> > --- a/drivers/gpu/drm/i915/intel_pm.c
> > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > @@ -7733,7 +7733,8 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
> > if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work))
> > intel_runtime_pm_put(dev_priv);
> >
> > - /* gen6_rps_idle() will be called later to disable interrupts */
> > + WARN_ON(I915_READ(GEN6_PMINTRMSK) !=
> > + gen6_sanitize_rps_pm_mask(dev_priv, ~0));
> > }
>
> Wrong spot. We actually need a call from
> intel_runtime_pm_disable_interrupts.
Yeah, for consistency checks which are very closely tied to the
implementation we tend to sprinkle WARN_ON all over the place. In some
cases those checks are too expensive for production, then we add a
compile-time-option to hide them (e.g. GEM_BUG_ON).
I chatted with Radek, and if I understand things correctly, the main value
you derive from these is making sure a frankenstein port to an older
kernel doesn't miss or miss-apply any critical patches. In-kernel
consistency checks unfortunately don't really help with that, but we
heavily rely on these for validation.
There's also examples of this (and again, they're very important) outside
of i915, like kasan, lockdep (and maybe we'll even get kmemleak somehow
integrated into CI/igt eventually).
So still no idea what would be the best suggestion here for your team.
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value
2017-08-21 19:39 ` Daniel Vetter
@ 2017-08-21 22:31 ` Arkadiusz Hiler
2017-08-22 11:56 ` Szwichtenberg, Radoslaw
0 siblings, 1 reply; 15+ messages in thread
From: Arkadiusz Hiler @ 2017-08-21 22:31 UTC (permalink / raw)
To: Daniel Vetter; +Cc: Dec, Katarzyna, intel-gfx@lists.freedesktop.org
On Mon, Aug 21, 2017 at 09:39:24PM +0200, Daniel Vetter wrote:
> On Mon, Aug 21, 2017 at 11:21:49AM +0100, Chris Wilson wrote:
> > Quoting Chris Wilson (2017-08-21 10:53:36)
> > > Quoting Arkadiusz Hiler (2017-08-21 10:42:25)
> > > > On Mon, Aug 21, 2017 at 08:05:58AM +0000, Dec, Katarzyna wrote:
> > > > > I understand we do not want to check registers in IGT tests. What
> > > > > about reading interrupt masks from debugfs (i915_frequency_info).
> > > >
> > > > Hey Kasia
> > > >
> > > > It would be pretty much the same thing, but instead of us reading the
> > > > PMINTRMASK directly we would ask the kernel to do that on our behalf.
> > > >
> > > > That would just hide register read, not get rid of it.
> > > >
> > > >
> > > > I think you are missing the point. The idea is that we do not want to
> > > > test details of in-kernel implementation, not ban the register reads
> > > > completely.
> > > >
> > > > Reading register directly, especially just to make sure that the kernel
> > > > set something correctly is a good indicator that we are trying to do
> > > > just that - test the internal details.
> > > >
> > > > > Would that be better approach? You guys suggested to get interested in
> > > > > kselftests for having such checks, but I am afraid that it could be
> > > > > too much job and we have too few hands to work.
> > > >
> > > > How much of an effort would the kselftest be, since it seems that you did some
> > > > investigation already?
> > >
> > > It doesn't even require a whole selftest, just something like
> > >
> > > diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
> > > index 448e71af4772..e83b67fe0354 100644
> > > --- a/drivers/gpu/drm/i915/intel_pm.c
> > > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > > @@ -7733,7 +7733,8 @@ void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv)
> > > if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work))
> > > intel_runtime_pm_put(dev_priv);
> > >
> > > - /* gen6_rps_idle() will be called later to disable interrupts */
> > > + WARN_ON(I915_READ(GEN6_PMINTRMSK) !=
> > > + gen6_sanitize_rps_pm_mask(dev_priv, ~0));
> > > }
> >
> > Wrong spot. We actually need a call from
> > intel_runtime_pm_disable_interrupts.
>
> Yeah, for consistency checks which are very closely tied to the
> implementation we tend to sprinkle WARN_ON all over the place. In some
> cases those checks are too expensive for production, then we add a
> compile-time-option to hide them (e.g. GEM_BUG_ON).
>
> I chatted with Radek, and if I understand things correctly, the main value
> you derive from these is making sure a frankenstein port to an older
> kernel doesn't miss or miss-apply any critical patches. In-kernel
> consistency checks unfortunately don't really help with that, but we
> heavily rely on these for validation.
Having that stated on the mailing list from the beginning (e.g. in the
commit message or as one of the first replies) would help directing the
whole discussion on the right track and make us understand your needs
better.
I agree with Daniel's earlier statement that we should be very
(over)verbose about the changes we are making and purpose they are
serving.
> There's also examples of this (and again, they're very important) outside
> of i915, like kasan, lockdep (and maybe we'll even get kmemleak somehow
> integrated into CI/igt eventually).
>
> So still no idea what would be the best suggestion here for your team.
Kasia and Radek, can you elaborate a little more on the "frankenstein
port" and your use cases for such tests?
How is that comparable to backports to stable/LTS kernel branches?
Hopefully we can work out how to approach similar cases, because it
looks like it's might be a recurring theme in the future
--
Cheers,
Arek
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value
2017-08-21 22:31 ` Arkadiusz Hiler
@ 2017-08-22 11:56 ` Szwichtenberg, Radoslaw
2017-08-22 12:33 ` Chris Wilson
0 siblings, 1 reply; 15+ messages in thread
From: Szwichtenberg, Radoslaw @ 2017-08-22 11:56 UTC (permalink / raw)
To: Hiler, Arkadiusz, daniel@ffwll.ch
Cc: Dec, Katarzyna, intel-gfx@lists.freedesktop.org
On Tue, 2017-08-22 at 01:31 +0300, Arkadiusz Hiler wrote:
> On Mon, Aug 21, 2017 at 09:39:24PM +0200, Daniel Vetter wrote:
> > On Mon, Aug 21, 2017 at 11:21:49AM +0100, Chris Wilson wrote:
> > > Quoting Chris Wilson (2017-08-21 10:53:36)
> > > > Quoting Arkadiusz Hiler (2017-08-21 10:42:25)
> > > > > On Mon, Aug 21, 2017 at 08:05:58AM +0000, Dec, Katarzyna wrote:
> > > > > > I understand we do not want to check registers in IGT tests. What
> > > > > > about reading interrupt masks from debugfs (i915_frequency_info).
> > > > >
> > > > > Hey Kasia
> > > > >
> > > > > It would be pretty much the same thing, but instead of us reading the
> > > > > PMINTRMASK directly we would ask the kernel to do that on our behalf.
> > > > >
> > > > > That would just hide register read, not get rid of it.
> > > > >
> > > > >
> > > > > I think you are missing the point. The idea is that we do not want to
> > > > > test details of in-kernel implementation, not ban the register reads
> > > > > completely.
> > > > >
> > > > > Reading register directly, especially just to make sure that the
> > > > > kernel
> > > > > set something correctly is a good indicator that we are trying to do
> > > > > just that - test the internal details.
> > > > >
> > > > > > Would that be better approach? You guys suggested to get interested
> > > > > > in
> > > > > > kselftests for having such checks, but I am afraid that it could be
> > > > > > too much job and we have too few hands to work.
> > > > >
> > > > > How much of an effort would the kselftest be, since it seems that you
> > > > > did some
> > > > > investigation already?
> > > >
> > > > It doesn't even require a whole selftest, just something like
> > > >
> > > > diff --git a/drivers/gpu/drm/i915/intel_pm.c
> > > > b/drivers/gpu/drm/i915/intel_pm.c
> > > > index 448e71af4772..e83b67fe0354 100644
> > > > --- a/drivers/gpu/drm/i915/intel_pm.c
> > > > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > > > @@ -7733,7 +7733,8 @@ void intel_suspend_gt_powersave(struct
> > > > drm_i915_private *dev_priv)
> > > > if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work))
> > > > intel_runtime_pm_put(dev_priv);
> > > >
> > > > - /* gen6_rps_idle() will be called later to disable interrupts */
> > > > + WARN_ON(I915_READ(GEN6_PMINTRMSK) !=
> > > > + gen6_sanitize_rps_pm_mask(dev_priv, ~0));
> > > > }
> > >
> > > Wrong spot. We actually need a call from
> > > intel_runtime_pm_disable_interrupts.
> >
> > Yeah, for consistency checks which are very closely tied to the
> > implementation we tend to sprinkle WARN_ON all over the place. In some
> > cases those checks are too expensive for production, then we add a
> > compile-time-option to hide them (e.g. GEM_BUG_ON).
> >
> > I chatted with Radek, and if I understand things correctly, the main value
> > you derive from these is making sure a frankenstein port to an older
> > kernel doesn't miss or miss-apply any critical patches. In-kernel
> > consistency checks unfortunately don't really help with that, but we
> > heavily rely on these for validation.
>
> Having that stated on the mailing list from the beginning (e.g. in the
> commit message or as one of the first replies) would help directing the
> whole discussion on the right track and make us understand your needs
> better.
>
> I agree with Daniel's earlier statement that we should be very
> (over)verbose about the changes we are making and purpose they are
> serving.
>
> > There's also examples of this (and again, they're very important) outside
> > of i915, like kasan, lockdep (and maybe we'll even get kmemleak somehow
> > integrated into CI/igt eventually).
> >
> > So still no idea what would be the best suggestion here for your team.
>
> Kasia and Radek, can you elaborate a little more on the "frankenstein
> port" and your use cases for such tests?
>
> How is that comparable to backports to stable/LTS kernel branches?
>
This test proposed by Kasia not only was used to find various regressions
(including performance ones) that were later fixed on upstream (and example
would be patch from Sagar: https://patchwork.kernel.org/patch/9527335/).
Additionally we do sometimes use older releases of kernel for which we do
backport some of the latest changes (on need basis). As this test verifies
whether masking is done as we expect it to be done it allows us to ensure that
during forklift/cherrypicking or any other process any of the required patches
were not missed.
I believe that proposed changes are not introducing any overhead or information
that is not really usefull for developers/test runners. Also providing support
for all previous and future platforms should not be an issue in this case.
Information about masking proved multiple times to be usefull when looking for
root cause of issues with rps we were facing. Fixes for all these issues (if
were still applicable to upstream code) were sent upstream (I think mostly by
Sagar). I think that there is no other way to verify whether rps interrupts are
masked or not in specific situations and using a register value is the only way
to do it.
CC'ed Sagar - maybe you would like to add something to this discussion?
Cheers,
Radek
> Hopefully we can work out how to approach similar cases, because it
> looks like it's might be a recurring theme in the future
>
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value
2017-08-22 11:56 ` Szwichtenberg, Radoslaw
@ 2017-08-22 12:33 ` Chris Wilson
2017-08-22 13:14 ` Szwichtenberg, Radoslaw
0 siblings, 1 reply; 15+ messages in thread
From: Chris Wilson @ 2017-08-22 12:33 UTC (permalink / raw)
To: Szwichtenberg, Radoslaw, Hiler, Arkadiusz, daniel@ffwll.ch
Cc: Dec, Katarzyna, intel-gfx@lists.freedesktop.org
Quoting Szwichtenberg, Radoslaw (2017-08-22 12:56:00)
> On Tue, 2017-08-22 at 01:31 +0300, Arkadiusz Hiler wrote:
> > On Mon, Aug 21, 2017 at 09:39:24PM +0200, Daniel Vetter wrote:
> > > On Mon, Aug 21, 2017 at 11:21:49AM +0100, Chris Wilson wrote:
> > > > Quoting Chris Wilson (2017-08-21 10:53:36)
> > > > > Quoting Arkadiusz Hiler (2017-08-21 10:42:25)
> > > > > > On Mon, Aug 21, 2017 at 08:05:58AM +0000, Dec, Katarzyna wrote:
> > > > > > > I understand we do not want to check registers in IGT tests. What
> > > > > > > about reading interrupt masks from debugfs (i915_frequency_info).
> > > > > >
> > > > > > Hey Kasia
> > > > > >
> > > > > > It would be pretty much the same thing, but instead of us reading the
> > > > > > PMINTRMASK directly we would ask the kernel to do that on our behalf.
> > > > > >
> > > > > > That would just hide register read, not get rid of it.
> > > > > >
> > > > > >
> > > > > > I think you are missing the point. The idea is that we do not want to
> > > > > > test details of in-kernel implementation, not ban the register reads
> > > > > > completely.
> > > > > >
> > > > > > Reading register directly, especially just to make sure that the
> > > > > > kernel
> > > > > > set something correctly is a good indicator that we are trying to do
> > > > > > just that - test the internal details.
> > > > > >
> > > > > > > Would that be better approach? You guys suggested to get interested
> > > > > > > in
> > > > > > > kselftests for having such checks, but I am afraid that it could be
> > > > > > > too much job and we have too few hands to work.
> > > > > >
> > > > > > How much of an effort would the kselftest be, since it seems that you
> > > > > > did some
> > > > > > investigation already?
> > > > >
> > > > > It doesn't even require a whole selftest, just something like
> > > > >
> > > > > diff --git a/drivers/gpu/drm/i915/intel_pm.c
> > > > > b/drivers/gpu/drm/i915/intel_pm.c
> > > > > index 448e71af4772..e83b67fe0354 100644
> > > > > --- a/drivers/gpu/drm/i915/intel_pm.c
> > > > > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > > > > @@ -7733,7 +7733,8 @@ void intel_suspend_gt_powersave(struct
> > > > > drm_i915_private *dev_priv)
> > > > > if (cancel_delayed_work_sync(&dev_priv->rps.autoenable_work))
> > > > > intel_runtime_pm_put(dev_priv);
> > > > >
> > > > > - /* gen6_rps_idle() will be called later to disable interrupts */
> > > > > + WARN_ON(I915_READ(GEN6_PMINTRMSK) !=
> > > > > + gen6_sanitize_rps_pm_mask(dev_priv, ~0));
> > > > > }
> > > >
> > > > Wrong spot. We actually need a call from
> > > > intel_runtime_pm_disable_interrupts.
> > >
> > > Yeah, for consistency checks which are very closely tied to the
> > > implementation we tend to sprinkle WARN_ON all over the place. In some
> > > cases those checks are too expensive for production, then we add a
> > > compile-time-option to hide them (e.g. GEM_BUG_ON).
> > >
> > > I chatted with Radek, and if I understand things correctly, the main value
> > > you derive from these is making sure a frankenstein port to an older
> > > kernel doesn't miss or miss-apply any critical patches. In-kernel
> > > consistency checks unfortunately don't really help with that, but we
> > > heavily rely on these for validation.
> >
> > Having that stated on the mailing list from the beginning (e.g. in the
> > commit message or as one of the first replies) would help directing the
> > whole discussion on the right track and make us understand your needs
> > better.
> >
> > I agree with Daniel's earlier statement that we should be very
> > (over)verbose about the changes we are making and purpose they are
> > serving.
> >
> > > There's also examples of this (and again, they're very important) outside
> > > of i915, like kasan, lockdep (and maybe we'll even get kmemleak somehow
> > > integrated into CI/igt eventually).
> > >
> > > So still no idea what would be the best suggestion here for your team.
> >
> > Kasia and Radek, can you elaborate a little more on the "frankenstein
> > port" and your use cases for such tests?
> >
> > How is that comparable to backports to stable/LTS kernel branches?
> >
> This test proposed by Kasia not only was used to find various regressions
> (including performance ones) that were later fixed on upstream (and example
> would be patch from Sagar: https://patchwork.kernel.org/patch/9527335/).
It is a terrible test for that. If you're goal is to validate performance,
do that. For example, the presumption there is that RPS continues to
respond after a sequence of events (delivering the expected
performance). You can either use the gpu clocks as reported by the
kernel, but you forgo that trust by doing a known workload and
count cycles. The result should be that you get a test that matches
typical patterns and corner cases of userspace, and that you are
delivering the expected *userspace* behaviour. All in a way that makes
no assumptions how the kernel/hw responds, just that it will.
-Chris
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value
2017-08-22 12:33 ` Chris Wilson
@ 2017-08-22 13:14 ` Szwichtenberg, Radoslaw
2017-08-23 15:04 ` Daniel Vetter
0 siblings, 1 reply; 15+ messages in thread
From: Szwichtenberg, Radoslaw @ 2017-08-22 13:14 UTC (permalink / raw)
To: Hiler, Arkadiusz, daniel@ffwll.ch, chris@chris-wilson.co.uk
Cc: Dec, Katarzyna, intel-gfx@lists.freedesktop.org
On Tue, 2017-08-22 at 13:33 +0100, Chris Wilson wrote:
> Quoting Szwichtenberg, Radoslaw (2017-08-22 12:56:00)
> > On Tue, 2017-08-22 at 01:31 +0300, Arkadiusz Hiler wrote:
> > > On Mon, Aug 21, 2017 at 09:39:24PM +0200, Daniel Vetter wrote:
> > > > On Mon, Aug 21, 2017 at 11:21:49AM +0100, Chris Wilson wrote:
> > > > > Quoting Chris Wilson (2017-08-21 10:53:36)
> > > > > > Quoting Arkadiusz Hiler (2017-08-21 10:42:25)
> > > > > > > On Mon, Aug 21, 2017 at 08:05:58AM +0000, Dec, Katarzyna wrote:
> > > > > > > > I understand we do not want to check registers in IGT tests.
> > > > > > > > What
> > > > > > > > about reading interrupt masks from debugfs
> > > > > > > > (i915_frequency_info).
> > > > > > >
> > > > > > > Hey Kasia
> > > > > > >
> > > > > > > It would be pretty much the same thing, but instead of us reading
> > > > > > > the
> > > > > > > PMINTRMASK directly we would ask the kernel to do that on our
> > > > > > > behalf.
> > > > > > >
> > > > > > > That would just hide register read, not get rid of it.
> > > > > > >
> > > > > > >
> > > > > > > I think you are missing the point. The idea is that we do not want
> > > > > > > to
> > > > > > > test details of in-kernel implementation, not ban the register
> > > > > > > reads
> > > > > > > completely.
> > > > > > >
> > > > > > > Reading register directly, especially just to make sure that the
> > > > > > > kernel
> > > > > > > set something correctly is a good indicator that we are trying to
> > > > > > > do
> > > > > > > just that - test the internal details.
> > > > > > >
> > > > > > > > Would that be better approach? You guys suggested to get
> > > > > > > > interested
> > > > > > > > in
> > > > > > > > kselftests for having such checks, but I am afraid that it could
> > > > > > > > be
> > > > > > > > too much job and we have too few hands to work.
> > > > > > >
> > > > > > > How much of an effort would the kselftest be, since it seems that
> > > > > > > you
> > > > > > > did some
> > > > > > > investigation already?
> > > > > >
> > > > > > It doesn't even require a whole selftest, just something like
> > > > > >
> > > > > > diff --git a/drivers/gpu/drm/i915/intel_pm.c
> > > > > > b/drivers/gpu/drm/i915/intel_pm.c
> > > > > > index 448e71af4772..e83b67fe0354 100644
> > > > > > --- a/drivers/gpu/drm/i915/intel_pm.c
> > > > > > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > > > > > @@ -7733,7 +7733,8 @@ void intel_suspend_gt_powersave(struct
> > > > > > drm_i915_private *dev_priv)
> > > > > > if (cancel_delayed_work_sync(&dev_priv-
> > > > > > >rps.autoenable_work))
> > > > > > intel_runtime_pm_put(dev_priv);
> > > > > >
> > > > > > - /* gen6_rps_idle() will be called later to disable
> > > > > > interrupts */
> > > > > > + WARN_ON(I915_READ(GEN6_PMINTRMSK) !=
> > > > > > + gen6_sanitize_rps_pm_mask(dev_priv, ~0));
> > > > > > }
> > > > >
> > > > > Wrong spot. We actually need a call from
> > > > > intel_runtime_pm_disable_interrupts.
> > > >
> > > > Yeah, for consistency checks which are very closely tied to the
> > > > implementation we tend to sprinkle WARN_ON all over the place. In some
> > > > cases those checks are too expensive for production, then we add a
> > > > compile-time-option to hide them (e.g. GEM_BUG_ON).
> > > >
> > > > I chatted with Radek, and if I understand things correctly, the main
> > > > value
> > > > you derive from these is making sure a frankenstein port to an older
> > > > kernel doesn't miss or miss-apply any critical patches. In-kernel
> > > > consistency checks unfortunately don't really help with that, but we
> > > > heavily rely on these for validation.
> > >
> > > Having that stated on the mailing list from the beginning (e.g. in the
> > > commit message or as one of the first replies) would help directing the
> > > whole discussion on the right track and make us understand your needs
> > > better.
> > >
> > > I agree with Daniel's earlier statement that we should be very
> > > (over)verbose about the changes we are making and purpose they are
> > > serving.
> > >
> > > > There's also examples of this (and again, they're very important)
> > > > outside
> > > > of i915, like kasan, lockdep (and maybe we'll even get kmemleak somehow
> > > > integrated into CI/igt eventually).
> > > >
> > > > So still no idea what would be the best suggestion here for your team.
> > >
> > > Kasia and Radek, can you elaborate a little more on the "frankenstein
> > > port" and your use cases for such tests?
> > >
> > > How is that comparable to backports to stable/LTS kernel branches?
> > >
> >
> > This test proposed by Kasia not only was used to find various regressions
> > (including performance ones) that were later fixed on upstream (and example
> > would be patch from Sagar: https://patchwork.kernel.org/patch/9527335/).
>
> It is a terrible test for that. If you're goal is to validate performance,
> do that. For example, the presumption there is that RPS continues to
> respond after a sequence of events (delivering the expected
> performance). You can either use the gpu clocks as reported by the
> kernel, but you forgo that trust by doing a known workload and
> count cycles. The result should be that you get a test that matches
> typical patterns and corner cases of userspace, and that you are
> delivering the expected *userspace* behaviour. All in a way that makes
> no assumptions how the kernel/hw responds, just that it will.
> -Chris
I do agree with your point - I just wanted give a example of where this test
turned out helpfull (not saying this test was aimed to catch such behaviour even
though it was helpfull in rootcausing the provlem). I hope Sagar will be able to
provide better examples.
The most important part of this discussion probably is a question whether IGT
should be backwards compatible and help testing previous stable kernel releases.
I know that saying that some user of IGTs uses it in certain ways to test older
kernels (we do) is not enough to justify upstreaming these. We do maintain some
older kernels (stable ones) with some changes forklifted from upstream/newer
releases and this test does help finding out if all required changes were
cherrypicked correctly or not.
Radek
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value
2017-08-22 13:14 ` Szwichtenberg, Radoslaw
@ 2017-08-23 15:04 ` Daniel Vetter
2017-08-28 10:34 ` Dec, Katarzyna
0 siblings, 1 reply; 15+ messages in thread
From: Daniel Vetter @ 2017-08-23 15:04 UTC (permalink / raw)
To: Szwichtenberg, Radoslaw; +Cc: Dec, Katarzyna, intel-gfx@lists.freedesktop.org
On Tue, Aug 22, 2017 at 01:14:19PM +0000, Szwichtenberg, Radoslaw wrote:
> On Tue, 2017-08-22 at 13:33 +0100, Chris Wilson wrote:
> > Quoting Szwichtenberg, Radoslaw (2017-08-22 12:56:00)
> > > On Tue, 2017-08-22 at 01:31 +0300, Arkadiusz Hiler wrote:
> > > > On Mon, Aug 21, 2017 at 09:39:24PM +0200, Daniel Vetter wrote:
> > > > > On Mon, Aug 21, 2017 at 11:21:49AM +0100, Chris Wilson wrote:
> > > > > > Quoting Chris Wilson (2017-08-21 10:53:36)
> > > > > > > Quoting Arkadiusz Hiler (2017-08-21 10:42:25)
> > > > > > > > On Mon, Aug 21, 2017 at 08:05:58AM +0000, Dec, Katarzyna wrote:
> > > > > > > > > I understand we do not want to check registers in IGT tests.
> > > > > > > > > What
> > > > > > > > > about reading interrupt masks from debugfs
> > > > > > > > > (i915_frequency_info).
> > > > > > > >
> > > > > > > > Hey Kasia
> > > > > > > >
> > > > > > > > It would be pretty much the same thing, but instead of us reading
> > > > > > > > the
> > > > > > > > PMINTRMASK directly we would ask the kernel to do that on our
> > > > > > > > behalf.
> > > > > > > >
> > > > > > > > That would just hide register read, not get rid of it.
> > > > > > > >
> > > > > > > >
> > > > > > > > I think you are missing the point. The idea is that we do not want
> > > > > > > > to
> > > > > > > > test details of in-kernel implementation, not ban the register
> > > > > > > > reads
> > > > > > > > completely.
> > > > > > > >
> > > > > > > > Reading register directly, especially just to make sure that the
> > > > > > > > kernel
> > > > > > > > set something correctly is a good indicator that we are trying to
> > > > > > > > do
> > > > > > > > just that - test the internal details.
> > > > > > > >
> > > > > > > > > Would that be better approach? You guys suggested to get
> > > > > > > > > interested
> > > > > > > > > in
> > > > > > > > > kselftests for having such checks, but I am afraid that it could
> > > > > > > > > be
> > > > > > > > > too much job and we have too few hands to work.
> > > > > > > >
> > > > > > > > How much of an effort would the kselftest be, since it seems that
> > > > > > > > you
> > > > > > > > did some
> > > > > > > > investigation already?
> > > > > > >
> > > > > > > It doesn't even require a whole selftest, just something like
> > > > > > >
> > > > > > > diff --git a/drivers/gpu/drm/i915/intel_pm.c
> > > > > > > b/drivers/gpu/drm/i915/intel_pm.c
> > > > > > > index 448e71af4772..e83b67fe0354 100644
> > > > > > > --- a/drivers/gpu/drm/i915/intel_pm.c
> > > > > > > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > > > > > > @@ -7733,7 +7733,8 @@ void intel_suspend_gt_powersave(struct
> > > > > > > drm_i915_private *dev_priv)
> > > > > > > if (cancel_delayed_work_sync(&dev_priv-
> > > > > > > >rps.autoenable_work))
> > > > > > > intel_runtime_pm_put(dev_priv);
> > > > > > >
> > > > > > > - /* gen6_rps_idle() will be called later to disable
> > > > > > > interrupts */
> > > > > > > + WARN_ON(I915_READ(GEN6_PMINTRMSK) !=
> > > > > > > + gen6_sanitize_rps_pm_mask(dev_priv, ~0));
> > > > > > > }
> > > > > >
> > > > > > Wrong spot. We actually need a call from
> > > > > > intel_runtime_pm_disable_interrupts.
> > > > >
> > > > > Yeah, for consistency checks which are very closely tied to the
> > > > > implementation we tend to sprinkle WARN_ON all over the place. In some
> > > > > cases those checks are too expensive for production, then we add a
> > > > > compile-time-option to hide them (e.g. GEM_BUG_ON).
> > > > >
> > > > > I chatted with Radek, and if I understand things correctly, the main
> > > > > value
> > > > > you derive from these is making sure a frankenstein port to an older
> > > > > kernel doesn't miss or miss-apply any critical patches. In-kernel
> > > > > consistency checks unfortunately don't really help with that, but we
> > > > > heavily rely on these for validation.
> > > >
> > > > Having that stated on the mailing list from the beginning (e.g. in the
> > > > commit message or as one of the first replies) would help directing the
> > > > whole discussion on the right track and make us understand your needs
> > > > better.
> > > >
> > > > I agree with Daniel's earlier statement that we should be very
> > > > (over)verbose about the changes we are making and purpose they are
> > > > serving.
> > > >
> > > > > There's also examples of this (and again, they're very important)
> > > > > outside
> > > > > of i915, like kasan, lockdep (and maybe we'll even get kmemleak somehow
> > > > > integrated into CI/igt eventually).
> > > > >
> > > > > So still no idea what would be the best suggestion here for your team.
> > > >
> > > > Kasia and Radek, can you elaborate a little more on the "frankenstein
> > > > port" and your use cases for such tests?
> > > >
> > > > How is that comparable to backports to stable/LTS kernel branches?
> > > >
> > >
> > > This test proposed by Kasia not only was used to find various regressions
> > > (including performance ones) that were later fixed on upstream (and example
> > > would be patch from Sagar: https://patchwork.kernel.org/patch/9527335/).
> >
> > It is a terrible test for that. If you're goal is to validate performance,
> > do that. For example, the presumption there is that RPS continues to
> > respond after a sequence of events (delivering the expected
> > performance). You can either use the gpu clocks as reported by the
> > kernel, but you forgo that trust by doing a known workload and
> > count cycles. The result should be that you get a test that matches
> > typical patterns and corner cases of userspace, and that you are
> > delivering the expected *userspace* behaviour. All in a way that makes
> > no assumptions how the kernel/hw responds, just that it will.
> > -Chris
>
> I do agree with your point - I just wanted give a example of where this test
> turned out helpfull (not saying this test was aimed to catch such behaviour even
> though it was helpfull in rootcausing the provlem). I hope Sagar will be able to
> provide better examples.
>
> The most important part of this discussion probably is a question whether IGT
> should be backwards compatible and help testing previous stable kernel releases.
> I know that saying that some user of IGTs uses it in certain ways to test older
> kernels (we do) is not enough to justify upstreaming these. We do maintain some
> older kernels (stable ones) with some changes forklifted from upstream/newer
> releases and this test does help finding out if all required changes were
> cherrypicked correctly or not.
One thing we could do on that front is run igt patches against older-ish
kernels. Well, since full igt runs are already at the limit of machine
time we have, that would probably be something that each release
engineering team that cares about a specific kernel/hw combo would need to
do.
But patchwork supports that, you can add arbitrary amounts of CI farms to
it that post results.
If we only rely on review at least I don't think igt is going to be
backward compatible enough for serious backporting efforts. There's still
going to be issues, simply because we sometimes need to change the uabi
(in a way that only breaks igt, not any real apps ofc).
-Daniel
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value
2017-08-23 15:04 ` Daniel Vetter
@ 2017-08-28 10:34 ` Dec, Katarzyna
0 siblings, 0 replies; 15+ messages in thread
From: Dec, Katarzyna @ 2017-08-28 10:34 UTC (permalink / raw)
To: daniel@ffwll.ch, Szwichtenberg, Radoslaw; +Cc: intel-gfx@lists.freedesktop.org
On Wed, 2017-08-23 at 17:04 +0200, Daniel Vetter wrote:
> On Tue, Aug 22, 2017 at 01:14:19PM +0000, Szwichtenberg, Radoslaw
> wrote:
> > On Tue, 2017-08-22 at 13:33 +0100, Chris Wilson wrote:
> > > Quoting Szwichtenberg, Radoslaw (2017-08-22 12:56:00)
> > > > On Tue, 2017-08-22 at 01:31 +0300, Arkadiusz Hiler wrote:
> > > > > On Mon, Aug 21, 2017 at 09:39:24PM +0200, Daniel Vetter
> > > > > wrote:
> > > > > > On Mon, Aug 21, 2017 at 11:21:49AM +0100, Chris Wilson
> > > > > > wrote:
> > > > > > > Quoting Chris Wilson (2017-08-21 10:53:36)
> > > > > > > > Quoting Arkadiusz Hiler (2017-08-21 10:42:25)
> > > > > > > > > On Mon, Aug 21, 2017 at 08:05:58AM +0000, Dec,
> > > > > > > > > Katarzyna wrote:
> > > > > > > > > > I understand we do not want to check registers in
> > > > > > > > > > IGT tests.
> > > > > > > > > > What
> > > > > > > > > > about reading interrupt masks from debugfs
> > > > > > > > > > (i915_frequency_info).
> > > > > > > > >
> > > > > > > > > Hey Kasia
> > > > > > > > >
> > > > > > > > > It would be pretty much the same thing, but instead
> > > > > > > > > of us reading
> > > > > > > > > the
> > > > > > > > > PMINTRMASK directly we would ask the kernel to do
> > > > > > > > > that on our
> > > > > > > > > behalf.
> > > > > > > > >
> > > > > > > > > That would just hide register read, not get rid of
> > > > > > > > > it.
> > > > > > > > >
> > > > > > > > >
> > > > > > > > > I think you are missing the point. The idea is that
> > > > > > > > > we do not want
> > > > > > > > > to
> > > > > > > > > test details of in-kernel implementation, not ban the
> > > > > > > > > register
> > > > > > > > > reads
> > > > > > > > > completely.
> > > > > > > > >
> > > > > > > > > Reading register directly, especially just to make
> > > > > > > > > sure that the
> > > > > > > > > kernel
> > > > > > > > > set something correctly is a good indicator that we
> > > > > > > > > are trying to
> > > > > > > > > do
> > > > > > > > > just that - test the internal details.
> > > > > > > > >
> > > > > > > > > > Would that be better approach? You guys suggested
> > > > > > > > > > to get
> > > > > > > > > > interested
> > > > > > > > > > in
> > > > > > > > > > kselftests for having such checks, but I am afraid
> > > > > > > > > > that it could
> > > > > > > > > > be
> > > > > > > > > > too much job and we have too few hands to work.
> > > > > > > > >
> > > > > > > > > How much of an effort would the kselftest be, since
> > > > > > > > > it seems that
> > > > > > > > > you
> > > > > > > > > did some
> > > > > > > > > investigation already?
> > > > > > > >
> > > > > > > > It doesn't even require a whole selftest, just
> > > > > > > > something like
> > > > > > > >
> > > > > > > > diff --git a/drivers/gpu/drm/i915/intel_pm.c
> > > > > > > > b/drivers/gpu/drm/i915/intel_pm.c
> > > > > > > > index 448e71af4772..e83b67fe0354 100644
> > > > > > > > --- a/drivers/gpu/drm/i915/intel_pm.c
> > > > > > > > +++ b/drivers/gpu/drm/i915/intel_pm.c
> > > > > > > > @@ -7733,7 +7733,8 @@ void
> > > > > > > > intel_suspend_gt_powersave(struct
> > > > > > > > drm_i915_private *dev_priv)
> > > > > > > > if (cancel_delayed_work_sync(&dev_priv-
> > > > > > > > > rps.autoenable_work))
> > > > > > > >
> > > > > > > > intel_runtime_pm_put(dev_priv);
> > > > > > > >
> > > > > > > > - /* gen6_rps_idle() will be called later to
> > > > > > > > disable
> > > > > > > > interrupts */
> > > > > > > > + WARN_ON(I915_READ(GEN6_PMINTRMSK) !=
> > > > > > > > + gen6_sanitize_rps_pm_mask(dev_priv,
> > > > > > > > ~0));
> > > > > > > > }
> > > > > > >
> > > > > > > Wrong spot. We actually need a call from
> > > > > > > intel_runtime_pm_disable_interrupts.
> > > > > >
> > > > > > Yeah, for consistency checks which are very closely tied to
> > > > > > the
> > > > > > implementation we tend to sprinkle WARN_ON all over the
> > > > > > place. In some
> > > > > > cases those checks are too expensive for production, then
> > > > > > we add a
> > > > > > compile-time-option to hide them (e.g. GEM_BUG_ON).
> > > > > >
> > > > > > I chatted with Radek, and if I understand things correctly,
> > > > > > the main
> > > > > > value
> > > > > > you derive from these is making sure a frankenstein port to
> > > > > > an older
> > > > > > kernel doesn't miss or miss-apply any critical patches. In-
> > > > > > kernel
> > > > > > consistency checks unfortunately don't really help with
> > > > > > that, but we
> > > > > > heavily rely on these for validation.
> > > > >
> > > > > Having that stated on the mailing list from the beginning
> > > > > (e.g. in the
> > > > > commit message or as one of the first replies) would help
> > > > > directing the
> > > > > whole discussion on the right track and make us understand
> > > > > your needs
> > > > > better.
> > > > >
> > > > > I agree with Daniel's earlier statement that we should be
> > > > > very
> > > > > (over)verbose about the changes we are making and purpose
> > > > > they are
> > > > > serving.
> > > > >
> > > > > > There's also examples of this (and again, they're very
> > > > > > important)
> > > > > > outside
> > > > > > of i915, like kasan, lockdep (and maybe we'll even get
> > > > > > kmemleak somehow
> > > > > > integrated into CI/igt eventually).
> > > > > >
> > > > > > So still no idea what would be the best suggestion here for
> > > > > > your team.
> > > > >
> > > > > Kasia and Radek, can you elaborate a little more on the
> > > > > "frankenstein
> > > > > port" and your use cases for such tests?
> > > > >
> > > > > How is that comparable to backports to stable/LTS kernel
> > > > > branches?
> > > > >
> > > >
> > > > This test proposed by Kasia not only was used to find various
> > > > regressions
> > > > (including performance ones) that were later fixed on upstream
> > > > (and example
> > > > would be patch from Sagar: https://patchwork.kernel.org/patch/9
> > > > 527335/).
> > >
> > > It is a terrible test for that. If you're goal is to validate
> > > performance,
> > > do that. For example, the presumption there is that RPS continues
> > > to
> > > respond after a sequence of events (delivering the expected
> > > performance). You can either use the gpu clocks as reported by
> > > the
> > > kernel, but you forgo that trust by doing a known workload and
> > > count cycles. The result should be that you get a test that
> > > matches
> > > typical patterns and corner cases of userspace, and that you are
> > > delivering the expected *userspace* behaviour. All in a way that
> > > makes
> > > no assumptions how the kernel/hw responds, just that it will.
> > > -Chris
> >
> > I do agree with your point - I just wanted give a example of where
> > this test
> > turned out helpfull (not saying this test was aimed to catch such
> > behaviour even
> > though it was helpfull in rootcausing the provlem). I hope Sagar
> > will be able to
> > provide better examples.
> >
> > The most important part of this discussion probably is a question
> > whether IGT
> > should be backwards compatible and help testing previous stable
> > kernel releases.
> > I know that saying that some user of IGTs uses it in certain ways
> > to test older
> > kernels (we do) is not enough to justify upstreaming these. We do
> > maintain some
> > older kernels (stable ones) with some changes forklifted from
> > upstream/newer
> > releases and this test does help finding out if all required
> > changes were
> > cherrypicked correctly or not.
>
> One thing we could do on that front is run igt patches against older-
> ish
> kernels. Well, since full igt runs are already at the limit of
> machine
> time we have, that would probably be something that each release
> engineering team that cares about a specific kernel/hw combo would
> need to
> do.
>
> But patchwork supports that, you can add arbitrary amounts of CI
> farms to
> it that post results.
>
> If we only rely on review at least I don't think igt is going to be
> backward compatible enough for serious backporting efforts. There's
> still
> going to be issues, simply because we sometimes need to change the
> uabi
> (in a way that only breaks igt, not any real apps ofc).
> -Daniel
How about adding check to kernel and having such test in IGT? We could
have double check in case of "frankenstein" kernel builds. This is
really the special case when we want to check registers value. I think
there is no other way to check such things using IGT.
About check in kernel code:
Is this something like that you have in mind?
diff --git a/drivers/gpu/drm/i915/i915_irq.c
b/drivers/gpu/drm/i915/i915_irq.c
index 196caa463edf..b4030d8fa4c9 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -4326,6 +4326,7 @@ void intel_runtime_pm_disable_interrupts(struct
drm_i915_private *dev_priv)
dev_priv->drm.driver->irq_uninstall(&dev_priv->drm);
dev_priv->pm.irqs_enabled = false;
synchronize_irq(dev_priv->drm.irq);
+ WARN_ON(I915_READ(GEN6_PMINTRMSK)!=gen6_sanitize_rps_pm_mask(de
v_priv, ~0));
}
Regards,
Kasia
_______________________________________________
Intel-gfx mailing list
Intel-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/intel-gfx
^ permalink raw reply related [flat|nested] 15+ messages in thread
end of thread, other threads:[~2017-08-28 10:35 UTC | newest]
Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2017-08-10 13:06 [PATCH i-g-t] pm_rps: Extended testcases with checking PMINTRMSK register value Katarzyna Dec
2017-08-10 13:20 ` Chris Wilson
2017-08-11 8:00 ` Daniel Vetter
2017-08-21 8:05 ` Dec, Katarzyna
2017-08-21 9:42 ` Arkadiusz Hiler
2017-08-21 9:53 ` Chris Wilson
2017-08-21 10:21 ` Chris Wilson
2017-08-21 19:39 ` Daniel Vetter
2017-08-21 22:31 ` Arkadiusz Hiler
2017-08-22 11:56 ` Szwichtenberg, Radoslaw
2017-08-22 12:33 ` Chris Wilson
2017-08-22 13:14 ` Szwichtenberg, Radoslaw
2017-08-23 15:04 ` Daniel Vetter
2017-08-28 10:34 ` Dec, Katarzyna
2017-08-10 13:29 ` ✗ Fi.CI.BAT: failure for " Patchwork
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).