From: Martin Peres <martin.peres-GANU6spQydw@public.gmane.org>
To: Karol Herbst <nouveau-lIBOoy2+GI7scQ4cX5LuPg@public.gmane.org>,
nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Cc: Karol Herbst <git-lIBOoy2+GI7scQ4cX5LuPg@public.gmane.org>
Subject: Re: [PATCH 3/4] subdev/pmu/fuc: implement perf
Date: Mon, 15 Feb 2016 00:02:22 +0200 [thread overview]
Message-ID: <56C0F96E.4040308@free.fr> (raw)
In-Reply-To: <1445883189-4407-4-git-send-email-nouveau-lIBOoy2+GI7scQ4cX5LuPg@public.gmane.org>
On 26/10/15 20:13, Karol Herbst wrote:
> From: Karol Herbst <git@karolherbst.de>
>
> ---
> drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h | 788 +++++++++++++++------------
> drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc4.h | 740 ++++++++++++++-----------
> drm/nouveau/nvkm/subdev/pmu/fuc/gk104.fuc4.h | 740 ++++++++++++++-----------
> drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h | 710 ++++++++++++++----------
> drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h | 755 ++++++++++++++-----------
> drm/nouveau/nvkm/subdev/pmu/fuc/os.h | 4 +
> drm/nouveau/nvkm/subdev/pmu/fuc/perf.fuc | 148 +++++
> 7 files changed, 2267 insertions(+), 1618 deletions(-)
>
>
> diff --git a/drm/nouveau/nvkm/subdev/pmu/fuc/os.h b/drm/nouveau/nvkm/subdev/pmu/fuc/os.h
> index c8b06cb..53508d9 100644
> --- a/drm/nouveau/nvkm/subdev/pmu/fuc/os.h
> +++ b/drm/nouveau/nvkm/subdev/pmu/fuc/os.h
> @@ -49,4 +49,8 @@
> #define I2C__MSG_DATA0_WR08_REG 0:7
> #define I2C__MSG_DATA1_WR08_VAL 0:7
>
> +
> +/* PERF: message identifiers */
> +#define PERF_MSG_LOAD 1
Could you document the expected inputs and outputs of this message as a
comment?
// IN: don't care
// OUT:
// - byte 3: PCIe load (GF100+)
// - byte 2: Memory load
// - byte 1: Video decoding engines' load
// - byte 0: GR load
> +
> #endif
> diff --git a/drm/nouveau/nvkm/subdev/pmu/fuc/perf.fuc b/drm/nouveau/nvkm/subdev/pmu/fuc/perf.fuc
> index 38eadf7..69a8f8d 100644
> --- a/drm/nouveau/nvkm/subdev/pmu/fuc/perf.fuc
> +++ b/drm/nouveau/nvkm/subdev/pmu/fuc/perf.fuc
> @@ -30,6 +30,18 @@ process(PROC_PERF, #perf_init, #perf_recv)
> * PERF data segment
> *****************************************************************************/
> #ifdef INCLUDE_DATA
> +perf_attr_start:
> +// parameters
> +perf_polling_period_us: .b32 100000
> +
> +// engine usage percentage
It is a tiny bit misleading to say percentage when you mean per-255. How
about:
// engine usage (0-255)
> +perf_eng_gr: .b8 0
> +perf_eng_vdec: .b8 0
> +perf_eng_mc: .b8 0
> +#if NVKM_PPWR_CHIPSET >= GF100
> +perf_eng_pcie: .b8 0
> +#endif
> +.align 4
> #endif
>
> /******************************************************************************
> @@ -46,6 +58,78 @@ process(PROC_PERF, #perf_init, #perf_recv)
> // $r11 - data1
> // $r0 - zero
> perf_recv:
> + push $r1
> +
> + imm32($r10, PROC_HOST)
> + cmp b32 $r14 $r10
> + bra ne #perf_recv_not_host
> + cmp b32 $r13 PERF_MSG_LOAD
Thanks for doing this and not assuming straight away that this is going
to be the only type of message you will receive from the host :)
> + bra e #perf_load
> + bra #perf_recv_exit
> +
> +perf_load:
> + clear b32 $r11
> + clear b32 $r12
> +#if NVKM_PPWR_CHIPSET >= GF100
> + ld(b8, $r12, #perf_eng_pcie)
> + shl b32 $r12 8
> +#endif
> + ld(b8, $r12, #perf_eng_mc)
> + shl b32 $r12 8
> + ld(b8, $r12, #perf_eng_vdec)
> + shl b32 $r12 8
> + ld(b8, $r12, #perf_eng_gr)
Simple and effective, that's nice :)
> + call(send)
> + bra #perf_recv_exit
> +
> +perf_recv_not_host:
> + call(perf_counter_readout)
> +
> + ld(b32, $r14, #perf_polling_period_us)
> + call #ticks_from_us
> + call(timer)
> +
> +perf_recv_exit:
> + pop $r1
> + ret
> +
> +
> +// description
> +//
> +// $r15 - current (perf)
> +// $r0 - zero
> +perf_counter_readout:
> + nv_iord($r14, NV_PPWR_COUNTER_COUNT(0))
> + div $r14 $r14 0xff
Not a big fan of the loss in precision. Why do you do that instead of
what I did here:
https://cgit.freedesktop.org/~mperes/nouveau/tree/nvkm/subdev/pwr/fuc/perf.fuc?h=ppwr_rework&id=27310fa8fdc39e54a3f4383fada96a3562c5a022#n134
With my solution, you get no loss in precision. You also poll the
counters faster, which means greater accuracy, and my version misses fewer cycles.
All in all, could you please reuse my polling function? :D
> +
> + nv_iord($r13, NV_PPWR_COUNTER_COUNT(1))
> + div $r13 $r13 $r14
> + st(b8, #perf_eng_gr, $r13)
> +
> + nv_iord($r13, NV_PPWR_COUNTER_COUNT(2))
> + div $r13 $r13 $r14
> + st(b8, #perf_eng_vdec, $r13)
> +
> + nv_iord($r13, NV_PPWR_COUNTER_COUNT(3))
> + div $r13 $r13 $r14
> + st(b8, #perf_eng_mc, $r13)
> +
> +#if NVKM_PPWR_CHIPSET >= GF100
> + nv_iord($r13, NV_PPWR_COUNTER_COUNT(4))
> + div $r13 $r13 $r14
> + st(b8, #perf_eng_pcie, $r13)
> +#endif
> +
> + // reset the counters
> + imm32($r14, NV_PPWR_COUNTER_COUNT_RESET)
> + nv_iowr(NV_PPWR_COUNTER_COUNT(0), $r14)
> + nv_iowr(NV_PPWR_COUNTER_COUNT(1), $r14)
> + nv_iowr(NV_PPWR_COUNTER_COUNT(2), $r14)
> + nv_iowr(NV_PPWR_COUNTER_COUNT(3), $r14)
> +#if NVKM_PPWR_CHIPSET >= GF100
> + nv_iowr(NV_PPWR_COUNTER_COUNT(4), $r14)
> +#endif
> +
> ret
>
> // description
> @@ -53,5 +137,69 @@ perf_recv:
> // $r15 - current (perf)
> // $r0 - zero
> perf_init:
> + // set up the total ticks counter first
> + imm32($r14, NV_PPWR_COUNTER_MODE_ALWAYS)
> + nv_iowr(NV_PPWR_COUNTER_MODE(0), $r14)
> +
> + // set up the other counters, with fermi there are more
> + imm32($r14, NV_PPWR_COUNTER_MODE_IF_NOT_ALL)
> + nv_iowr(NV_PPWR_COUNTER_MODE(1), $r14)
> + nv_iowr(NV_PPWR_COUNTER_MODE(2), $r14)
> + nv_iowr(NV_PPWR_COUNTER_MODE(3), $r14)
> +#if NVKM_PPWR_CHIPSET >= GF100
> + nv_iowr(NV_PPWR_COUNTER_MODE(4), $r14)
> +#endif
> +
> + // core load counter
> + imm32($r14,
> + NV_PPWR_COUNTER_SIG_GR_IDLE
> + | NV_PPWR_COUNTER_SIG_GR_GPC_IDLE
> + | NV_PPWR_COUNTER_SIG_GR_ROP_IDLE
> +#if NVKM_PPWR_CHIPSET >= GF100
> + | NV_PPWR_COUNTER_SIG_GR_HUB_IDLE
> + | NV_PPWR_COUNTER_SIG_PCOPY0_IDLE
> + | NV_PPWR_COUNTER_SIG_PCOPY1_IDLE
> +#if NVKM_PPWR_CHIPSET >= GK104
> + | NV_PPWR_COUNTER_SIG_PCOPY2_IDLE
> +#endif
> +#endif
> + )
> + nv_iowr(NV_PPWR_COUNTER_MASK(1), $r14)
> +
> + // video load counter
> + imm32($r14,
> + NV_PPWR_COUNTER_SIG_PVLD_IDLE
> + | NV_PPWR_COUNTER_SIG_PPDEC_IDLE
> + | NV_PPWR_COUNTER_SIG_PPPP_IDLE
> +#if NVKM_PPWR_CHIPSET >= GK104
> + | NV_PPWR_COUNTER_SIG_PVENC
> +#endif
> + )
> + nv_iowr(NV_PPWR_COUNTER_MASK(2), $r14)
> +
> + // memory load counter
> + imm32($r14,
> +#if NVKM_PPWR_CHIPSET >= GF100
> + NV_PPWR_COUNTER_SIG_BFB_PART0_REQ
> +#else
> + NV_PPWR_COUNTER_SIG_FB_PART0_REQ
> +#endif
> + )
> + nv_iowr(NV_PPWR_COUNTER_MASK(3), $r14)
> +
> + // pcie load counter
> +#if NVKM_PPWR_CHIPSET >= GF100
> + imm32($r14, NV_PPWR_COUNTER_SIG_PCIE)
> + nv_iowr(NV_PPWR_COUNTER_MASK(4), $r14)
> +#endif
> +
> + // initial read out
> + call(perf_counter_readout)
> +
> + // schedule the next read out
> + ld(b32, $r14, #perf_polling_period_us)
> + call #ticks_from_us
> + call(timer)
> +
> ret
> #endif
Looks mostly good, except for the polling function!
_______________________________________________
Nouveau mailing list
Nouveau@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/nouveau
next prev parent reply other threads:[~2016-02-14 22:02 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-10-26 18:13 [PATCH 0/4] Add pdaemon load counters Karol Herbst
[not found] ` <1445883189-4407-1-git-send-email-nouveau-lIBOoy2+GI7scQ4cX5LuPg@public.gmane.org>
2015-10-26 18:13 ` [PATCH 1/4] subdev/pmu/fuc: add gk104 Karol Herbst
[not found] ` <1445883189-4407-2-git-send-email-nouveau-lIBOoy2+GI7scQ4cX5LuPg@public.gmane.org>
2016-02-14 21:10 ` Martin Peres
2015-10-26 18:13 ` [PATCH 2/4] pmu/fuc: add macros for pdaemon pwr counters Karol Herbst
[not found] ` <1445883189-4407-3-git-send-email-nouveau-lIBOoy2+GI7scQ4cX5LuPg@public.gmane.org>
2016-02-14 21:14 ` Martin Peres
2015-10-26 18:13 ` [PATCH 3/4] subdev/pmu/fuc: implement perf Karol Herbst
[not found] ` <1445883189-4407-4-git-send-email-nouveau-lIBOoy2+GI7scQ4cX5LuPg@public.gmane.org>
2015-10-26 19:19 ` Roy Spliet
2016-02-14 22:02 ` Martin Peres [this message]
2015-10-26 18:13 ` [PATCH 4/4] nouveau/debugfs: add interface for current load Karol Herbst
[not found] ` <1445883189-4407-5-git-send-email-nouveau-lIBOoy2+GI7scQ4cX5LuPg@public.gmane.org>
2015-10-26 18:17 ` Ilia Mirkin
[not found] ` <CAKb7UvjGYgXGCG-SjictQS=GvL1QdDYAY6A4nZYHP6=RFMyxeQ-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2016-02-14 22:36 ` Martin Peres
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=56C0F96E.4040308@free.fr \
--to=martin.peres-ganu6spqydw@public.gmane.org \
--cc=git-lIBOoy2+GI7scQ4cX5LuPg@public.gmane.org \
--cc=nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org \
--cc=nouveau-lIBOoy2+GI7scQ4cX5LuPg@public.gmane.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.