From: Faith Ekstrand <faith@gfxstrand.net>
To: Dave Airlie <airlied@gmail.com>
Cc: dri-devel@lists.freedesktop.org, nouveau@lists.freedesktop.org,
dakr@kernel.org
Subject: Re: [PATCH 1/2] nouveau: fix disabling the nonstall irq due to storm code. (v2)
Date: Fri, 29 Aug 2025 10:49:59 -0400 [thread overview]
Message-ID: <CAOFGe95wF3-8dcbmEs+t=Z_NeXyES4xkTmKToZkExompEq0VFQ@mail.gmail.com> (raw)
In-Reply-To: <20250829021633.1674524-1-airlied@gmail.com>
On Thu, Aug 28, 2025 at 10:17 PM Dave Airlie <airlied@gmail.com> wrote:
>
> From: Dave Airlie <airlied@redhat.com>
>
> Nouveau has code that, when it gets an IRQ with no allowed handler,
> disables it to avoid storms.
>
> However with nonstall interrupts, we often disable them from
> the drm driver, but still request their emission via the push submission.
>
> Just don't disable nonstall irqs ever in normal operation, the
> event handling code will filter them out, and the driver will
> just enable/disable them at load time.
>
> This fixes timeouts we've been seeing on/off for a long time,
> but they became a lot more noticeable on Blackwell.
>
> This doesn't fix all of them, there is a subsequent fence emission
> fix to fix the last few.
>
> Fixes: 3ebd64aa3c4f ("drm/nouveau/intr: support multiple trees, and explicit interfaces")
> Cc: stable@vger.kernel.org
> Signed-off-by: Dave Airlie <airlied@redhat.com>
I don't 100% grok all the storm stuff, but this certainly looks
reasonable and I'm convinced it shouldn't break anything.
Reviewed-by: Faith Ekstrand <faith.ekstrand@collabora.com>
>
> ---
> v2: add missing ga102.
> ---
> .../gpu/drm/nouveau/nvkm/engine/fifo/base.c | 2 ++
> .../gpu/drm/nouveau/nvkm/engine/fifo/ga100.c | 22 ++++++++++++-------
> .../gpu/drm/nouveau/nvkm/engine/fifo/ga102.c | 1 +
> .../gpu/drm/nouveau/nvkm/engine/fifo/priv.h | 2 ++
> .../nouveau/nvkm/subdev/gsp/rm/r535/fifo.c | 2 +-
> 5 files changed, 20 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
> index fdffa0391b31..6fd4e60634fb 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
> @@ -350,6 +350,8 @@ nvkm_fifo_dtor(struct nvkm_engine *engine)
> nvkm_chid_unref(&fifo->chid);
>
> nvkm_event_fini(&fifo->nonstall.event);
> + if (fifo->func->nonstall_dtor)
> + fifo->func->nonstall_dtor(fifo);
> mutex_destroy(&fifo->mutex);
>
> if (fifo->func->dtor)
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
> index e74493a4569e..81beae473122 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
> @@ -517,19 +517,11 @@ ga100_fifo_nonstall_intr(struct nvkm_inth *inth)
> static void
> ga100_fifo_nonstall_block(struct nvkm_event *event, int type, int index)
> {
> - struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event);
> - struct nvkm_runl *runl = nvkm_runl_get(fifo, index, 0);
> -
> - nvkm_inth_block(&runl->nonstall.inth);
> }
>
> static void
> ga100_fifo_nonstall_allow(struct nvkm_event *event, int type, int index)
> {
> - struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event);
> - struct nvkm_runl *runl = nvkm_runl_get(fifo, index, 0);
> -
> - nvkm_inth_allow(&runl->nonstall.inth);
> }
>
> const struct nvkm_event_func
> @@ -564,12 +556,25 @@ ga100_fifo_nonstall_ctor(struct nvkm_fifo *fifo)
> if (ret)
> return ret;
>
> + nvkm_inth_allow(&runl->nonstall.inth);
> +
> nr = max(nr, runl->id + 1);
> }
>
> return nr;
> }
>
> +void
> +ga100_fifo_nonstall_dtor(struct nvkm_fifo *fifo)
> +{
> + struct nvkm_runl *runl;
> + nvkm_runl_foreach(runl, fifo) {
> + if (runl->nonstall.vector < 0)
> + continue;
> + nvkm_inth_block(&runl->nonstall.inth);
> + }
> +}
> +
> int
> ga100_fifo_runl_ctor(struct nvkm_fifo *fifo)
> {
> @@ -599,6 +604,7 @@ ga100_fifo = {
> .runl_ctor = ga100_fifo_runl_ctor,
> .mmu_fault = &tu102_fifo_mmu_fault,
> .nonstall_ctor = ga100_fifo_nonstall_ctor,
> + .nonstall_dtor = ga100_fifo_nonstall_dtor,
> .nonstall = &ga100_fifo_nonstall,
> .runl = &ga100_runl,
> .runq = &ga100_runq,
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c
> index 755235f55b3a..18a0b1f4eab7 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c
> @@ -30,6 +30,7 @@ ga102_fifo = {
> .runl_ctor = ga100_fifo_runl_ctor,
> .mmu_fault = &tu102_fifo_mmu_fault,
> .nonstall_ctor = ga100_fifo_nonstall_ctor,
> + .nonstall_dtor = ga100_fifo_nonstall_dtor,
> .nonstall = &ga100_fifo_nonstall,
> .runl = &ga100_runl,
> .runq = &ga100_runq,
> diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
> index 5e81ae195329..fff1428ef267 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
> +++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
> @@ -41,6 +41,7 @@ struct nvkm_fifo_func {
> void (*start)(struct nvkm_fifo *, unsigned long *);
>
> int (*nonstall_ctor)(struct nvkm_fifo *);
> + void (*nonstall_dtor)(struct nvkm_fifo *);
> const struct nvkm_event_func *nonstall;
>
> const struct nvkm_runl_func *runl;
> @@ -200,6 +201,7 @@ u32 tu102_chan_doorbell_handle(struct nvkm_chan *);
>
> int ga100_fifo_runl_ctor(struct nvkm_fifo *);
> int ga100_fifo_nonstall_ctor(struct nvkm_fifo *);
> +void ga100_fifo_nonstall_dtor(struct nvkm_fifo *);
> extern const struct nvkm_event_func ga100_fifo_nonstall;
> extern const struct nvkm_runl_func ga100_runl;
> extern const struct nvkm_runq_func ga100_runq;
> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c
> index 1ac5628c5140..b8be0a872e7a 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/rm/r535/fifo.c
> @@ -601,7 +601,7 @@ r535_fifo_new(const struct nvkm_fifo_func *hw, struct nvkm_device *device,
> rm->chan.func = &r535_chan;
> rm->nonstall = &ga100_fifo_nonstall;
> rm->nonstall_ctor = ga100_fifo_nonstall_ctor;
> -
> + rm->nonstall_dtor = ga100_fifo_nonstall_dtor;
> return nvkm_fifo_new_(rm, device, type, inst, pfifo);
> }
>
> --
> 2.50.1
>
next prev parent reply other threads:[~2025-08-29 14:50 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-29 2:16 [PATCH 1/2] nouveau: fix disabling the nonstall irq due to storm code. (v2) Dave Airlie
2025-08-29 2:16 ` [PATCH 2/2] nouveau: Membar before between semaphore writes and the interrupt Dave Airlie
2025-08-29 18:21 ` Danilo Krummrich
2025-08-29 23:09 ` James Jones
2025-08-29 14:49 ` Faith Ekstrand [this message]
2025-08-29 18:21 ` [PATCH 1/2] nouveau: fix disabling the nonstall irq due to storm code. (v2) Danilo Krummrich
2025-09-01 18:17 ` M Henning
2025-09-01 20:52 ` Dave Airlie
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAOFGe95wF3-8dcbmEs+t=Z_NeXyES4xkTmKToZkExompEq0VFQ@mail.gmail.com' \
--to=faith@gfxstrand.net \
--cc=airlied@gmail.com \
--cc=dakr@kernel.org \
--cc=dri-devel@lists.freedesktop.org \
--cc=nouveau@lists.freedesktop.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).