* [PATCH] drm/amdgpu:implement CONTEXT_CONTROL (v5)
@ 2016-09-08 5:29 Monk Liu
[not found] ` <1473312556-26795-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: Monk Liu @ 2016-09-08 5:29 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu
v1:
for gfx8, use the CONTEXT_CONTROL packet to dynamically
skip the preamble CEIB and other load_xxx commands in sequence.
v2:
support GFX7 as well, and bump up version.
remove cntxcntl in compute ring funcs because CPC doesn't
support this packet.
v3: fix redundant judgement in cntxcntl.
v4: some cleanups, don't change cs_submit()
v5: keep old MESA supported & bump up KMS version.
Change-Id: I7b2adc15ea83fd6c4d2521d75662bf39587898d5
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Acked-by: Chunming Zhou <David1.Zhou@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 ++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 8 ++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 12 +++++++++++-
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 20 ++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 30 ++++++++++++++++++++++++++++++
6 files changed, 78 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9d39fa8..8c6d8c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -321,6 +321,7 @@ struct amdgpu_ring_funcs {
void (*begin_use)(struct amdgpu_ring *ring);
void (*end_use)(struct amdgpu_ring *ring);
void (*emit_switch_buffer) (struct amdgpu_ring *ring);
+ void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
};
/*
@@ -966,6 +967,7 @@ struct amdgpu_ctx {
spinlock_t ring_lock;
struct fence **fences;
struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
+ bool preamble_presented;
};
struct amdgpu_ctx_mgr {
@@ -1231,6 +1233,10 @@ struct amdgpu_cs_parser {
struct amdgpu_bo_list_entry uf_entry;
};
+#define PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */
+#define PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */
+#define HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */
+
struct amdgpu_job {
struct amd_sched_job base;
struct amdgpu_device *adev;
@@ -1239,6 +1245,7 @@ struct amdgpu_job {
struct amdgpu_sync sync;
struct amdgpu_ib *ibs;
struct fence *fence; /* the hw fence */
+ uint32_t preamble_status;
uint32_t num_ibs;
void *owner;
uint64_t fence_ctx; /* the fence_context this job uses */
@@ -2284,6 +2291,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
+#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 20a1962..2386c7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -851,6 +851,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
if (r)
return r;
+ if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
+ parser->job->preamble_status |= PREAMBLE_IB_PRESENT;
+ if (!parser->ctx->preamble_presented) {
+ parser->job->preamble_status |= PREAMBLE_IB_PRESENT_FIRST;
+ parser->ctx->preamble_presented = true;
+ }
+ }
+
if (parser->job->ring && parser->job->ring != ring)
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 21b8cd6..e54049a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -55,6 +55,7 @@
* - 3.3.0 - Add VM support for UVD on supported hardware.
* - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS.
* - 3.5.0 - Add support for new UVD_NO_OP register.
+ * - 3.6.0 - kmd involves use CONTEXT_CONTROL in ring buffer.
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 5
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 2fe2686..1af52be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -125,6 +125,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
unsigned patch_offset = ~0;
struct amdgpu_vm *vm;
uint64_t fence_ctx;
+ uint32_t status = 0;
unsigned i;
int r = 0;
@@ -176,11 +177,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
skip_preamble = ring->current_ctx == fence_ctx;
need_ctx_switch = ring->current_ctx != fence_ctx;
+ if (job && ring->funcs->emit_cntxcntl) {
+ if (need_ctx_switch)
+ status |= HAVE_CTX_SWITCH;
+ status |= job->preamble_status;
+ amdgpu_ring_emit_cntxcntl(ring, status);
+ }
+
for (i = 0; i < num_ibs; ++i) {
ib = &ibs[i];
/* drop preamble IBs if we don't have a context switch */
- if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
+ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
+ skip_preamble &&
+ !(status & PREAMBLE_IB_PRESENT_FIRST))
continue;
amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 84ad13e..41b8654 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2096,6 +2096,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, control);
}
+static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+ if (flags & HAVE_CTX_SWITCH) {
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs */
+ dw2 |= 0x10002;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
/**
* gfx_v7_0_ring_test_ib - basic ring IB test
*
@@ -4930,6 +4949,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
.test_ib = gfx_v7_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
};
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 1ac9ef7..1bf8c9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6080,6 +6080,35 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0);
}
+static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+ if (flags & HAVE_CTX_SWITCH) {
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs for GFX */
+ dw2 |= 0x10002;
+
+ /* set load_ce_ram if preamble presented */
+ if (PREAMBLE_IB_PRESENT & flags)
+ dw2 |= 0x10000000;
+ } else {
+ /* still load_ce_ram if this is the first time preamble presented
+ * although there is no context switch happens.
+ */
+ if (PREAMBLE_IB_PRESENT_FIRST & flags)
+ dw2 |= 0x10000000;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
@@ -6262,6 +6291,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v8_ring_emit_sb,
+ .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
--
1.9.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH] drm/amdgpu:implement CONTEXT_CONTROL (v5)
[not found] ` <1473312556-26795-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2016-09-08 6:00 ` zhoucm1
[not found] ` <57D0FE97.5020403-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: zhoucm1 @ 2016-09-08 6:00 UTC (permalink / raw)
To: Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
On 2016年09月08日 13:29, Monk Liu wrote:
> v1:
> for gfx8, use CONTEXT_CONTROL package to dynamically
> skip preamble CEIB and other load_xxx command in sequence.
>
> v2:
> support GFX7 as well, and bump up version.
> remove cntxcntl in compute ring funcs because CPC doesn't
> support this packet.
>
> v3: fix reduntant judgement in cntxcntl.
> v4: some cleanups, don't change cs_submit()
> v5: keep old MESA supported & bump up KMS version.
>
> Change-Id: I7b2adc15ea83fd6c4d2521d75662bf39587898d5
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> Ack-by: Chunming Zhou <David1.Zhou@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 ++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 8 ++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 12 +++++++++++-
> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 20 ++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 30 ++++++++++++++++++++++++++++++
> 6 files changed, 78 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 9d39fa8..8c6d8c6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -321,6 +321,7 @@ struct amdgpu_ring_funcs {
> void (*begin_use)(struct amdgpu_ring *ring);
> void (*end_use)(struct amdgpu_ring *ring);
> void (*emit_switch_buffer) (struct amdgpu_ring *ring);
> + void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
> };
>
> /*
> @@ -966,6 +967,7 @@ struct amdgpu_ctx {
> spinlock_t ring_lock;
> struct fence **fences;
> struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
> + bool preamble_presented;
> };
>
> struct amdgpu_ctx_mgr {
> @@ -1231,6 +1233,10 @@ struct amdgpu_cs_parser {
> struct amdgpu_bo_list_entry uf_entry;
> };
>
> +#define PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */
> +#define PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */
> +#define HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */
> +
> struct amdgpu_job {
> struct amd_sched_job base;
> struct amdgpu_device *adev;
> @@ -1239,6 +1245,7 @@ struct amdgpu_job {
> struct amdgpu_sync sync;
> struct amdgpu_ib *ibs;
> struct fence *fence; /* the hw fence */
> + uint32_t preamble_status;
> uint32_t num_ibs;
> void *owner;
> uint64_t fence_ctx; /* the fence_context this job uses */
> @@ -2284,6 +2291,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
> #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
> #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
> #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
> +#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
> #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
> #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 20a1962..2386c7b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -851,6 +851,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
> if (r)
> return r;
>
> + if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
> + parser->job->preamble_status |= PREAMBLE_IB_PRESENT;
> + if (!parser->ctx->preamble_presented) {
> + parser->job->preamble_status |= PREAMBLE_IB_PRESENT_FIRST;
> + parser->ctx->preamble_presented = true;
> + }
> + }
> +
> if (parser->job->ring && parser->job->ring != ring)
> return -EINVAL;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 21b8cd6..e54049a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -55,6 +55,7 @@
> * - 3.3.0 - Add VM support for UVD on supported hardware.
> * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS.
> * - 3.5.0 - Add support for new UVD_NO_OP register.
> + * - 3.6.0 - kmd involves use CONTEXT_CONTROL in ring buffer.
> */
> #define KMS_DRIVER_MAJOR 3
> #define KMS_DRIVER_MINOR 5
Forgot to change it to 6?
Regards,
David Zhou
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 2fe2686..1af52be 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -125,6 +125,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
> unsigned patch_offset = ~0;
> struct amdgpu_vm *vm;
> uint64_t fence_ctx;
> + uint32_t status = 0;
>
> unsigned i;
> int r = 0;
> @@ -176,11 +177,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>
> skip_preamble = ring->current_ctx == fence_ctx;
> need_ctx_switch = ring->current_ctx != fence_ctx;
> + if (job && ring->funcs->emit_cntxcntl) {
> + if (need_ctx_switch)
> + status |= HAVE_CTX_SWITCH;
> + status |= job->preamble_status;
> + amdgpu_ring_emit_cntxcntl(ring, status);
> + }
> +
> for (i = 0; i < num_ibs; ++i) {
> ib = &ibs[i];
>
> /* drop preamble IBs if we don't have a context switch */
> - if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
> + if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
> + skip_preamble &&
> + !(status & PREAMBLE_IB_PRESENT_FIRST))
> continue;
>
> amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 84ad13e..41b8654 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -2096,6 +2096,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
> amdgpu_ring_write(ring, control);
> }
>
> +static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
> +{
> + uint32_t dw2 = 0;
> +
> + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
> + if (flags & HAVE_CTX_SWITCH) {
> + /* set load_global_config & load_global_uconfig */
> + dw2 |= 0x8001;
> + /* set load_cs_sh_regs */
> + dw2 |= 0x01000000;
> + /* set load_per_context_state & load_gfx_sh_regs */
> + dw2 |= 0x10002;
> + }
> +
> + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
> + amdgpu_ring_write(ring, dw2);
> + amdgpu_ring_write(ring, 0);
> +}
> +
> /**
> * gfx_v7_0_ring_test_ib - basic ring IB test
> *
> @@ -4930,6 +4949,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
> .test_ib = gfx_v7_0_ring_test_ib,
> .insert_nop = amdgpu_ring_insert_nop,
> .pad_ib = amdgpu_ring_generic_pad_ib,
> + .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
> };
>
> static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 1ac9ef7..1bf8c9e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6080,6 +6080,35 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
> amdgpu_ring_write(ring, 0);
> }
>
> +static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
> +{
> + uint32_t dw2 = 0;
> +
> + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
> + if (flags & HAVE_CTX_SWITCH) {
> + /* set load_global_config & load_global_uconfig */
> + dw2 |= 0x8001;
> + /* set load_cs_sh_regs */
> + dw2 |= 0x01000000;
> + /* set load_per_context_state & load_gfx_sh_regs for GFX */
> + dw2 |= 0x10002;
> +
> + /* set load_ce_ram if preamble presented */
> + if (PREAMBLE_IB_PRESENT & flags)
> + dw2 |= 0x10000000;
> + } else {
> + /* still load_ce_ram if this is the first time preamble presented
> + * although there is no context switch happens.
> + */
> + if (PREAMBLE_IB_PRESENT_FIRST & flags)
> + dw2 |= 0x10000000;
> + }
> +
> + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
> + amdgpu_ring_write(ring, dw2);
> + amdgpu_ring_write(ring, 0);
> +}
> +
> static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
> enum amdgpu_interrupt_state state)
> {
> @@ -6262,6 +6291,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
> .insert_nop = amdgpu_ring_insert_nop,
> .pad_ib = amdgpu_ring_generic_pad_ib,
> .emit_switch_buffer = gfx_v8_ring_emit_sb,
> + .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
> };
>
> static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH] drm/amdgpu:implement CONTEXT_CONTROL (v5)
@ 2016-09-08 6:32 Monk Liu
[not found] ` <1473316377-15377-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
0 siblings, 1 reply; 5+ messages in thread
From: Monk Liu @ 2016-09-08 6:32 UTC (permalink / raw)
To: amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW; +Cc: Monk Liu
v1:
for gfx8, use the CONTEXT_CONTROL packet to dynamically
skip the preamble CEIB and other load_xxx commands in sequence.
v2:
support GFX7 as well, and bump up version.
remove cntxcntl in compute ring funcs because CPC doesn't
support this packet.
v3: fix redundant judgement in cntxcntl.
v4: some cleanups, don't change cs_submit()
v5: keep old MESA supported & bump up KMS version.
Change-Id: I7b2adc15ea83fd6c4d2521d75662bf39587898d5
Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Acked-by: Chunming Zhou <David1.Zhou@amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 ++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 8 ++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 12 +++++++++++-
drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 20 ++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 30 ++++++++++++++++++++++++++++++
6 files changed, 79 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9d39fa8..8c6d8c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -321,6 +321,7 @@ struct amdgpu_ring_funcs {
void (*begin_use)(struct amdgpu_ring *ring);
void (*end_use)(struct amdgpu_ring *ring);
void (*emit_switch_buffer) (struct amdgpu_ring *ring);
+ void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
};
/*
@@ -966,6 +967,7 @@ struct amdgpu_ctx {
spinlock_t ring_lock;
struct fence **fences;
struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
+ bool preamble_presented;
};
struct amdgpu_ctx_mgr {
@@ -1231,6 +1233,10 @@ struct amdgpu_cs_parser {
struct amdgpu_bo_list_entry uf_entry;
};
+#define PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */
+#define PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */
+#define HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */
+
struct amdgpu_job {
struct amd_sched_job base;
struct amdgpu_device *adev;
@@ -1239,6 +1245,7 @@ struct amdgpu_job {
struct amdgpu_sync sync;
struct amdgpu_ib *ibs;
struct fence *fence; /* the hw fence */
+ uint32_t preamble_status;
uint32_t num_ibs;
void *owner;
uint64_t fence_ctx; /* the fence_context this job uses */
@@ -2284,6 +2291,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
+#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 20a1962..2386c7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -851,6 +851,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
if (r)
return r;
+ if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
+ parser->job->preamble_status |= PREAMBLE_IB_PRESENT;
+ if (!parser->ctx->preamble_presented) {
+ parser->job->preamble_status |= PREAMBLE_IB_PRESENT_FIRST;
+ parser->ctx->preamble_presented = true;
+ }
+ }
+
if (parser->job->ring && parser->job->ring != ring)
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 21b8cd6..2a96ce7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -55,9 +55,10 @@
* - 3.3.0 - Add VM support for UVD on supported hardware.
* - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS.
* - 3.5.0 - Add support for new UVD_NO_OP register.
+ * - 3.6.0 - kmd involves use CONTEXT_CONTROL in ring buffer.
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 5
+#define KMS_DRIVER_MINOR 6
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 2fe2686..1af52be 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -125,6 +125,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
unsigned patch_offset = ~0;
struct amdgpu_vm *vm;
uint64_t fence_ctx;
+ uint32_t status = 0;
unsigned i;
int r = 0;
@@ -176,11 +177,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
skip_preamble = ring->current_ctx == fence_ctx;
need_ctx_switch = ring->current_ctx != fence_ctx;
+ if (job && ring->funcs->emit_cntxcntl) {
+ if (need_ctx_switch)
+ status |= HAVE_CTX_SWITCH;
+ status |= job->preamble_status;
+ amdgpu_ring_emit_cntxcntl(ring, status);
+ }
+
for (i = 0; i < num_ibs; ++i) {
ib = &ibs[i];
/* drop preamble IBs if we don't have a context switch */
- if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
+ if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
+ skip_preamble &&
+ !(status & PREAMBLE_IB_PRESENT_FIRST))
continue;
amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 84ad13e..41b8654 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2096,6 +2096,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, control);
}
+static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+ if (flags & HAVE_CTX_SWITCH) {
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs */
+ dw2 |= 0x10002;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
/**
* gfx_v7_0_ring_test_ib - basic ring IB test
*
@@ -4930,6 +4949,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
.test_ib = gfx_v7_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
+ .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
};
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 1ac9ef7..1bf8c9e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6080,6 +6080,35 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0);
}
+static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+ uint32_t dw2 = 0;
+
+ dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+ if (flags & HAVE_CTX_SWITCH) {
+ /* set load_global_config & load_global_uconfig */
+ dw2 |= 0x8001;
+ /* set load_cs_sh_regs */
+ dw2 |= 0x01000000;
+ /* set load_per_context_state & load_gfx_sh_regs for GFX */
+ dw2 |= 0x10002;
+
+ /* set load_ce_ram if preamble presented */
+ if (PREAMBLE_IB_PRESENT & flags)
+ dw2 |= 0x10000000;
+ } else {
+ /* still load_ce_ram if this is the first time preamble presented
+ * although there is no context switch happens.
+ */
+ if (PREAMBLE_IB_PRESENT_FIRST & flags)
+ dw2 |= 0x10000000;
+ }
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+ amdgpu_ring_write(ring, dw2);
+ amdgpu_ring_write(ring, 0);
+}
+
static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
enum amdgpu_interrupt_state state)
{
@@ -6262,6 +6291,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.insert_nop = amdgpu_ring_insert_nop,
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_switch_buffer = gfx_v8_ring_emit_sb,
+ .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
--
1.9.1
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply related [flat|nested] 5+ messages in thread
* RE: [PATCH] drm/amdgpu:implement CONTEXT_CONTROL (v5)
[not found] ` <57D0FE97.5020403-5C7GfCeVMHo@public.gmane.org>
@ 2016-09-08 6:33 ` Liu, Monk
0 siblings, 0 replies; 5+ messages in thread
From: Liu, Monk @ 2016-09-08 6:33 UTC (permalink / raw)
To: Zhou, David(ChunMing),
amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
Oh, yeah. I missed that, thanks!
-----Original Message-----
From: amd-gfx [mailto:amd-gfx-bounces@lists.freedesktop.org] On Behalf Of zhoucm1
Sent: Thursday, September 08, 2016 2:01 PM
To: Liu, Monk <Monk.Liu@amd.com>; amd-gfx@lists.freedesktop.org
Subject: Re: [PATCH] drm/amdgpu:implement CONTEXT_CONTROL (v5)
On 2016年09月08日 13:29, Monk Liu wrote:
> v1:
> for gfx8, use CONTEXT_CONTROL package to dynamically skip preamble
> CEIB and other load_xxx command in sequence.
>
> v2:
> support GFX7 as well, and bump up version.
> remove cntxcntl in compute ring funcs because CPC doesn't support this
> packet.
>
> v3: fix reduntant judgement in cntxcntl.
> v4: some cleanups, don't change cs_submit()
> v5: keep old MESA supported & bump up KMS version.
>
> Change-Id: I7b2adc15ea83fd6c4d2521d75662bf39587898d5
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> Ack-by: Chunming Zhou <David1.Zhou@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 ++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 8 ++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 12 +++++++++++-
> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 20 ++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 30 ++++++++++++++++++++++++++++++
> 6 files changed, 78 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 9d39fa8..8c6d8c6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -321,6 +321,7 @@ struct amdgpu_ring_funcs {
> void (*begin_use)(struct amdgpu_ring *ring);
> void (*end_use)(struct amdgpu_ring *ring);
> void (*emit_switch_buffer) (struct amdgpu_ring *ring);
> + void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
> };
>
> /*
> @@ -966,6 +967,7 @@ struct amdgpu_ctx {
> spinlock_t ring_lock;
> struct fence **fences;
> struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
> + bool preamble_presented;
> };
>
> struct amdgpu_ctx_mgr {
> @@ -1231,6 +1233,10 @@ struct amdgpu_cs_parser {
> struct amdgpu_bo_list_entry uf_entry;
> };
>
> +#define PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */
> +#define PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */
> +#define HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */
> +
> struct amdgpu_job {
> struct amd_sched_job base;
> struct amdgpu_device *adev;
> @@ -1239,6 +1245,7 @@ struct amdgpu_job {
> struct amdgpu_sync sync;
> struct amdgpu_ib *ibs;
> struct fence *fence; /* the hw fence */
> + uint32_t preamble_status;
> uint32_t num_ibs;
> void *owner;
> uint64_t fence_ctx; /* the fence_context this job uses */
> @@ -2284,6 +2291,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
> #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
> #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
> #define amdgpu_ring_emit_switch_buffer(r)
> (r)->funcs->emit_switch_buffer((r))
> +#define amdgpu_ring_emit_cntxcntl(r, d)
> +(r)->funcs->emit_cntxcntl((r), (d))
> #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
> #define amdgpu_ring_patch_cond_exec(r,o)
> (r)->funcs->patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 20a1962..2386c7b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -851,6 +851,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
> if (r)
> return r;
>
> + if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
> + parser->job->preamble_status |= PREAMBLE_IB_PRESENT;
> + if (!parser->ctx->preamble_presented) {
> + parser->job->preamble_status |= PREAMBLE_IB_PRESENT_FIRST;
> + parser->ctx->preamble_presented = true;
> + }
> + }
> +
> if (parser->job->ring && parser->job->ring != ring)
> return -EINVAL;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 21b8cd6..e54049a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -55,6 +55,7 @@
> * - 3.3.0 - Add VM support for UVD on supported hardware.
> * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS.
> * - 3.5.0 - Add support for new UVD_NO_OP register.
> + * - 3.6.0 - kmd involves use CONTEXT_CONTROL in ring buffer.
> */
> #define KMS_DRIVER_MAJOR 3
> #define KMS_DRIVER_MINOR 5
Forgot to change it to 6?
Regards,
David Zhou
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 2fe2686..1af52be 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -125,6 +125,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
> unsigned patch_offset = ~0;
> struct amdgpu_vm *vm;
> uint64_t fence_ctx;
> + uint32_t status = 0;
>
> unsigned i;
> int r = 0;
> @@ -176,11 +177,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring,
> unsigned num_ibs,
>
> skip_preamble = ring->current_ctx == fence_ctx;
> need_ctx_switch = ring->current_ctx != fence_ctx;
> + if (job && ring->funcs->emit_cntxcntl) {
> + if (need_ctx_switch)
> + status |= HAVE_CTX_SWITCH;
> + status |= job->preamble_status;
> + amdgpu_ring_emit_cntxcntl(ring, status);
> + }
> +
> for (i = 0; i < num_ibs; ++i) {
> ib = &ibs[i];
>
> /* drop preamble IBs if we don't have a context switch */
> - if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
> + if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
> + skip_preamble &&
> + !(status & PREAMBLE_IB_PRESENT_FIRST))
> continue;
>
> amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0, diff --git
> a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 84ad13e..41b8654 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -2096,6 +2096,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
> amdgpu_ring_write(ring, control);
> }
>
> +static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring,
> +uint32_t flags) {
> + uint32_t dw2 = 0;
> +
> + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
> + if (flags & HAVE_CTX_SWITCH) {
> + /* set load_global_config & load_global_uconfig */
> + dw2 |= 0x8001;
> + /* set load_cs_sh_regs */
> + dw2 |= 0x01000000;
> + /* set load_per_context_state & load_gfx_sh_regs */
> + dw2 |= 0x10002;
> + }
> +
> + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
> + amdgpu_ring_write(ring, dw2);
> + amdgpu_ring_write(ring, 0);
> +}
> +
> /**
> * gfx_v7_0_ring_test_ib - basic ring IB test
> *
> @@ -4930,6 +4949,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
> .test_ib = gfx_v7_0_ring_test_ib,
> .insert_nop = amdgpu_ring_insert_nop,
> .pad_ib = amdgpu_ring_generic_pad_ib,
> + .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
> };
>
> static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute =
> { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 1ac9ef7..1bf8c9e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6080,6 +6080,35 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
> amdgpu_ring_write(ring, 0);
> }
>
> +static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring,
> +uint32_t flags) {
> + uint32_t dw2 = 0;
> +
> + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
> + if (flags & HAVE_CTX_SWITCH) {
> + /* set load_global_config & load_global_uconfig */
> + dw2 |= 0x8001;
> + /* set load_cs_sh_regs */
> + dw2 |= 0x01000000;
> + /* set load_per_context_state & load_gfx_sh_regs for GFX */
> + dw2 |= 0x10002;
> +
> + /* set load_ce_ram if preamble presented */
> + if (PREAMBLE_IB_PRESENT & flags)
> + dw2 |= 0x10000000;
> + } else {
> + /* still load_ce_ram if this is the first time preamble presented
> + * although there is no context switch happens.
> + */
> + if (PREAMBLE_IB_PRESENT_FIRST & flags)
> + dw2 |= 0x10000000;
> + }
> +
> + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
> + amdgpu_ring_write(ring, dw2);
> + amdgpu_ring_write(ring, 0);
> +}
> +
> static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
> enum amdgpu_interrupt_state state)
> {
> @@ -6262,6 +6291,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
> .insert_nop = amdgpu_ring_insert_nop,
> .pad_ib = amdgpu_ring_generic_pad_ib,
> .emit_switch_buffer = gfx_v8_ring_emit_sb,
> + .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
> };
>
> static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute =
> {
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH] drm/amdgpu:implement CONTEXT_CONTROL (v5)
[not found] ` <1473316377-15377-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
@ 2016-09-08 7:22 ` Christian König
0 siblings, 0 replies; 5+ messages in thread
From: Christian König @ 2016-09-08 7:22 UTC (permalink / raw)
To: Monk Liu, amd-gfx-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW
Am 08.09.2016 um 08:32 schrieb Monk Liu:
> v1:
> for gfx8, use CONTEXT_CONTROL packet to dynamically
> skip preamble CEIB and other load_xxx commands in sequence.
>
> v2:
> support GFX7 as well, and bump up version.
> remove cntxcntl in compute ring funcs because CPC doesn't
> support this packet.
>
> v3: fix redundant judgement in cntxcntl.
> v4: some cleanups, don't change cs_submit()
> v5: keep old MESA supported & bump up KMS version.
>
> Change-Id: I7b2adc15ea83fd6c4d2521d75662bf39587898d5
> Signed-off-by: Monk Liu <Monk.Liu@amd.com>
> Ack-by: Chunming Zhou <David1.Zhou@amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 ++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 8 ++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 12 +++++++++++-
> drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 20 ++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 30 ++++++++++++++++++++++++++++++
> 6 files changed, 79 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 9d39fa8..8c6d8c6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -321,6 +321,7 @@ struct amdgpu_ring_funcs {
> void (*begin_use)(struct amdgpu_ring *ring);
> void (*end_use)(struct amdgpu_ring *ring);
> void (*emit_switch_buffer) (struct amdgpu_ring *ring);
> + void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
> };
>
> /*
> @@ -966,6 +967,7 @@ struct amdgpu_ctx {
> spinlock_t ring_lock;
> struct fence **fences;
> struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS];
> + bool preamble_presented;
> };
>
> struct amdgpu_ctx_mgr {
> @@ -1231,6 +1233,10 @@ struct amdgpu_cs_parser {
> struct amdgpu_bo_list_entry uf_entry;
> };
>
> +#define PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */
> +#define PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */
> +#define HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occurred */
> +
Please add an AMDGPU_ prefix in the names of those defines.
With that fixed the patch is Reviewed-by: Christian König
<christian.koenig@amd.com>.
Regards,
Christian.
> struct amdgpu_job {
> struct amd_sched_job base;
> struct amdgpu_device *adev;
> @@ -1239,6 +1245,7 @@ struct amdgpu_job {
> struct amdgpu_sync sync;
> struct amdgpu_ib *ibs;
> struct fence *fence; /* the hw fence */
> + uint32_t preamble_status;
> uint32_t num_ibs;
> void *owner;
> uint64_t fence_ctx; /* the fence_context this job uses */
> @@ -2284,6 +2291,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
> #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
> #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
> #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
> +#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
> #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
> #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
> #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 20a1962..2386c7b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -851,6 +851,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
> if (r)
> return r;
>
> + if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
> + parser->job->preamble_status |= PREAMBLE_IB_PRESENT;
> + if (!parser->ctx->preamble_presented) {
> + parser->job->preamble_status |= PREAMBLE_IB_PRESENT_FIRST;
> + parser->ctx->preamble_presented = true;
> + }
> + }
> +
> if (parser->job->ring && parser->job->ring != ring)
> return -EINVAL;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 21b8cd6..2a96ce7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -55,9 +55,10 @@
> * - 3.3.0 - Add VM support for UVD on supported hardware.
> * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS.
> * - 3.5.0 - Add support for new UVD_NO_OP register.
> + * - 3.6.0 - kmd uses CONTEXT_CONTROL in the ring buffer.
> */
> #define KMS_DRIVER_MAJOR 3
> -#define KMS_DRIVER_MINOR 5
> +#define KMS_DRIVER_MINOR 6
> #define KMS_DRIVER_PATCHLEVEL 0
>
> int amdgpu_vram_limit = 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> index 2fe2686..1af52be 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
> @@ -125,6 +125,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
> unsigned patch_offset = ~0;
> struct amdgpu_vm *vm;
> uint64_t fence_ctx;
> + uint32_t status = 0;
>
> unsigned i;
> int r = 0;
> @@ -176,11 +177,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
>
> skip_preamble = ring->current_ctx == fence_ctx;
> need_ctx_switch = ring->current_ctx != fence_ctx;
> + if (job && ring->funcs->emit_cntxcntl) {
> + if (need_ctx_switch)
> + status |= HAVE_CTX_SWITCH;
> + status |= job->preamble_status;
> + amdgpu_ring_emit_cntxcntl(ring, status);
> + }
> +
> for (i = 0; i < num_ibs; ++i) {
> ib = &ibs[i];
>
> /* drop preamble IBs if we don't have a context switch */
> - if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
> + if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
> + skip_preamble &&
> + !(status & PREAMBLE_IB_PRESENT_FIRST))
> continue;
>
> amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> index 84ad13e..41b8654 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
> @@ -2096,6 +2096,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
> amdgpu_ring_write(ring, control);
> }
>
> +static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
> +{
> + uint32_t dw2 = 0;
> +
> + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
> + if (flags & HAVE_CTX_SWITCH) {
> + /* set load_global_config & load_global_uconfig */
> + dw2 |= 0x8001;
> + /* set load_cs_sh_regs */
> + dw2 |= 0x01000000;
> + /* set load_per_context_state & load_gfx_sh_regs */
> + dw2 |= 0x10002;
> + }
> +
> + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
> + amdgpu_ring_write(ring, dw2);
> + amdgpu_ring_write(ring, 0);
> +}
> +
> /**
> * gfx_v7_0_ring_test_ib - basic ring IB test
> *
> @@ -4930,6 +4949,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
> .test_ib = gfx_v7_0_ring_test_ib,
> .insert_nop = amdgpu_ring_insert_nop,
> .pad_ib = amdgpu_ring_generic_pad_ib,
> + .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
> };
>
> static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> index 1ac9ef7..1bf8c9e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
> @@ -6080,6 +6080,35 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
> amdgpu_ring_write(ring, 0);
> }
>
> +static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
> +{
> + uint32_t dw2 = 0;
> +
> + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
> + if (flags & HAVE_CTX_SWITCH) {
> + /* set load_global_config & load_global_uconfig */
> + dw2 |= 0x8001;
> + /* set load_cs_sh_regs */
> + dw2 |= 0x01000000;
> + /* set load_per_context_state & load_gfx_sh_regs for GFX */
> + dw2 |= 0x10002;
> +
> + /* set load_ce_ram if preamble presented */
> + if (PREAMBLE_IB_PRESENT & flags)
> + dw2 |= 0x10000000;
> + } else {
> + /* still load_ce_ram if this is the first time the preamble is presented,
> + * even though no context switch happens.
> + */
> + if (PREAMBLE_IB_PRESENT_FIRST & flags)
> + dw2 |= 0x10000000;
> + }
> +
> + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
> + amdgpu_ring_write(ring, dw2);
> + amdgpu_ring_write(ring, 0);
> +}
> +
> static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
> enum amdgpu_interrupt_state state)
> {
> @@ -6262,6 +6291,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
> .insert_nop = amdgpu_ring_insert_nop,
> .pad_ib = amdgpu_ring_generic_pad_ib,
> .emit_switch_buffer = gfx_v8_ring_emit_sb,
> + .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
> };
>
> static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2016-09-08 7:22 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-08 5:29 [PATCH] drm/amdgpu:implement CONTEXT_CONTROL (v5) Monk Liu
[not found] ` <1473312556-26795-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2016-09-08 6:00 ` zhoucm1
[not found] ` <57D0FE97.5020403-5C7GfCeVMHo@public.gmane.org>
2016-09-08 6:33 ` Liu, Monk
-- strict thread matches above, loose matches on Subject: below --
2016-09-08 6:32 Monk Liu
[not found] ` <1473316377-15377-1-git-send-email-Monk.Liu-5C7GfCeVMHo@public.gmane.org>
2016-09-08 7:22 ` Christian König
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.