All of lore.kernel.org
 help / color / mirror / Atom feed
* mesa: Patch that fix/add missing WAIT_RING calls
@ 2010-12-16 11:22 Michel Hermier
       [not found] ` <AANLkTinj4TObB1Mb9mG3T74OQiyKd+vK1Dk-WYuT6=Qc-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  0 siblings, 1 reply; 5+ messages in thread
From: Michel Hermier @ 2010-12-16 11:22 UTC (permalink / raw)
  To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

[-- Attachment #1: Type: text/plain, Size: 305 bytes --]

Hi,
While trying to run a 3D app I needed to modify/add some WAIT_RING
calls so the push buffer is properly checked, before we try to blindly
push to it (sine OUT_RING don't perform this checks yet, I have a
small patch for that for libdrm).
I allready discussed about it with lynxeye and shining on IRC.

[-- Attachment #2: WAIT_RING-fixes.diff --]
[-- Type: application/octet-stream, Size: 10356 bytes --]

diff --git a/src/gallium/drivers/nvfx/nvfx_draw.c b/src/gallium/drivers/nvfx/nvfx_draw.c
index 61f888a..0d14e44 100644
--- a/src/gallium/drivers/nvfx/nvfx_draw.c
+++ b/src/gallium/drivers/nvfx/nvfx_draw.c
@@ -30,7 +30,7 @@ nvfx_render_flush(struct draw_stage *stage, unsigned flags)
 	struct nouveau_channel *chan = nvfx->screen->base.channel;
 
 	if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) {
-		assert(AVAIL_RING(chan) >= 2);
+		WAIT_RING(chan, 2);
 		OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
 		OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP);
 		rs->prim = NV30_3D_VERTEX_BEGIN_END_STOP;
@@ -63,6 +63,7 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
 	/* Switch primitive modes if necessary */
 	if (rs->prim != mode) {
 		if (rs->prim != NV30_3D_VERTEX_BEGIN_END_STOP) {
+			WAIT_RING(chan, 2);
 			OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
 			OUT_RING(chan, NV30_3D_VERTEX_BEGIN_END_STOP);
 		}
@@ -74,16 +75,19 @@ nvfx_render_prim(struct draw_stage *stage, struct prim_header *prim,
 			int i;
 			for(i = 0; i < 32; ++i)
 			{
+				WAIT_RING(chan, 2);
 				OUT_RING(chan, RING_3D(0x1dac, 1));
 				OUT_RING(chan, 0);
 			}
 		}
 
+		WAIT_RING(chan, 2);
 		OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
 		OUT_RING  (chan, mode);
 		rs->prim = mode;
 	}
 
+	WAIT_RING(chan, num_attribs * 4 * count + 1);
 	OUT_RING(chan, RING_3D_NI(NV30_3D_VERTEX_DATA, num_attribs * 4 * count));
 	if(no_elements) {
 		OUT_RING(chan, 0);
diff --git a/src/gallium/drivers/nvfx/nvfx_push.c b/src/gallium/drivers/nvfx/nvfx_push.c
index ebf47e6..ecde30b 100644
--- a/src/gallium/drivers/nvfx/nvfx_push.c
+++ b/src/gallium/drivers/nvfx/nvfx_push.c
@@ -29,6 +29,7 @@ emit_edgeflag(void *priv, boolean enabled)
 	struct push_context* ctx = priv;
 	struct nouveau_channel *chan = ctx->chan;
 
+	WAIT_RING(chan, 2);
 	OUT_RING(chan, RING_3D(NV30_3D_EDGEFLAG, 1));
 	OUT_RING(chan, enabled ? 1 : 0);
 }
@@ -44,6 +45,7 @@ emit_vertices_lookup8(void *priv, unsigned start, unsigned count)
                 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
                 unsigned length = push * ctx->vertex_length;
 
+		WAIT_RING(ctx->chan, length + 1);
                 OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
                 ctx->translate->run_elts8(ctx->translate, elts, push, 0, ctx->chan->cur);
                 ctx->chan->cur += length;
@@ -64,6 +66,7 @@ emit_vertices_lookup16(void *priv, unsigned start, unsigned count)
                 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
                 unsigned length = push * ctx->vertex_length;
 
+		WAIT_RING(ctx->chan, length + 1);
                 OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
                 ctx->translate->run_elts16(ctx->translate, elts, push, 0, ctx->chan->cur);
                 ctx->chan->cur += length;
@@ -84,6 +87,7 @@ emit_vertices_lookup32(void *priv, unsigned start, unsigned count)
                 unsigned push = MIN2(count, ctx->max_vertices_per_packet);
                 unsigned length = push * ctx->vertex_length;
 
+		WAIT_RING(ctx->chan, length + 1);
                 OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
                 ctx->translate->run_elts(ctx->translate, elts, push, 0, ctx->chan->cur);
                 ctx->chan->cur += length;
@@ -103,6 +107,7 @@ emit_vertices(void *priv, unsigned start, unsigned count)
 		unsigned push = MIN2(count, ctx->max_vertices_per_packet);
 		unsigned length = push * ctx->vertex_length;
 
+		WAIT_RING(ctx->chan, length + 1);
 		OUT_RING(ctx->chan, RING_3D_NI(NV30_3D_VERTEX_DATA, length));
 		ctx->translate->run(ctx->translate, start, push, 0, ctx->chan->cur);
 		ctx->chan->cur += length;
@@ -119,8 +124,9 @@ emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg)
 	struct nouveau_channel *chan = ctx->chan;
 	unsigned nr = (vc & 0xff);
 	if (nr) {
+		WAIT_RING(chan, 2);
 		OUT_RING(chan, RING_3D(reg, 1));
-		OUT_RING  (chan, ((nr - 1) << 24) | start);
+		OUT_RING(chan, ((nr - 1) << 24) | start);
 		start += nr;
 	}
 
@@ -130,6 +136,7 @@ emit_ranges(void* priv, unsigned start, unsigned vc, unsigned reg)
 
 		nr -= push;
 
+		WAIT_RING(chan, push + 1);
 		OUT_RING(chan, RING_3D_NI(reg, push));
 		while (push--) {
 			OUT_RING(chan, ((0x100 - 1) << 24) | start);
@@ -159,8 +166,9 @@ emit_elt8(void* priv, unsigned start, unsigned vc)
 	int idxbias = ctx->idxbias;
 
 	if (vc & 1) {
+		WAIT_RING(chan, 2);
 		OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1));
-		OUT_RING  (chan, elts[0]);
+		OUT_RING(chan, elts[0]);
 		elts++; vc--;
 	}
 
@@ -168,6 +176,7 @@ emit_elt8(void* priv, unsigned start, unsigned vc)
 		unsigned i;
 		unsigned push = MIN2(vc, 2047 * 2);
 
+		WAIT_RING(chan, (push >> 1) + 1);
 		OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1));
 		for (i = 0; i < push; i+=2)
 			OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
@@ -186,6 +195,7 @@ emit_elt16(void* priv, unsigned start, unsigned vc)
 	int idxbias = ctx->idxbias;
 
 	if (vc & 1) {
+		WAIT_RING(chan, 2);
 		OUT_RING(chan, RING_3D(NV30_3D_VB_ELEMENT_U32, 1));
 		OUT_RING  (chan, elts[0]);
 		elts++; vc--;
@@ -195,6 +205,7 @@ emit_elt16(void* priv, unsigned start, unsigned vc)
 		unsigned i;
 		unsigned push = MIN2(vc, 2047 * 2);
 
+		WAIT_RING(chan, (push >> 1) + 1);
 		OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U16, push >> 1));
 		for (i = 0; i < push; i+=2)
 			OUT_RING(chan, ((elts[i+1] + idxbias) << 16) | (elts[i] + idxbias));
@@ -215,6 +226,7 @@ emit_elt32(void* priv, unsigned start, unsigned vc)
 	while (vc) {
 		unsigned push = MIN2(vc, 2047);
 
+		WAIT_RING(chan, push + 1);
 		OUT_RING(chan, RING_3D_NI(NV30_3D_VB_ELEMENT_U32, push));
 		assert(AVAIL_RING(chan) >= push);
 		if(idxbias)
@@ -374,14 +386,17 @@ nvfx_push_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
 					int i;
 					for(i = 0; i < 32; ++i)
 					{
+						WAIT_RING(chan, 2);
 						OUT_RING(chan, RING_3D(0x1dac, 1));
 						OUT_RING(chan, 0);
 					}
 				}
 
+				WAIT_RING(chan, 2);
 				OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
 				OUT_RING(chan, hw_mode);
 				done = util_split_prim_next(&s, max_verts);
+				WAIT_RING(chan, 2);
 				OUT_RING(chan, RING_3D(NV30_3D_VERTEX_BEGIN_END, 1));
 				OUT_RING(chan, 0);
 
diff --git a/src/gallium/drivers/nvfx/nvfx_state_emit.c b/src/gallium/drivers/nvfx/nvfx_state_emit.c
index 501fdd4..670ef39 100644
--- a/src/gallium/drivers/nvfx/nvfx_state_emit.c
+++ b/src/gallium/drivers/nvfx/nvfx_state_emit.c
@@ -130,7 +130,7 @@ nvfx_ucp_validate(struct nvfx_context* nvfx)
 		OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANES_ENABLE, 1));
 		OUT_RING(chan, 0);
 
-		WAIT_RING(chan, 6 * 4 + 1);
+		WAIT_RING(chan, nvfx->clip.nr * 4 + 1);
 		OUT_RING(chan, RING_3D(NV30_3D_VP_CLIP_PLANE(0, 0), nvfx->clip.nr * 4));
 		OUT_RINGp(chan, &nvfx->clip.ucp[0][0], nvfx->clip.nr * 4);
 	}
@@ -155,6 +155,7 @@ nvfx_vertprog_ucp_validate(struct nvfx_context* nvfx)
 		if(vp->clip_nr >= 0)
 		{
 			idx = vp->nr_insns - 7 + vp->clip_nr;
+			WAIT_RING(chan, 7);
 			OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
 			OUT_RING(chan,  vp->exec->start + idx);
 			OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
@@ -163,6 +164,7 @@ nvfx_vertprog_ucp_validate(struct nvfx_context* nvfx)
 
 		 /* set last instruction bit */
 		idx = vp->nr_insns - 7 + nvfx->clip.nr;
+		WAIT_RING(chan, 7);
 		OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_FROM_ID, 1));
 		OUT_RING(chan,  vp->exec->start + idx);
 		OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_INST(0), 4));
@@ -172,9 +174,9 @@ nvfx_vertprog_ucp_validate(struct nvfx_context* nvfx)
 	}
 
 	// TODO: only do this for the ones changed
-	WAIT_RING(chan, 6 * 6);
 	for(i = 0; i < nvfx->clip.nr; ++i)
 	{
+		WAIT_RING(chan, 6);
 		OUT_RING(chan, RING_3D(NV30_3D_VP_UPLOAD_CONST_ID, 5));
 		OUT_RING(chan, vp->data->start + i);
 		OUT_RINGp (chan, nvfx->clip.ucp[i], 4);
diff --git a/src/gallium/drivers/nvfx/nvfx_vbo.c b/src/gallium/drivers/nvfx/nvfx_vbo.c
index 597664e..6641c15 100644
--- a/src/gallium/drivers/nvfx/nvfx_vbo.c
+++ b/src/gallium/drivers/nvfx/nvfx_vbo.c
@@ -254,7 +254,6 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
 	if (!elements)
 		return TRUE;
 
-	MARK_RING(chan, (5 + 2) * 16 + 2 + 11, 16 + 2);
 	for(unsigned i = 0; i < nvfx->vtxelt->num_constant; ++i)
 	{
 		struct nvfx_low_frequency_element *ve = &nvfx->vtxelt->constant[i];
@@ -262,10 +261,11 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
 		struct nvfx_buffer* buffer = nvfx_buffer(vb->buffer);
 		float v[4];
 		ve->fetch_rgba_float(v, buffer->data + vb->buffer_offset + ve->src_offset, 0, 0);
+		WAIT_RING(chan, ve->ncomp + 1);
 		nvfx_emit_vtx_attr(chan, ve->idx, v, ve->ncomp);
 	}
 
-
+	WAIT_RING(chan, elements + 1);
 	OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
 	if(nvfx->use_vertex_buffers)
 	{
@@ -297,11 +297,13 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
 		unsigned i;
 		/* seems to be some kind of cache flushing */
 		for(i = 0; i < 3; ++i) {
+			WAIT_RING(chan, 2);
 			OUT_RING(chan, RING_3D(0x1718, 1));
 			OUT_RING(chan, 0);
 		}
 	}
 
+	MARK_RING(chan, elements + 1, nvfx->vtxelt->num_per_vertex);
 	OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
 	if(nvfx->use_vertex_buffers)
 	{
@@ -330,6 +332,7 @@ nvfx_vbo_validate(struct nvfx_context *nvfx)
 			OUT_RING(chan, 0);
 	}
 
+	WAIT_RING(chan, 2);
 	OUT_RING(chan, RING_3D(0x1710, 1));
 	OUT_RING(chan, 0);
 
@@ -348,8 +351,7 @@ nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx)
 	if (!elements)
 		return;
 
-	WAIT_RING(chan, (1 + 6 + 1 + 2) + elements * 2);
-
+	WAIT_RING(chan, elements + 1);
 	OUT_RING(chan, RING_3D(NV30_3D_VTXFMT(0), elements));
 	for(unsigned i = 0; i < num_outputs; ++i)
 		OUT_RING(chan, (4 << NV30_3D_VTXFMT_SIZE__SHIFT) | NV30_3D_VTXFMT_TYPE_V32_FLOAT);
@@ -360,15 +362,18 @@ nvfx_vbo_swtnl_validate(struct nvfx_context *nvfx)
 		unsigned i;
 		/* seems to be some kind of cache flushing */
 		for(i = 0; i < 3; ++i) {
+			WAIT_RING(chan, 2);
 			OUT_RING(chan, RING_3D(0x1718, 1));
 			OUT_RING(chan, 0);
 		}
 	}
 
+	WAIT_RING(chan, elements +1);
 	OUT_RING(chan, RING_3D(NV30_3D_VTXBUF(0), elements));
 	for (unsigned i = 0; i < elements; i++)
 		OUT_RING(chan, 0);
 
+	WAIT_RING(chan, 2);
 	OUT_RING(chan, RING_3D(0x1710, 1));
 	OUT_RING(chan, 0);
 

[-- Attachment #3: Type: text/plain, Size: 181 bytes --]

_______________________________________________
Nouveau mailing list
Nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: mesa: Patch that fix/add missing WAIT_RING calls
       [not found] ` <AANLkTinj4TObB1Mb9mG3T74OQiyKd+vK1Dk-WYuT6=Qc-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2010-12-16 16:20   ` Lucas Stach
  2010-12-17  9:23     ` Michel Hermier
  2010-12-19 13:32   ` Xavier Chantry
  1 sibling, 1 reply; 5+ messages in thread
From: Lucas Stach @ 2010-12-16 16:20 UTC (permalink / raw)
  To: Michel Hermier; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

Hi Michel,

had a short look on this, as far as i can tell at the time the patch
looks fairly good and we really need this checks.

Some small notes:

1. In loops with fixed iterations the WAIT_RING should be called before
the loop, not inside. I think it is ok if we waste a few pushbuf entries
to avoid calling libdrm too often.

See for example nvfx_push_vbo and nvfx_render_prim.

2. You decreased the size of the checks in several places. We should
review all of them before pushing this changes. I remember some of them
had some offset for some crude reason. I will review them by this
weekend if no one does it earlier.

-- lynxeye


Am Donnerstag, den 16.12.2010, 12:22 +0100 schrieb Michel Hermier:
> Hi,
> While trying to run a 3D app I needed to modify/add some WAIT_RING
> calls so the push buffer is properly checked, before we try to blindly
> push to it (sine OUT_RING don't perform this checks yet, I have a
> small patch for that for libdrm).
> I allready discussed about it with lynxeye and shining on IRC.
> _______________________________________________
> Nouveau mailing list
> Nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW@public.gmane.org
> http://lists.freedesktop.org/mailman/listinfo/nouveau

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: mesa: Patch that fix/add missing WAIT_RING calls
  2010-12-16 16:20   ` Lucas Stach
@ 2010-12-17  9:23     ` Michel Hermier
  0 siblings, 0 replies; 5+ messages in thread
From: Michel Hermier @ 2010-12-17  9:23 UTC (permalink / raw)
  To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

2010/12/16 Lucas Stach <dev-8ppwABl0HbeELgA04lAiVw@public.gmane.org>:
> Hi Michel,
>
> had a short look on this, as far as i can tell at the time the patch
> looks fairly good and we really need this checks.
>
> Some small notes:
>
> 1. In loops with fixed iterations the WAIT_RING should be called before
> the loop, not inside. I think it is ok if we waste a few pushbuf entries
> to avoid calling libdrm too often.
>
> See for example nvfx_push_vbo and nvfx_render_prim.
>

Calling WAIT_RING is almost a NOOP since most of the function is
inlined in the short path (more likely to happen, else the buffer is
too really small).
So wasting is few pushbuffer entries is ok for me, but since it is
almost a NOOP, I see it as a bad practice. I mean, if you group the
atomical RING command together, one can hardly make a misstake.
On IRC I was advised to change most of the WAIT_RING to BEGIN_RING,
that as quite some sence to me. Even if I guess the subchannel was
binded explicitly for performance reasons or because it's the only
subchannel to support the 3D objects (I didn't checked, but I guess
it's a pure convention), lets be in the safe way, if one day we remove
the explicit bind. While I agree it as some cost to call BEGIN_RING,
it still sounds quite resonable, since it is inlined.


>
> 2. You decreased the size of the checks in several places. We should
> review all of them before pushing this changes. I remember some of them
> had some offset for some crude reason. I will review them by this
> weekend if no one does it earlier.
>

Review them ;) I'm quite confident since I used one game (using quake
engine) for around 45 minutes before quitting cleanly, while in the
past it was crashing randomly ine less then 1 minutes when huge vertex
loads happened ;)

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: mesa: Patch that fix/add missing WAIT_RING calls
       [not found] ` <AANLkTinj4TObB1Mb9mG3T74OQiyKd+vK1Dk-WYuT6=Qc-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  2010-12-16 16:20   ` Lucas Stach
@ 2010-12-19 13:32   ` Xavier Chantry
       [not found]     ` <AANLkTikmqfYmmNqrNxYAn9f2KiP6V2fk01uCLuOnGZSj-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
  1 sibling, 1 reply; 5+ messages in thread
From: Xavier Chantry @ 2010-12-19 13:32 UTC (permalink / raw)
  To: Michel Hermier; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

On Thu, Dec 16, 2010 at 12:22 PM, Michel Hermier
<michel.hermier-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
> Hi,
> While trying to run a 3D app I needed to modify/add some WAIT_RING
> calls so the push buffer is properly checked, before we try to blindly
> push to it (sine OUT_RING don't perform this checks yet, I have a
> small patch for that for libdrm).
> I allready discussed about it with lynxeye and shining on IRC.
>

As calim already said (and I again yesterday evening), it seems you
failed to account all the existing WAIT_RING already in place.

Either someone wants to revert Luca's change and replace back all the
WAIT_RING/OUT_RING by BEGIN_RING (which means having to compute the
required size once per method, so the size check would be closer to
the OUT_RING calls and easier to follow)...

... or we need to actually figure out where the current code exactly
fails and why. IE which WAIT_RING is missing or which size is wrongly
computed.

I think we should try the second path in any case just to see.

So please provide us with a precise test-case (like hardware, libdrm
assert patch, backtrace of the triggered assert, game used, ...)
Thanks :)

12:19 < calim> also, you realize that the WAIT_RINGs already in the
code are supposed to do all the waiting required, right ?
12:19 < hermier> calim, yes, but some of them have broken or missing values
12:20 < calim> but it is likely there are counting mistakes and
forgotten waits - I was just wondering which if the changes in your
patch actually change something
12:20 < calim> *of the changes
12:20 < hermier> yes, it changes things
12:21 < hermier> I have a patch here in libdrm that check the buffer
before OUT_RING is successfull
12:22 < hermier> and it shows that WAIT_RING don't reserve/wait for
enought buffer, leading to buffer corruptions
12:24 < calim> and in which places ? e.g. emit_vertices_* are supposed
to work without WAIT_RINGs because the maximum amount of vertices that
can be written with the available space has been calculated in advance
12:25 < hermier> calim, and it failed
12:25 < hermier> calim, you can't predict that easily
12:26 < hermier> unless you have the data in your hands
12:26 < calim> well, then the calculation should be fixed; also, I'd
prefer to reintroduce BEGIN_RING in nvfx instead of calling WAIT_RING
for single method packets
12:27 < calim> hermier: you can predict it, the size of a vertex is
known, and thus you can derive how many vertices fit into
AVAIL_RING(chan) wors
12:28 < calim> *words
12:28 < curro> indeed, all those WAIT_RING+OUT_RING's should really be
BEGIN_RING's
12:28 < curro> hermier: ^
12:29 < calim> hm, oh - p_overhead in the vertex count calculation has
+ 64 with the comment "magic fix"
12:30 < calim> highly suspicious

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: mesa: Patch that fix/add missing WAIT_RING calls
       [not found]     ` <AANLkTikmqfYmmNqrNxYAn9f2KiP6V2fk01uCLuOnGZSj-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
@ 2010-12-20  9:16       ` Michel Hermier
  0 siblings, 0 replies; 5+ messages in thread
From: Michel Hermier @ 2010-12-20  9:16 UTC (permalink / raw)
  To: Xavier Chantry; +Cc: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW

2010/12/19 Xavier Chantry <chantry.xavier-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org>:
> On Thu, Dec 16, 2010 at 12:22 PM, Michel Hermier
> <michel.hermier-Re5JQEeQqe8AvxtiuMwx3w@public.gmane.org> wrote:
>> Hi,
>> While trying to run a 3D app I needed to modify/add some WAIT_RING
>> calls so the push buffer is properly checked, before we try to blindly
>> push to it (sine OUT_RING don't perform this checks yet, I have a
>> small patch for that for libdrm).
>> I allready discussed about it with lynxeye and shining on IRC.
>>
>
> As calim already said (and I again yesterday evening), it seems you
> failed to account all the existing WAIT_RING already in place.
>
> Either someone wants to revert Luca's change and replace back all the
> WAIT_RING/OUT_RING by BEGIN_RING (which means having to compute the
> required size once per method, so the size check would be closer to
> the OUT_RING calls and easier to follow)...

For me we have to do so, because you can't always predict the buffer
size requirements, before hand. And the example is there, what we are
talkind about. We *lost* near 5 days, trying to debug a WAIT_RING
behing initialised with invalid *static* size argument. How will you
remember to update all the WAIT_RING in all the path that leads to
your code ? I mean it's unmanageable, if you add a simple inocent
function call that eat a little of your buffer you are done. You'll
trigger some random segfault.

In this particular case maybe there should be a BEGIN_RING for a given
subchannel number if it really helps, but calling to BEGIN_RING before
any function call is an evil necessity.

Anyway it should not cost much to do code like this:

WAIT_RING(chan, SIZE0, + SIZE1 + ...);

BEGIN_RING(chan, CMD_FOO, SIZE0);
OUT_RING(chan, value)...

BEGIN_RING(chan, CMD_BAR, SIZE1);
OUT_RING(chan, value)...

If all is batched in a single pass, and since gcc optimise quite well
the call to WAIT_RING in the BEGIN_RING since almost all the pointer
conditions are predictable, and it should eliminates
unecessary/redudent flushing code. And if you miss the optimisation
the code stay valids anyway.

The only corner case with this, is loops, You can't garantee that
tomorrow you won't loop more than expected (and that you have taken
into account all the MAX conditions), and there is no chance to
optimise this.

It's my point of view, but that Luca's change should be reverted, for
me it's premature optimisation, and a maintenance nightmare since it
will trigger randomly and sporadic crash. But I agree that since the
3D use a dedicated subchannel, there should be a BEGIN_RING equivalent
for dedicated subchannel (that would avoid the subchannel autobinding
path).

> ... or we need to actually figure out where the current code exactly
> fails and why. IE which WAIT_RING is missing or which size is wrongly
> computed.
>
> I think we should try the second path in any case just to see.
>
> So please provide us with a precise test-case (like hardware, libdrm
> assert patch, backtrace of the triggered assert, game used, ...)
> Thanks :)
>
> 12:19 < calim> also, you realize that the WAIT_RINGs already in the
> code are supposed to do all the waiting required, right ?
> 12:19 < hermier> calim, yes, but some of them have broken or missing values
> 12:20 < calim> but it is likely there are counting mistakes and
> forgotten waits - I was just wondering which if the changes in your
> patch actually change something
> 12:20 < calim> *of the changes
> 12:20 < hermier> yes, it changes things
> 12:21 < hermier> I have a patch here in libdrm that check the buffer
> before OUT_RING is successfull
> 12:22 < hermier> and it shows that WAIT_RING don't reserve/wait for
> enought buffer, leading to buffer corruptions
> 12:24 < calim> and in which places ? e.g. emit_vertices_* are supposed
> to work without WAIT_RINGs because the maximum amount of vertices that
> can be written with the available space has been calculated in advance
> 12:25 < hermier> calim, and it failed
> 12:25 < hermier> calim, you can't predict that easily
> 12:26 < hermier> unless you have the data in your hands
> 12:26 < calim> well, then the calculation should be fixed; also, I'd
> prefer to reintroduce BEGIN_RING in nvfx instead of calling WAIT_RING
> for single method packets
> 12:27 < calim> hermier: you can predict it, the size of a vertex is
> known, and thus you can derive how many vertices fit into
> AVAIL_RING(chan) wors
> 12:28 < calim> *words
> 12:28 < curro> indeed, all those WAIT_RING+OUT_RING's should really be
> BEGIN_RING's
> 12:28 < curro> hermier: ^
> 12:29 < calim> hm, oh - p_overhead in the vertex count calculation has
> + 64 with the comment "magic fix"
> 12:30 < calim> highly suspicious
>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2010-12-20  9:16 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-12-16 11:22 mesa: Patch that fix/add missing WAIT_RING calls Michel Hermier
     [not found] ` <AANLkTinj4TObB1Mb9mG3T74OQiyKd+vK1Dk-WYuT6=Qc-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-12-16 16:20   ` Lucas Stach
2010-12-17  9:23     ` Michel Hermier
2010-12-19 13:32   ` Xavier Chantry
     [not found]     ` <AANLkTikmqfYmmNqrNxYAn9f2KiP6V2fk01uCLuOnGZSj-JsoAwUIsXosN+BqQ9rBEUg@public.gmane.org>
2010-12-20  9:16       ` Michel Hermier

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.