* Implement buffer_clear for nvc0
@ 2014-05-26 19:53 Tobias Klausmann
2014-05-26 19:53 ` [PATCH] nvc0: Implement buffer_clear for this type of hardware Tobias Klausmann
0 siblings, 1 reply; 3+ messages in thread
From: Tobias Klausmann @ 2014-05-26 19:53 UTC (permalink / raw)
To: nouveau-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
mesa-dev-PD4FTy7X32lNgt0PjOBp9y5qC8QIuHrW,
imirkin-FrUbXkNCsVf2fBVCVOL8/A
Hi, please review the following patch!
Thanks,
Tobias Klausmann
^ permalink raw reply [flat|nested] 3+ messages in thread
* [PATCH] nvc0: Implement buffer_clear for this type of hardware
2014-05-26 19:53 Implement buffer_clear for nvc0 Tobias Klausmann
@ 2014-05-26 19:53 ` Tobias Klausmann
2014-05-26 20:14 ` Ilia Mirkin
0 siblings, 1 reply; 3+ messages in thread
From: Tobias Klausmann @ 2014-05-26 19:53 UTC (permalink / raw)
To: nouveau, mesa-dev, imirkin
---
src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 171 ++++++++++++++++++++++++
1 file changed, 171 insertions(+)
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index 6b7c30c..987b6c4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -345,6 +345,176 @@ nvc0_clear_render_target(struct pipe_context *pipe,
}
static void
+nvc0_clear_buffer_rgb32(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
+{
+ // FIXME: Find a way to do this with the GPU!
+
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv04_resource *buf = nv04_resource(res);
+
+ struct pipe_transfer *pt;
+ struct pipe_box pb;
+ unsigned elements, i;
+ union pipe_color_union color;
+
+ if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr))
+ nouveau_fence_wait(buf->fence_wr);
+
+ memcpy(&color.ui, data, 12);
+ memset(&color.ui[3], 0, 4);
+
+ elements = size / data_size;
+
+ memset(&pb, 0, sizeof(pb));
+ pb.height = elements;
+ pb.width = 1;
+
+ uint8_t *tf_map = buf->vtbl->transfer_map(pipe, res,
+ 0, PIPE_TRANSFER_WRITE, &pb, &pt);
+
+ for (i = 0; i < elements ; ++i) {
+ memcpy(&tf_map[i*12],color.f,12);
+ }
+ buf->vtbl->transfer_unmap(pipe, pt);
+}
+
+static void
+nvc0_clear_buffer(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size,
+ const void *data, int data_size)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv04_resource *buf = nv04_resource(res);
+
+ union pipe_color_union color;
+ enum pipe_format dst_fmt;
+ unsigned width, height, elements;
+
+ assert(res->target == PIPE_BUFFER);
+ assert(nouveau_bo_memtype(buf->bo) == 0);
+
+ switch (data_size) {
+ case 16:
+ dst_fmt = PIPE_FORMAT_R32G32B32A32_UINT;
+ memcpy(&color.ui, data, 16);
+ break;
+ case 12:
+ //dst_fmt = PIPE_FORMAT_R32G32B32_UINT;
+ //memcpy(&color.ui, data, 12);
+ //memset(&color.ui[3], 0, 4);
+ break;
+ case 8:
+ dst_fmt = PIPE_FORMAT_R32G32_UINT;
+ memcpy(&color.ui, data, 8);
+ memset(&color.ui[2], 0, 8);
+ break;
+ case 4:
+ dst_fmt = PIPE_FORMAT_R32_UINT;
+ memcpy(&color.ui, data, 4);
+ memset(&color.ui[1], 0, 12);
+ break;
+ case 2:
+ dst_fmt = PIPE_FORMAT_R16_UINT;
+ color.ui[0] = util_cpu_to_le32(
+ util_le16_to_cpu(*(unsigned short *)data));
+ memset(&color.ui[1], 0, 12);
+ break;
+ case 1:
+ dst_fmt = PIPE_FORMAT_R8_UINT;
+ color.ui[0] = util_cpu_to_le32(*(unsigned char *)data);
+ memset(&color.ui[1], 0, 12);
+ break;
+ default:
+ assert(!"Unsupported element size");
+ return;
+ }
+
+ assert(size % data_size == 0);
+
+ if (data_size != 12) {
+
+ elements = size / data_size;
+
+ height = (elements + 16383) / 16384;
+
+ width = elements / height;
+
+ if (!PUSH_SPACE(push, 40))
+ return;
+
+ PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
+
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4);
+ PUSH_DATAf(push, color.f[0]);
+ PUSH_DATAf(push, color.f[1]);
+ PUSH_DATAf(push, color.f[2]);
+ PUSH_DATAf(push, color.f[3]);
+ BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
+ PUSH_DATA (push, width << 16);
+ PUSH_DATA (push, height << 16);
+
+ BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, 1);
+
+ BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+
+ PUSH_DATA (push, width * data_size);
+ PUSH_DATA (push, height);
+
+ PUSH_DATA (push, nvc0_format_table[dst_fmt].rt);
+ PUSH_DATA (push, 1 << 12);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 1);
+ PUSH_DATA (push, 0x3c);
+
+ if (width * height != elements) {
+ offset += width * height * data_size;
+ width = elements - width * height;
+ height = 1;
+
+ BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
+ PUSH_DATA (push, width << 16);
+ PUSH_DATA (push, height << 16);
+
+ BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, 1);
+
+ BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 2);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+
+ BEGIN_NVC0(push, NVC0_3D(RT_HORIZ(0)), 2);
+ PUSH_DATA (push, width * data_size);
+ PUSH_DATA (push, height);
+
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 1);
+ PUSH_DATA (push, 0x3c);
+ }
+ }
+ else {
+ nvc0_clear_buffer_rgb32(pipe,res,offset,size,data,data_size);
+ }
+
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
+
+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+
+static void
nvc0_clear_depth_stencil(struct pipe_context *pipe,
struct pipe_surface *dst,
unsigned clear_flags,
@@ -1363,4 +1533,5 @@ nvc0_init_surface_functions(struct nvc0_context *nvc0)
pipe->flush_resource = nvc0_flush_resource;
pipe->clear_render_target = nvc0_clear_render_target;
pipe->clear_depth_stencil = nvc0_clear_depth_stencil;
+ pipe->clear_buffer = nvc0_clear_buffer;
}
--
1.8.4.5
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [PATCH] nvc0: Implement buffer_clear for this type of hardware
2014-05-26 19:53 ` [PATCH] nvc0: Implement buffer_clear for this type of hardware Tobias Klausmann
@ 2014-05-26 20:14 ` Ilia Mirkin
0 siblings, 0 replies; 3+ messages in thread
From: Ilia Mirkin @ 2014-05-26 20:14 UTC (permalink / raw)
To: Tobias Klausmann
Cc: nouveau@lists.freedesktop.org, mesa-dev@lists.freedesktop.org
Before you read my comments about all the potential improvements,
congratulations on your first nouveau patch :) Well done!
On Mon, May 26, 2014 at 3:53 PM, Tobias Klausmann
<tobias.johannes.klausmann@mni.thm.de> wrote:
It's common for people to throw in a Signed-off-by line, although not
strictly required (like it is in the kernel)
Also, clear_buffer, not buffer_clear (from the subject). "for this
type of hardware" is very generic-sounding, and doesn't really add any
extra info. I'd just say
"nvc0: implement clear_buffer" or "nvc0: implement pipe->clear_buffer"
> ---
> src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 171 ++++++++++++++++++++++++
> 1 file changed, 171 insertions(+)
>
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
> index 6b7c30c..987b6c4 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
> @@ -345,6 +345,176 @@ nvc0_clear_render_target(struct pipe_context *pipe,
> }
>
> static void
> +nvc0_clear_buffer_rgb32(struct pipe_context *pipe,
> + struct pipe_resource *res,
> + unsigned offset, unsigned size,
> + const void *data, int data_size)
> +{
> + // FIXME: Find a way to do this with the GPU!
While this is hard-coded to the 12 size, nothing about this function
_requires_ that. How about calling this
nvc0_clear_buffer_cpu(...)
And making this work for all data sizes (should be just replacing 12's
with data_size). And then the FIXME (really more like TODO) belongs in
the nvc0_clear_buffer function.
> +
> + struct nvc0_context *nvc0 = nvc0_context(pipe);
> + struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> + struct nv04_resource *buf = nv04_resource(res);
> +
> + struct pipe_transfer *pt;
> + struct pipe_box pb;
> + unsigned elements, i;
> + union pipe_color_union color;
> +
> + if (buf->fence_wr && !nouveau_fence_signalled(buf->fence_wr))
> + nouveau_fence_wait(buf->fence_wr);
No need to do this. The transfer will take care of all these little
details. Also the indentation is funny... please use the same
indentation as used elsewhere in the driver -- 3 space indents, no
tabs. (This is used throughout most of, but not all of, mesa.)
> +
> + memcpy(&color.ui, data, 12);
> + memset(&color.ui[3], 0, 4);
What does the color union add here? Just copy things out of data directly.
> +
> + elements = size / data_size;
> +
> + memset(&pb, 0, sizeof(pb));
> + pb.height = elements;
Is that right? At no point do you tell it the element size, so I think
this should just be 'size'.
> + pb.width = 1;
Traditionally you'd set width = elements, height = 1. Not that your
way doesn't work, it's just weird. Also, perhaps I can interest you in
u_box_1d?
> +
> + uint8_t *tf_map = buf->vtbl->transfer_map(pipe, res,
> + 0, PIPE_TRANSFER_WRITE, &pb, &pt);
> +
> + for (i = 0; i < elements ; ++i) {
> + memcpy(&tf_map[i*12],color.f,12);
> + }
> + buf->vtbl->transfer_unmap(pipe, pt);
> +}
> +
> +static void
> +nvc0_clear_buffer(struct pipe_context *pipe,
> + struct pipe_resource *res,
> + unsigned offset, unsigned size,
> + const void *data, int data_size)
> +{
> + struct nvc0_context *nvc0 = nvc0_context(pipe);
> + struct nouveau_pushbuf *push = nvc0->base.pushbuf;
> + struct nv04_resource *buf = nv04_resource(res);
> +
> + union pipe_color_union color;
> + enum pipe_format dst_fmt;
> + unsigned width, height, elements;
> +
> + assert(res->target == PIPE_BUFFER);
> + assert(nouveau_bo_memtype(buf->bo) == 0);
> +
> + switch (data_size) {
> + case 16:
The style for switch statements (which I hate, but is used everywhere
in mesa) is:
switch (...) {
case foo:
code goes here
i.e. the 'case' is lined up to the 'switch'.
> + dst_fmt = PIPE_FORMAT_R32G32B32A32_UINT;
> + memcpy(&color.ui, data, 16);
> + break;
> + case 12:
> + //dst_fmt = PIPE_FORMAT_R32G32B32_UINT;
> + //memcpy(&color.ui, data, 12);
> + //memset(&color.ui[3], 0, 4);
> + break;
> + case 8:
> + dst_fmt = PIPE_FORMAT_R32G32_UINT;
> + memcpy(&color.ui, data, 8);
> + memset(&color.ui[2], 0, 8);
> + break;
> + case 4:
> + dst_fmt = PIPE_FORMAT_R32_UINT;
> + memcpy(&color.ui, data, 4);
> + memset(&color.ui[1], 0, 12);
> + break;
> + case 2:
> + dst_fmt = PIPE_FORMAT_R16_UINT;
> + color.ui[0] = util_cpu_to_le32(
> + util_le16_to_cpu(*(unsigned short *)data));
> + memset(&color.ui[1], 0, 12);
> + break;
> + case 1:
> + dst_fmt = PIPE_FORMAT_R8_UINT;
> + color.ui[0] = util_cpu_to_le32(*(unsigned char *)data);
> + memset(&color.ui[1], 0, 12);
> + break;
> + default:
> + assert(!"Unsupported element size");
> + return;
> + }
> +
> + assert(size % data_size == 0);
> +
> + if (data_size != 12) {
How about structuring the code as
if (data_size == 12) {
clear_buffer_cpu(...);
return;
}
Since you don't need the fence/state update at the bottom either
(since you don't touch the state, and the GPU isn't the one doing the
work for this clear).
> +
> + elements = size / data_size;
> +
> + height = (elements + 16383) / 16384;
> +
> + width = elements / height;
> +
> + if (!PUSH_SPACE(push, 40))
> + return;
> +
> + PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
> +
> + BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4);
> + PUSH_DATAf(push, color.f[0]);
> + PUSH_DATAf(push, color.f[1]);
> + PUSH_DATAf(push, color.f[2]);
> + PUSH_DATAf(push, color.f[3]);
> + BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
> + PUSH_DATA (push, width << 16);
> + PUSH_DATA (push, height << 16);
> +
> + BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
> + PUSH_DATA (push, 1);
You can use IMMED_NVC0 here.
> +
> + BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9);
> + PUSH_DATAh(push, buf->address + offset);
> + PUSH_DATA (push, buf->address + offset);
> +
> + PUSH_DATA (push, width * data_size);
> + PUSH_DATA (push, height);
> +
> + PUSH_DATA (push, nvc0_format_table[dst_fmt].rt);
> + PUSH_DATA (push, 1 << 12);
> + PUSH_DATA (push, 1);
> + PUSH_DATA (push, 0);
> + PUSH_DATA (push, 0);
> +
> + BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
> + PUSH_DATA (push, 0);
> + BEGIN_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 1);
> + PUSH_DATA (push, 0x3c);
You can use IMMED_NVC0 for these too. (I'm pretty sure. Feel free to
tell me it doesn't work.)
> +
> + if (width * height != elements) {
> + offset += width * height * data_size;
> + width = elements - width * height;
> + height = 1;
> +
> + BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
> + PUSH_DATA (push, width << 16);
> + PUSH_DATA (push, height << 16);
> +
> + BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
> + PUSH_DATA (push, 1);
Do you need this RT_CONTROL thing here? It's not changing between the runs...
> +
> + BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 2);
> + PUSH_DATAh(push, buf->address + offset);
> + PUSH_DATA (push, buf->address + offset);
> +
> + BEGIN_NVC0(push, NVC0_3D(RT_HORIZ(0)), 2);
> + PUSH_DATA (push, width * data_size);
> + PUSH_DATA (push, height);
> +
> + BEGIN_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 1);
> + PUSH_DATA (push, 0x3c);
> + }
> + }
> + else {
> + nvc0_clear_buffer_rgb32(pipe,res,offset,size,data,data_size);
> + }
> +
> + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence);
> + nouveau_fence_ref(nvc0->screen->base.fence.current, &buf->fence_wr);
> +
> + nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
> +}
> +
> +static void
> nvc0_clear_depth_stencil(struct pipe_context *pipe,
> struct pipe_surface *dst,
> unsigned clear_flags,
> @@ -1363,4 +1533,5 @@ nvc0_init_surface_functions(struct nvc0_context *nvc0)
> pipe->flush_resource = nvc0_flush_resource;
> pipe->clear_render_target = nvc0_clear_render_target;
> pipe->clear_depth_stencil = nvc0_clear_depth_stencil;
> + pipe->clear_buffer = nvc0_clear_buffer;
> }
> --
> 1.8.4.5
>
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2014-05-26 20:14 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-05-26 19:53 Implement buffer_clear for nvc0 Tobias Klausmann
2014-05-26 19:53 ` [PATCH] nvc0: Implement buffer_clear for this type of hardware Tobias Klausmann
2014-05-26 20:14 ` Ilia Mirkin
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.