From: Maarten Lankhorst <maarten.lankhorst@canonical.com>
To: Ben Skeggs <bskeggs@redhat.com>
Cc: Dave Airlie <airlied@redhat.com>,
dri-devel@lists.freedesktop.org,
LKML <linux-kernel@vger.kernel.org>
Subject: [PATCH] nouveau: Fix alignment requirements on src and dst addresses
Date: Mon, 04 Jun 2012 12:00:31 +0200 [thread overview]
Message-ID: <4FCC873F.7010906@canonical.com> (raw)
Linear copy works by adding the offset to the buffer address,
which may end up not being 16-byte aligned.
Some tests I've written for prime_pcopy show that the engine
allows this correctly, so the restriction on lowest 4 bits of
address can be lifted safely.
The comments added were by envyas, I think because I used
a newer version.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@canonical.com>
Cc: stable@vger.kernel.org
---
diff --git a/drivers/gpu/drm/nouveau/nva3_copy.fuc b/drivers/gpu/drm/nouveau/nva3_copy.fuc
index abc3662..219850d 100644
--- a/drivers/gpu/drm/nouveau/nva3_copy.fuc
+++ b/drivers/gpu/drm/nouveau/nva3_copy.fuc
@@ -119,9 +119,9 @@ dispatch_dma:
// mthd 0x030c-0x0340, various stuff
.b16 0xc3 14
.b32 #ctx_src_address_high ~0x000000ff
-.b32 #ctx_src_address_low ~0xfffffff0
+.b32 #ctx_src_address_low ~0xffffffff
.b32 #ctx_dst_address_high ~0x000000ff
-.b32 #ctx_dst_address_low ~0xfffffff0
+.b32 #ctx_dst_address_low ~0xffffffff
.b32 #ctx_src_pitch ~0x0007ffff
.b32 #ctx_dst_pitch ~0x0007ffff
.b32 #ctx_xcnt ~0x0000ffff
diff --git a/drivers/gpu/drm/nouveau/nva3_copy.fuc.h b/drivers/gpu/drm/nouveau/nva3_copy.fuc.h
index 1f33fbd..241b272 100644
--- a/drivers/gpu/drm/nouveau/nva3_copy.fuc.h
+++ b/drivers/gpu/drm/nouveau/nva3_copy.fuc.h
@@ -1,37 +1,72 @@
uint32_t nva3_pcopy_data[] = {
+/* 0x0000: ctx_object */
0x00000000,
+/* 0x0004: ctx_dma */
+/* 0x0004: ctx_dma_query */
0x00000000,
+/* 0x0008: ctx_dma_src */
0x00000000,
+/* 0x000c: ctx_dma_dst */
0x00000000,
+/* 0x0010: ctx_query_address_high */
0x00000000,
+/* 0x0014: ctx_query_address_low */
0x00000000,
+/* 0x0018: ctx_query_counter */
0x00000000,
+/* 0x001c: ctx_src_address_high */
0x00000000,
+/* 0x0020: ctx_src_address_low */
0x00000000,
+/* 0x0024: ctx_src_pitch */
0x00000000,
+/* 0x0028: ctx_src_tile_mode */
0x00000000,
+/* 0x002c: ctx_src_xsize */
0x00000000,
+/* 0x0030: ctx_src_ysize */
0x00000000,
+/* 0x0034: ctx_src_zsize */
0x00000000,
+/* 0x0038: ctx_src_zoff */
0x00000000,
+/* 0x003c: ctx_src_xoff */
0x00000000,
+/* 0x0040: ctx_src_yoff */
0x00000000,
+/* 0x0044: ctx_src_cpp */
0x00000000,
+/* 0x0048: ctx_dst_address_high */
0x00000000,
+/* 0x004c: ctx_dst_address_low */
0x00000000,
+/* 0x0050: ctx_dst_pitch */
0x00000000,
+/* 0x0054: ctx_dst_tile_mode */
0x00000000,
+/* 0x0058: ctx_dst_xsize */
0x00000000,
+/* 0x005c: ctx_dst_ysize */
0x00000000,
+/* 0x0060: ctx_dst_zsize */
0x00000000,
+/* 0x0064: ctx_dst_zoff */
0x00000000,
+/* 0x0068: ctx_dst_xoff */
0x00000000,
+/* 0x006c: ctx_dst_yoff */
0x00000000,
+/* 0x0070: ctx_dst_cpp */
0x00000000,
+/* 0x0074: ctx_format */
0x00000000,
+/* 0x0078: ctx_swz_const0 */
0x00000000,
+/* 0x007c: ctx_swz_const1 */
0x00000000,
+/* 0x0080: ctx_xcnt */
0x00000000,
+/* 0x0084: ctx_ycnt */
0x00000000,
0x00000000,
0x00000000,
@@ -63,6 +98,7 @@ uint32_t nva3_pcopy_data[] = {
0x00000000,
0x00000000,
0x00000000,
+/* 0x0100: dispatch_table */
0x00010000,
0x00000000,
0x00000000,
@@ -73,6 +109,7 @@ uint32_t nva3_pcopy_data[] = {
0x00010162,
0x00000000,
0x00030060,
+/* 0x0128: dispatch_dma */
0x00010170,
0x00000000,
0x00010170,
@@ -118,11 +155,11 @@ uint32_t nva3_pcopy_data[] = {
0x0000001c,
0xffffff00,
0x00000020,
- 0x0000000f,
+ 0x00000000,
0x00000048,
0xffffff00,
0x0000004c,
- 0x0000000f,
+ 0x00000000,
0x00000024,
0xfff80000,
0x00000050,
@@ -147,6 +184,7 @@ uint32_t nva3_pcopy_data[] = {
};
uint32_t nva3_pcopy_code[] = {
+/* 0x0000: main */
0x04fe04bd,
0x3517f000,
0xf10010fe,
@@ -158,23 +196,31 @@ uint32_t nva3_pcopy_code[] = {
0x17f11031,
0x27f01200,
0x0012d003,
+/* 0x002f: spin */
0xf40031f4,
0x0ef40028,
+/* 0x0035: ih */
0x8001cffd,
0xf40812c4,
0x21f4060b,
+/* 0x0041: ih_no_chsw */
0x0412c472,
0xf4060bf4,
+/* 0x004a: ih_no_cmd */
0x11c4c321,
0x4001d00c,
+/* 0x0052: swctx */
0x47f101f8,
0x4bfe7700,
0x0007fe00,
0xf00204b9,
0x01f40643,
0x0604fa09,
+/* 0x006b: swctx_load */
0xfa060ef4,
+/* 0x006e: swctx_done */
0x03f80504,
+/* 0x0072: chsw */
0x27f100f8,
0x23cf1400,
0x1e3fc800,
@@ -183,18 +229,22 @@ uint32_t nva3_pcopy_code[] = {
0x1e3af052,
0xf00023d0,
0x24d00147,
+/* 0x0093: chsw_no_unload */
0xcf00f880,
0x3dc84023,
0x220bf41e,
0xf40131f4,
0x57f05221,
0x0367f004,
+/* 0x00a8: chsw_load_ctx_dma */
0xa07856bc,
0xb6018068,
0x87d00884,
0x0162b600,
+/* 0x00bb: chsw_finish_load */
0xf0f018f4,
0x23d00237,
+/* 0x00c3: dispatch */
0xf100f880,
0xcf190037,
0x33cf4032,
@@ -202,6 +252,7 @@ uint32_t nva3_pcopy_code[] = {
0x1024b607,
0x010057f1,
0x74bd64bd,
+/* 0x00dc: dispatch_loop */
0x58005658,
0x50b60157,
0x0446b804,
@@ -211,6 +262,7 @@ uint32_t nva3_pcopy_code[] = {
0xb60276bb,
0x57bb0374,
0xdf0ef400,
+/* 0x0100: dispatch_valid_mthd */
0xb60246bb,
0x45bb0344,
0x01459800,
@@ -220,31 +272,41 @@ uint32_t nva3_pcopy_code[] = {
0xb0014658,
0x1bf40064,
0x00538009,
+/* 0x0127: dispatch_cmd */
0xf4300ef4,
0x55f90132,
0xf40c01f4,
+/* 0x0132: dispatch_invalid_bitfield */
0x25f0250e,
+/* 0x0135: dispatch_illegal_mthd */
0x0125f002,
+/* 0x0138: dispatch_error */
0x100047f1,
0xd00042d0,
0x27f04043,
0x0002d040,
+/* 0x0148: hostirq_wait */
0xf08002cf,
0x24b04024,
0xf71bf400,
+/* 0x0154: dispatch_done */
0x1d0027f1,
0xd00137f0,
0x00f80023,
+/* 0x0160: cmd_nop */
+/* 0x0162: cmd_pm_trigger */
0x27f100f8,
0x34bd2200,
0xd00233f0,
0x00f80023,
+/* 0x0170: cmd_dma */
0x012842b7,
0xf00145b6,
0x43801e39,
0x0040b701,
0x0644b606,
0xf80043d0,
+/* 0x0189: cmd_exec_set_format */
0xf030f400,
0xb00001b0,
0x01b00101,
@@ -256,20 +318,26 @@ uint32_t nva3_pcopy_code[] = {
0x70b63847,
0x0232f401,
0x94bd84bd,
+/* 0x01b4: ncomp_loop */
0xb60f4ac4,
0xb4bd0445,
+/* 0x01bc: bpc_loop */
0xf404a430,
0xa5ff0f18,
0x00cbbbc0,
0xf40231f4,
+/* 0x01ce: cmp_c0 */
0x1bf4220e,
0x10c7f00c,
0xf400cbbb,
+/* 0x01da: cmp_c1 */
0xa430160e,
0x0c18f406,
0xbb14c7f0,
0x0ef400cb,
+/* 0x01e9: cmp_zero */
0x80c7f107,
+/* 0x01ed: bpc_next */
0x01c83800,
0xb60180b6,
0xb5b801b0,
@@ -280,6 +348,7 @@ uint32_t nva3_pcopy_code[] = {
0x98110680,
0x68fd2008,
0x0502f400,
+/* 0x0216: dst_xcnt */
0x75fd64bd,
0x1c078000,
0xf10078fd,
@@ -304,6 +373,7 @@ uint32_t nva3_pcopy_code[] = {
0x980056d0,
0x56d01f06,
0x1030f440,
+/* 0x0276: cmd_exec_set_surface_tiled */
0x579800f8,
0x6879c70a,
0xb66478c7,
@@ -311,9 +381,11 @@ uint32_t nva3_pcopy_code[] = {
0x0e76b060,
0xf0091bf4,
0x0ef40477,
+/* 0x0291: xtile64 */
0x027cf00f,
0xfd1170b6,
0x77f00947,
+/* 0x029d: xtileok */
0x0f5a9806,
0xfd115b98,
0xb7f000ab,
@@ -371,6 +443,7 @@ uint32_t nva3_pcopy_code[] = {
0x67d00600,
0x0060b700,
0x0068d004,
+/* 0x0382: cmd_exec_set_surface_linear */
0x6cf000f8,
0x0260b702,
0x0864b602,
@@ -381,13 +454,16 @@ uint32_t nva3_pcopy_code[] = {
0xb70067d0,
0x98040060,
0x67d00957,
+/* 0x03ab: cmd_exec_wait */
0xf900f800,
0xf110f900,
0xb6080007,
+/* 0x03b6: loop */
0x01cf0604,
0x0114f000,
0xfcfa1bf4,
0xf800fc10,
+/* 0x03c5: cmd_exec_query */
0x0d34c800,
0xf5701bf4,
0xf103ab21,
@@ -417,6 +493,7 @@ uint32_t nva3_pcopy_code[] = {
0x47f10153,
0x44b60800,
0x0045d006,
+/* 0x0438: query_counter */
0x03ab21f5,
0x080c47f1,
0x980644b6,
@@ -439,11 +516,13 @@ uint32_t nva3_pcopy_code[] = {
0x47f10153,
0x44b60800,
0x0045d006,
+/* 0x0492: cmd_exec */
0x21f500f8,
0x3fc803ab,
0x0e0bf400,
0x018921f5,
0x020047f1,
+/* 0x04a7: cmd_exec_no_format */
0xf11e0ef4,
0xb6081067,
0x77f00664,
@@ -451,19 +530,24 @@ uint32_t nva3_pcopy_code[] = {
0x981c0780,
0x67d02007,
0x4067d000,
+/* 0x04c2: cmd_exec_init_src_surface */
0x32f444bd,
0xc854bd02,
0x0bf4043f,
0x8221f50a,
0x0a0ef403,
+/* 0x04d4: src_tiled */
0x027621f5,
+/* 0x04db: cmd_exec_init_dst_surface */
0xf40749f0,
0x57f00231,
0x083fc82c,
0xf50a0bf4,
0xf4038221,
+/* 0x04ee: dst_tiled */
0x21f50a0e,
0x49f00276,
+/* 0x04f5: cmd_exec_kick */
0x0057f108,
0x0654b608,
0xd0210698,
@@ -473,6 +557,8 @@ uint32_t nva3_pcopy_code[] = {
0xc80054d0,
0x0bf40c3f,
0xc521f507,
+/* 0x0519: cmd_exec_done */
+/* 0x051b: cmd_wrcache_flush */
0xf100f803,
0xbd220027,
0x0133f034,
diff --git a/drivers/gpu/drm/nouveau/nvc0_copy.fuc.h b/drivers/gpu/drm/nouveau/nvc0_copy.fuc.h
index a8d1745..98cc421 100644
--- a/drivers/gpu/drm/nouveau/nvc0_copy.fuc.h
+++ b/drivers/gpu/drm/nouveau/nvc0_copy.fuc.h
@@ -1,34 +1,65 @@
uint32_t nvc0_pcopy_data[] = {
+/* 0x0000: ctx_object */
0x00000000,
+/* 0x0004: ctx_query_address_high */
0x00000000,
+/* 0x0008: ctx_query_address_low */
0x00000000,
+/* 0x000c: ctx_query_counter */
0x00000000,
+/* 0x0010: ctx_src_address_high */
0x00000000,
+/* 0x0014: ctx_src_address_low */
0x00000000,
+/* 0x0018: ctx_src_pitch */
0x00000000,
+/* 0x001c: ctx_src_tile_mode */
0x00000000,
+/* 0x0020: ctx_src_xsize */
0x00000000,
+/* 0x0024: ctx_src_ysize */
0x00000000,
+/* 0x0028: ctx_src_zsize */
0x00000000,
+/* 0x002c: ctx_src_zoff */
0x00000000,
+/* 0x0030: ctx_src_xoff */
0x00000000,
+/* 0x0034: ctx_src_yoff */
0x00000000,
+/* 0x0038: ctx_src_cpp */
0x00000000,
+/* 0x003c: ctx_dst_address_high */
0x00000000,
+/* 0x0040: ctx_dst_address_low */
0x00000000,
+/* 0x0044: ctx_dst_pitch */
0x00000000,
+/* 0x0048: ctx_dst_tile_mode */
0x00000000,
+/* 0x004c: ctx_dst_xsize */
0x00000000,
+/* 0x0050: ctx_dst_ysize */
0x00000000,
+/* 0x0054: ctx_dst_zsize */
0x00000000,
+/* 0x0058: ctx_dst_zoff */
0x00000000,
+/* 0x005c: ctx_dst_xoff */
0x00000000,
+/* 0x0060: ctx_dst_yoff */
0x00000000,
+/* 0x0064: ctx_dst_cpp */
0x00000000,
+/* 0x0068: ctx_format */
0x00000000,
+/* 0x006c: ctx_swz_const0 */
0x00000000,
+/* 0x0070: ctx_swz_const1 */
0x00000000,
+/* 0x0074: ctx_xcnt */
0x00000000,
+/* 0x0078: ctx_ycnt */
0x00000000,
0x00000000,
0x00000000,
@@ -63,6 +94,7 @@ uint32_t nvc0_pcopy_data[] = {
0x00000000,
0x00000000,
0x00000000,
+/* 0x0100: dispatch_table */
0x00010000,
0x00000000,
0x00000000,
@@ -111,11 +143,11 @@ uint32_t nvc0_pcopy_data[] = {
0x00000010,
0xffffff00,
0x00000014,
- 0x0000000f,
+ 0x00000000,
0x0000003c,
0xffffff00,
0x00000040,
- 0x0000000f,
+ 0x00000000,
0x00000018,
0xfff80000,
0x00000044,
@@ -140,6 +172,7 @@ uint32_t nvc0_pcopy_data[] = {
};
uint32_t nvc0_pcopy_code[] = {
+/* 0x0000: main */
0x04fe04bd,
0x3517f000,
0xf10010fe,
@@ -151,15 +184,20 @@ uint32_t nvc0_pcopy_code[] = {
0x17f11031,
0x27f01200,
0x0012d003,
+/* 0x002f: spin */
0xf40031f4,
0x0ef40028,
+/* 0x0035: ih */
0x8001cffd,
0xf40812c4,
0x21f4060b,
+/* 0x0041: ih_no_chsw */
0x0412c4ca,
0xf5070bf4,
+/* 0x004b: ih_no_cmd */
0xc4010221,
0x01d00c11,
+/* 0x0053: swctx */
0xf101f840,
0xfe770047,
0x47f1004b,
@@ -188,8 +226,11 @@ uint32_t nvc0_pcopy_code[] = {
0xf00204b9,
0x01f40643,
0x0604fa09,
+/* 0x00c3: swctx_load */
0xfa060ef4,
+/* 0x00c6: swctx_done */
0x03f80504,
+/* 0x00ca: chsw */
0x27f100f8,
0x23cf1400,
0x1e3fc800,
@@ -198,18 +239,22 @@ uint32_t nvc0_pcopy_code[] = {
0x1e3af053,
0xf00023d0,
0x24d00147,
+/* 0x00eb: chsw_no_unload */
0xcf00f880,
0x3dc84023,
0x090bf41e,
0xf40131f4,
+/* 0x00fa: chsw_finish_load */
0x37f05321,
0x8023d002,
+/* 0x0102: dispatch */
0x37f100f8,
0x32cf1900,
0x0033cf40,
0x07ff24e4,
0xf11024b6,
0xbd010057,
+/* 0x011b: dispatch_loop */
0x5874bd64,
0x57580056,
0x0450b601,
@@ -219,6 +264,7 @@ uint32_t nvc0_pcopy_code[] = {
0xbb0f08f4,
0x74b60276,
0x0057bb03,
+/* 0x013f: dispatch_valid_mthd */
0xbbdf0ef4,
0x44b60246,
0x0045bb03,
@@ -229,24 +275,33 @@ uint32_t nvc0_pcopy_code[] = {
0x64b00146,
0x091bf400,
0xf4005380,
+/* 0x0166: dispatch_cmd */
0x32f4300e,
0xf455f901,
0x0ef40c01,
+/* 0x0171: dispatch_invalid_bitfield */
0x0225f025,
+/* 0x0174: dispatch_illegal_mthd */
+/* 0x0177: dispatch_error */
0xf10125f0,
0xd0100047,
0x43d00042,
0x4027f040,
+/* 0x0187: hostirq_wait */
0xcf0002d0,
0x24f08002,
0x0024b040,
+/* 0x0193: dispatch_done */
0xf1f71bf4,
0xf01d0027,
0x23d00137,
+/* 0x019f: cmd_nop */
0xf800f800,
+/* 0x01a1: cmd_pm_trigger */
0x0027f100,
0xf034bd22,
0x23d00233,
+/* 0x01af: cmd_exec_set_format */
0xf400f800,
0x01b0f030,
0x0101b000,
@@ -258,20 +313,26 @@ uint32_t nvc0_pcopy_code[] = {
0x3847c701,
0xf40170b6,
0x84bd0232,
+/* 0x01da: ncomp_loop */
0x4ac494bd,
0x0445b60f,
+/* 0x01e2: bpc_loop */
0xa430b4bd,
0x0f18f404,
0xbbc0a5ff,
0x31f400cb,
0x220ef402,
+/* 0x01f4: cmp_c0 */
0xf00c1bf4,
0xcbbb10c7,
0x160ef400,
+/* 0x0200: cmp_c1 */
0xf406a430,
0xc7f00c18,
0x00cbbb14,
+/* 0x020f: cmp_zero */
0xf1070ef4,
+/* 0x0213: bpc_next */
0x380080c7,
0x80b601c8,
0x01b0b601,
@@ -283,6 +344,7 @@ uint32_t nvc0_pcopy_code[] = {
0x1d08980e,
0xf40068fd,
0x64bd0502,
+/* 0x023c: dst_xcnt */
0x800075fd,
0x78fd1907,
0x1057f100,
@@ -307,15 +369,18 @@ uint32_t nvc0_pcopy_code[] = {
0x1c069800,
0xf44056d0,
0x00f81030,
+/* 0x029c: cmd_exec_set_surface_tiled */
0xc7075798,
0x78c76879,
0x0380b664,
0xb06077c7,
0x1bf40e76,
0x0477f009,
+/* 0x02b7: xtile64 */
0xf00f0ef4,
0x70b6027c,
0x0947fd11,
+/* 0x02c3: xtileok */
0x980677f0,
0x5b980c5a,
0x00abfd0e,
@@ -374,6 +439,7 @@ uint32_t nvc0_pcopy_code[] = {
0xb70067d0,
0xd0040060,
0x00f80068,
+/* 0x03a8: cmd_exec_set_surface_linear */
0xb7026cf0,
0xb6020260,
0x57980864,
@@ -384,12 +450,15 @@ uint32_t nvc0_pcopy_code[] = {
0x0060b700,
0x06579804,
0xf80067d0,
+/* 0x03d1: cmd_exec_wait */
0xf900f900,
0x0007f110,
0x0604b608,
+/* 0x03dc: loop */
0xf00001cf,
0x1bf40114,
0xfc10fcfa,
+/* 0x03eb: cmd_exec_query */
0xc800f800,
0x1bf40d34,
0xd121f570,
@@ -419,6 +488,7 @@ uint32_t nvc0_pcopy_code[] = {
0x0153f026,
0x080047f1,
0xd00644b6,
+/* 0x045e: query_counter */
0x21f50045,
0x47f103d1,
0x44b6080c,
@@ -442,11 +512,13 @@ uint32_t nvc0_pcopy_code[] = {
0x080047f1,
0xd00644b6,
0x00f80045,
+/* 0x04b8: cmd_exec */
0x03d121f5,
0xf4003fc8,
0x21f50e0b,
0x47f101af,
0x0ef40200,
+/* 0x04cd: cmd_exec_no_format */
0x1067f11e,
0x0664b608,
0x800177f0,
@@ -454,18 +526,23 @@ uint32_t nvc0_pcopy_code[] = {
0x1d079819,
0xd00067d0,
0x44bd4067,
+/* 0x04e8: cmd_exec_init_src_surface */
0xbd0232f4,
0x043fc854,
0xf50a0bf4,
0xf403a821,
+/* 0x04fa: src_tiled */
0x21f50a0e,
0x49f0029c,
+/* 0x0501: cmd_exec_init_dst_surface */
0x0231f407,
0xc82c57f0,
0x0bf4083f,
0xa821f50a,
0x0a0ef403,
+/* 0x0514: dst_tiled */
0x029c21f5,
+/* 0x051b: cmd_exec_kick */
0xf10849f0,
0xb6080057,
0x06980654,
@@ -475,7 +552,9 @@ uint32_t nvc0_pcopy_code[] = {
0x54d00546,
0x0c3fc800,
0xf5070bf4,
+/* 0x053f: cmd_exec_done */
0xf803eb21,
+/* 0x0541: cmd_wrcache_flush */
0x0027f100,
0xf034bd22,
0x23d00133,
reply other threads:[~2012-06-04 10:00 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=4FCC873F.7010906@canonical.com \
--to=maarten.lankhorst@canonical.com \
--cc=airlied@redhat.com \
--cc=bskeggs@redhat.com \
--cc=dri-devel@lists.freedesktop.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.