From: "Antonino A. Daplas" <adaplas@pol.net>
To: "Antonino A. Daplas" <adaplas@pol.net>
Cc: linux-fbdev-devel@lists.sourceforge.net,
mardy@users.sourceforge.net, Richard Purdie <rpurdie@rpsys.net>
Subject: Re: [PATCH] Add acceleration support to ATI Imageon
Date: Thu, 23 Mar 2006 20:53:02 +0800 [thread overview]
Message-ID: <44229A2E.1040109@pol.net> (raw)
In-Reply-To: <4422987A.7030900@pol.net>
Antonino A. Daplas wrote:
> Mardy wrote:
>> Hi, please consider applying the attached patch. It adds acceleration
>> support in w100fb.c (i.e. ATI Imageons) for the copyarea and fillrect
>> operations.
>>
>> Since this is my first submission, please let me know if there's
>> something else that should be done.
>>
>> Comments are welcome.
>>
>
> This is good. Just a few comments below... and add a Signed-off-by: line.
>
>
>>
>>
>> ------------------------------------------------------------------------
>>
>> Index: linux-2.6.15/drivers/video/w100fb.c
>> ===================================================================
>> --- linux-2.6.15.orig/drivers/video/w100fb.c 2006-01-03 04:21:10.000000000 +0100
>> +++ linux-2.6.15/drivers/video/w100fb.c 2006-03-22 16:29:59.000000000 +0100
>> @@ -4,8 +4,9 @@
>> * Frame Buffer Device for ATI Imageon w100 (Wallaby)
>> *
>> * Copyright (C) 2002, ATI Corp.
>> - * Copyright (C) 2004-2005 Richard Purdie
>> + * Copyright (C) 2004-2006 Richard Purdie
>> * Copyright (c) 2005 Ian Molton
>> + * Copyright (c) 2006 Alberto Mardegan
>> *
>> * Rewritten for 2.6 by Richard Purdie <rpurdie@rpsys.net>
>> *
>> @@ -14,6 +15,9 @@
>> *
>> * w32xx support by Ian Molton
>> *
>> + * Hardware acceleration support by Alberto Mardegan
>> + * <mardy@users.sourceforge.net>
>> + *
>> * This program is free software; you can redistribute it and/or modify
>> * it under the terms of the GNU General Public License version 2 as
>> * published by the Free Software Foundation.
>> @@ -47,6 +51,7 @@
>> static void w100_update_enable(void);
>> static void w100_update_disable(void);
>> static void calc_hsync(struct w100fb_par *par);
>> +static void w100_init_graphic_engine(struct w100fb_par *par);
>> struct w100_pll_info *w100_get_xtal_table(unsigned int freq);
>>
>> /* Pseudo palette size */
>> @@ -248,6 +253,155 @@
>> }
>>
>>
>> +static void w100_fifo_wait(int entries)
>> +{
>> + union rbbm_status_u status;
>> + int i;
>> +
>> + for (i = 0; i < 2000000; i++) {
>> + status.val = readl(remapped_regs + mmRBBM_STATUS);
>> + if (status.f.cmdfifo_avail >= entries)
>> + return;
>> + udelay(1);
>> + }
>> + printk(KERN_ERR "w100fb: FIFO Timeout!\n");
>> +}
>> +
>> +
>> +static void w100_wait_complete()
>> +{
>> + union rbbm_status_u status;
>> + int i;
>> +
>> + for (i = 0; i < 2000000; i++) {
>> + status.val = readl(remapped_regs + mmRBBM_STATUS);
>> + if (!status.f.gui_active)
>> + return;
>> + udelay(1);
>> + }
>> + printk(KERN_ERR "w100fb: Graphic engine timeout!\n");
>> +}
>
> I presume that w100_wait_complete is a "wait for engine idle" function?
>
>> +
>> +
>> +static void w100_init_graphic_engine(struct w100fb_par *par)
>> +{
>> + union dp_gui_master_cntl_u gmc;
>> + union dp_mix_u dp_mix;
>> + union dp_datatype_u dp_datatype;
>> + union dp_cntl_u dp_cntl;
>> +
>> + w100_fifo_wait(4);
>> + writel(W100_FB_BASE, remapped_regs + mmDST_OFFSET);
>> + writel(par->xres, remapped_regs + mmDST_PITCH);
>> + writel(W100_FB_BASE, remapped_regs + mmSRC_OFFSET);
>> + writel(par->xres, remapped_regs + mmSRC_PITCH);
>> +
>> + w100_fifo_wait(3);
>> + writel(0, remapped_regs + mmSC_TOP_LEFT);
>> + writel((par->yres << 16) | par->xres, remapped_regs + mmSC_BOTTOM_RIGHT);
>> + writel(0x1fff1fff, remapped_regs + mmSRC_SC_BOTTOM_RIGHT);
>> +
>> + w100_fifo_wait(4);
>> + dp_cntl.val = 0;
>> + dp_cntl.f.dst_x_dir = 1;
>> + dp_cntl.f.dst_y_dir = 1;
>> + dp_cntl.f.src_x_dir = 1;
>> + dp_cntl.f.src_y_dir = 1;
>> + dp_cntl.f.dst_major_x = 1;
>> + dp_cntl.f.src_major_x = 1;
>> + writel(dp_cntl.val, remapped_regs + mmDP_CNTL);
>> +
>> + gmc.val = 0;
>> + gmc.f.gmc_src_pitch_offset_cntl = 1;
>> + gmc.f.gmc_dst_pitch_offset_cntl = 1;
>> + gmc.f.gmc_src_clipping = 1;
>> + gmc.f.gmc_dst_clipping = 1;
>> + gmc.f.gmc_brush_datatype = GMC_BRUSH_NONE;
>> + gmc.f.gmc_dst_datatype = 3; /* from DstType_16Bpp_444 */
>> + gmc.f.gmc_src_datatype = SRC_DATATYPE_EQU_DST;
>> + gmc.f.gmc_byte_pix_order = 1;
>> + gmc.f.gmc_default_sel = 0;
>> + gmc.f.gmc_rop3 = ROP3_SRCCOPY;
>> + gmc.f.gmc_dp_src_source = DP_SRC_MEM_RECTANGULAR;
>> + gmc.f.gmc_clr_cmp_fcn_dis = 1;
>> + gmc.f.gmc_wr_msk_dis = 1;
>> + gmc.f.gmc_dp_op = DP_OP_ROP;
>> + writel(gmc.val, remapped_regs + mmDP_GUI_MASTER_CNTL);
>> +
>> + dp_datatype.val = dp_mix.val = 0;
>> + dp_datatype.f.dp_dst_datatype = gmc.f.gmc_dst_datatype;
>> + dp_datatype.f.dp_brush_datatype = gmc.f.gmc_brush_datatype;
>> + dp_datatype.f.dp_src2_type = 0;
>> + dp_datatype.f.dp_src2_datatype = gmc.f.gmc_src_datatype;
>> + dp_datatype.f.dp_src_datatype = gmc.f.gmc_src_datatype;
>> + dp_datatype.f.dp_byte_pix_order = gmc.f.gmc_byte_pix_order;
>> + writel(dp_datatype.val, remapped_regs + mmDP_DATATYPE);
>> +
>> + dp_mix.f.dp_src_source = gmc.f.gmc_dp_src_source;
>> + dp_mix.f.dp_src2_source = 1;
>> + dp_mix.f.dp_rop3 = gmc.f.gmc_rop3;
>> + dp_mix.f.dp_op = gmc.f.gmc_dp_op;
>> + writel(dp_mix.val, remapped_regs + mmDP_MIX);
>> +}
>> +
>> +
>> +static void w100fb_fillrect(struct fb_info *info,
>> + const struct fb_fillrect *rect)
>> +{
>> + union dp_gui_master_cntl_u gmc;
>> +
>> + if (info->state != FBINFO_STATE_RUNNING)
>> + return;
>> + if (info->flags & FBINFO_HWACCEL_DISABLED) {
>> + cfb_fillrect(info, rect);
>> + return;
>> + }
>> +
>> + gmc.val = readl(remapped_regs + mmDP_GUI_MASTER_CNTL);
>> + gmc.f.gmc_rop3 = ROP3_PATCOPY;
>> + gmc.f.gmc_brush_datatype = GMC_BRUSH_SOLID_COLOR;
>> + w100_fifo_wait(2);
>> + writel(gmc.val, remapped_regs + mmDP_GUI_MASTER_CNTL);
>> + writel(rect->color, remapped_regs + mmDP_BRUSH_FRGD_CLR);
>> +
>> + w100_fifo_wait(2);
>> + writel((rect->dy << 16) | (rect->dx & 0xffff), remapped_regs + mmDST_Y_X);
>> + writel((rect->width << 16) | (rect->height & 0xffff),
>> + remapped_regs + mmDST_WIDTH_HEIGHT);
>> +
>> + w100_wait_complete();
>> +}
>
> It's probably more optimal if you need not wait for engine idle here so
> you can maximize the thoroughput of the GPU... If the framebuffer
> needs to be accessed directly, fbcon will call fbops->fb_sync().
>
>> +
>> +
>> +static void w100fb_copyarea(struct fb_info *info,
>> + const struct fb_copyarea *area)
>> +{
>> + u32 dx = area->dx, dy = area->dy, sx = area->sx, sy = area->sy;
>> + u32 h = area->height, w = area->width;
>> + union dp_gui_master_cntl_u gmc;
>> +
>> + if (info->state != FBINFO_STATE_RUNNING)
>> + return;
>> + if (info->flags & FBINFO_HWACCEL_DISABLED) {
>> + cfb_copyarea(info, area);
>> + return;
>> + }
>> +
>> + gmc.val = readl(remapped_regs + mmDP_GUI_MASTER_CNTL);
>> + gmc.f.gmc_rop3 = ROP3_SRCCOPY;
>> + gmc.f.gmc_brush_datatype = GMC_BRUSH_NONE;
>> + w100_fifo_wait(1);
>> + writel(gmc.val, remapped_regs + mmDP_GUI_MASTER_CNTL);
>> +
>> + w100_fifo_wait(3);
>> + writel((sy << 16) | (sx & 0xffff), remapped_regs + mmSRC_Y_X);
>> + writel((dy << 16) | (dx & 0xffff), remapped_regs + mmDST_Y_X);
>> + writel((w << 16) | (h & 0xffff), remapped_regs + mmDST_WIDTH_HEIGHT);
>> +
>> + w100_wait_complete();
>> +}
>
> ... and here too...
>
>> +
>> +
>> /*
>> * Change the resolution by calling the appropriate hardware functions
>> */
>> @@ -265,6 +419,7 @@
>> w100_init_lcd(par);
>> w100_set_dispregs(par);
>> w100_update_enable();
>> + w100_init_graphic_engine(par);
>>
>> calc_hsync(par);
>>
>> @@ -394,8 +549,8 @@
>> .fb_set_par = w100fb_set_par,
>> .fb_setcolreg = w100fb_setcolreg,
>> .fb_blank = w100fb_blank,
>> - .fb_fillrect = cfb_fillrect,
>> - .fb_copyarea = cfb_copyarea,
>> + .fb_fillrect = w100fb_fillrect,
>> + .fb_copyarea = w100fb_copyarea,
>> .fb_imageblit = cfb_imageblit,
>> };
>
> You can wrap w100_wait_complete() as w100fb_sync() and add this to fb_ops, ie
>
> .fb_sync = w100fb_sync.
>
>
>>
>> @@ -543,7 +698,8 @@
>> }
>>
>> info->fbops = &w100fb_ops;
>> - info->flags = FBINFO_DEFAULT;
>> + info->flags = FBINFO_DEFAULT + FBINFO_HWACCEL_COPYAREA +
>> + FBINFO_HWACCEL_FILLRECT;
Although the result is the same, I prefer an OR (|) rather than
addition (+)... At first glance, an OR makes you think of bitsetting
while the addition is a bit confusing at first.
>> info->node = -1;
>> info->screen_base = remapped_fbuf + (W100_FB_BASE-MEM_WINDOW_BASE);
>> info->screen_size = REMAPPED_FB_LEN;
>> Index: linux-2.6.15/drivers/video/w100fb.h
>> ===================================================================
>> --- linux-2.6.15.orig/drivers/video/w100fb.h 2006-01-03 04:21:10.000000000 +0100
>> +++ linux-2.6.15/drivers/video/w100fb.h 2006-03-22 16:10:45.000000000 +0100
>> @@ -122,15 +122,32 @@
>> /* Block DISPLAY End: */
>>
>> /* Block GFX Start: */
>> +#define mmDST_OFFSET 0x1004
>> +#define mmDST_PITCH 0x1008
>> +#define mmDST_Y_X 0x1038
>> +#define mmDST_WIDTH_HEIGHT 0x1198
>> +#define mmDP_GUI_MASTER_CNTL 0x106C
>> #define mmBRUSH_OFFSET 0x108C
>> #define mmBRUSH_Y_X 0x1074
>> +#define mmDP_BRUSH_FRGD_CLR 0x107C
>> +#define mmSRC_OFFSET 0x11AC
>> +#define mmSRC_PITCH 0x11B0
>> +#define mmSRC_Y_X 0x1034
>> #define mmDEFAULT_PITCH_OFFSET 0x10A0
>> #define mmDEFAULT_SC_BOTTOM_RIGHT 0x10A8
>> #define mmDEFAULT2_SC_BOTTOM_RIGHT 0x10AC
>> +#define mmSC_TOP_LEFT 0x11BC
>> +#define mmSC_BOTTOM_RIGHT 0x11C0
>> +#define mmSRC_SC_BOTTOM_RIGHT 0x11C4
>> #define mmGLOBAL_ALPHA 0x1210
>> #define mmFILTER_COEF 0x1214
>> #define mmMVC_CNTL_START 0x11E0
>> #define mmE2_ARITHMETIC_CNTL 0x1220
>> +#define mmDP_CNTL 0x11C8
>> +#define mmDP_CNTL_DST_DIR 0x11CC
>> +#define mmDP_DATATYPE 0x12C4
>> +#define mmDP_MIX 0x12C8
>> +#define mmDP_WRITE_MSK 0x12CC
>> #define mmENG_CNTL 0x13E8
>> #define mmENG_PERF_CNT 0x13F0
>> /* Block GFX End: */
>> @@ -179,6 +196,7 @@
>> /* Block RBBM Start: */
>> #define mmWAIT_UNTIL 0x1400
>> #define mmISYNC_CNTL 0x1404
>> +#define mmRBBM_STATUS 0x0140
>> #define mmRBBM_CNTL 0x0144
>> #define mmNQWAIT_UNTIL 0x0150
>> /* Block RBBM End: */
>> @@ -766,5 +784,145 @@
>> struct pwrmgt_cntl_t f;
>> } __attribute__((packed));
>>
>> +#define SRC_DATATYPE_EQU_DST 3
>> +
>> +#define ROP3_SRCCOPY 0xcc
>> +#define ROP3_PATCOPY 0xf0
>> +
>> +#define GMC_BRUSH_SOLID_COLOR 13
>> +#define GMC_BRUSH_NONE 15
>> +
>> +#define DP_SRC_MEM_RECTANGULAR 2
>> +
>> +#define DP_OP_ROP 0
>> +
>> +struct dp_gui_master_cntl_t {
>> + unsigned long gmc_src_pitch_offset_cntl : 1;
>> + unsigned long gmc_dst_pitch_offset_cntl : 1;
>> + unsigned long gmc_src_clipping : 1;
>> + unsigned long gmc_dst_clipping : 1;
>> + unsigned long gmc_brush_datatype : 4;
>> + unsigned long gmc_dst_datatype : 4;
>> + unsigned long gmc_src_datatype : 3;
>> + unsigned long gmc_byte_pix_order : 1;
>> + unsigned long gmc_default_sel : 1;
>> + unsigned long gmc_rop3 : 8;
>> + unsigned long gmc_dp_src_source : 3;
>> + unsigned long gmc_clr_cmp_fcn_dis : 1;
>> + unsigned long : 1;
>> + unsigned long gmc_wr_msk_dis : 1;
>> + unsigned long gmc_dp_op : 1;
>> +} __attribute__((packed));
>> +
>> +union dp_gui_master_cntl_u {
>> + unsigned long val : 32;
>> + struct dp_gui_master_cntl_t f;
>> +} __attribute__((packed));
>> +
>> +struct rbbm_status_t {
>> + unsigned long cmdfifo_avail : 7;
>> + unsigned long : 1;
>> + unsigned long hirq_on_rbb : 1;
>> + unsigned long cprq_on_rbb : 1;
>> + unsigned long cfrq_on_rbb : 1;
>> + unsigned long hirq_in_rtbuf : 1;
>> + unsigned long cprq_in_rtbuf : 1;
>> + unsigned long cfrq_in_rtbuf : 1;
>> + unsigned long cf_pipe_busy : 1;
>> + unsigned long eng_ev_busy : 1;
>> + unsigned long cp_cmdstrm_busy : 1;
>> + unsigned long e2_busy : 1;
>> + unsigned long rb2d_busy : 1;
>> + unsigned long rb3d_busy : 1;
>> + unsigned long se_busy : 1;
>> + unsigned long re_busy : 1;
>> + unsigned long tam_busy : 1;
>> + unsigned long tdm_busy : 1;
>> + unsigned long pb_busy : 1;
>> + unsigned long : 6;
>> + unsigned long gui_active : 1;
>> +} __attribute__((packed));
>> +
>> +union rbbm_status_u {
>> + unsigned long val : 32;
>> + struct rbbm_status_t f;
>> +} __attribute__((packed));
>> +
>> +struct dp_datatype_t {
>> + unsigned long dp_dst_datatype : 4;
>> + unsigned long : 4;
>> + unsigned long dp_brush_datatype : 4;
>> + unsigned long dp_src2_type : 1;
>> + unsigned long dp_src2_datatype : 3;
>> + unsigned long dp_src_datatype : 3;
>> + unsigned long : 11;
>> + unsigned long dp_byte_pix_order : 1;
>> + unsigned long : 1;
>> +} __attribute__((packed));
>> +
>> +union dp_datatype_u {
>> + unsigned long val : 32;
>> + struct dp_datatype_t f;
>> +} __attribute__((packed));
>> +
>> +struct dp_mix_t {
>> + unsigned long : 8;
>> + unsigned long dp_src_source : 3;
>> + unsigned long dp_src2_source : 3;
>> + unsigned long : 2;
>> + unsigned long dp_rop3 : 8;
>> + unsigned long dp_op : 1;
>> + unsigned long : 7;
>> +} __attribute__((packed));
>> +
>> +union dp_mix_u {
>> + unsigned long val : 32;
>> + struct dp_mix_t f;
>> +} __attribute__((packed));
>> +
>> +struct eng_cntl_t {
>> + unsigned long erc_reg_rd_ws : 1;
>> + unsigned long erc_reg_wr_ws : 1;
>> + unsigned long erc_idle_reg_wr : 1;
>> + unsigned long dis_engine_triggers : 1;
>> + unsigned long dis_rop_src_uses_dst_w_h : 1;
>> + unsigned long dis_src_uses_dst_dirmaj : 1;
>> + unsigned long : 6;
>> + unsigned long force_3dclk_when_2dclk : 1;
>> + unsigned long : 19;
>> +} __attribute__((packed));
>> +
>> +union eng_cntl_u {
>> + unsigned long val : 32;
>> + struct eng_cntl_t f;
>> +} __attribute__((packed));
>> +
>> +struct dp_cntl_t {
>> + unsigned long dst_x_dir : 1;
>> + unsigned long dst_y_dir : 1;
>> + unsigned long src_x_dir : 1;
>> + unsigned long src_y_dir : 1;
>> + unsigned long dst_major_x : 1;
>> + unsigned long src_major_x : 1;
>> + unsigned long : 26;
>> +} __attribute__((packed));
>> +
>> +union dp_cntl_u {
>> + unsigned long val : 32;
>> + struct dp_cntl_t f;
>> +} __attribute__((packed));
>> +
>> +struct dp_cntl_dst_dir_t {
>> + unsigned long : 15;
>> + unsigned long dst_y_dir : 1;
>> + unsigned long : 15;
>> + unsigned long dst_x_dir : 1;
>> +} __attribute__((packed));
>> +
>> +union dp_cntl_dst_dir_u {
>> + unsigned long val : 32;
>> + struct dp_cntl_dst_dir_t f;
>> +} __attribute__((packed));
>> +
>> #endif
>>
>
>
>
-------------------------------------------------------
This SF.Net email is sponsored by xPML, a groundbreaking scripting language
that extends applications into web and mobile media. Attend the live webcast
and join the prime developer group breaking into this new coding territory!
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=110944&bid=241720&dat=121642
next prev parent reply other threads:[~2006-03-23 13:00 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-03-23 11:29 [PATCH] Add acceleration support to ATI Imageon Mardy
2006-03-23 12:23 ` Geert Uytterhoeven
2006-03-23 13:22 ` Mardy
[not found] ` <4422987A.7030900@pol.net>
2006-03-23 12:53 ` Antonino A. Daplas [this message]
2006-03-23 13:28 ` Mardy
2006-03-23 17:18 ` [PATCH] Add acceleration support to ATI Imageon (new patch) Mardy
2006-03-23 17:35 ` [PATCH2] w100fb.h: convert bitfields to u32 Mardy
2006-03-25 0:39 ` [PATCH] Add acceleration support to ATI Imageon (new patch) Antonino A. Daplas
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=44229A2E.1040109@pol.net \
--to=adaplas@pol.net \
--cc=linux-fbdev-devel@lists.sourceforge.net \
--cc=mardy@users.sourceforge.net \
--cc=rpurdie@rpsys.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).