linux-fbdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Antonino A. Daplas" <adaplas@pol.net>
To: "Antonino A. Daplas" <adaplas@pol.net>
Cc: linux-fbdev-devel@lists.sourceforge.net,
	mardy@users.sourceforge.net, Richard Purdie <rpurdie@rpsys.net>
Subject: Re: [PATCH] Add acceleration support to ATI Imageon
Date: Thu, 23 Mar 2006 20:53:02 +0800	[thread overview]
Message-ID: <44229A2E.1040109@pol.net> (raw)
In-Reply-To: <4422987A.7030900@pol.net>

Antonino A. Daplas wrote:
> Mardy wrote:
>> Hi, please consider applying the attached patch. It adds acceleration
>> support in w100fb.c (i.e. ATI Imageons) for the copyarea and fillrect
>> operations.
>>
>> Since this is my first submission, please let me know if there's
>> something else that should be done.
>>
>> Comments are welcome.
>>
> 
> This is good.  Just a few comments below... and add a Signed-off-by: line.
> 
> 
>>
>>
>> ------------------------------------------------------------------------
>>
>> Index: linux-2.6.15/drivers/video/w100fb.c
>> ===================================================================
>> --- linux-2.6.15.orig/drivers/video/w100fb.c	2006-01-03 04:21:10.000000000 +0100
>> +++ linux-2.6.15/drivers/video/w100fb.c	2006-03-22 16:29:59.000000000 +0100
>> @@ -4,8 +4,9 @@
>>   * Frame Buffer Device for ATI Imageon w100 (Wallaby)
>>   *
>>   * Copyright (C) 2002, ATI Corp.
>> - * Copyright (C) 2004-2005 Richard Purdie
>> + * Copyright (C) 2004-2006 Richard Purdie
>>   * Copyright (c) 2005 Ian Molton
>> + * Copyright (c) 2006 Alberto Mardegan
>>   *
>>   * Rewritten for 2.6 by Richard Purdie <rpurdie@rpsys.net>
>>   *
>> @@ -14,6 +15,9 @@
>>   *
>>   * w32xx support by Ian Molton
>>   *
>> + * Hardware acceleration support by Alberto Mardegan
>> + * <mardy@users.sourceforge.net>
>> + *
>>   * This program is free software; you can redistribute it and/or modify
>>   * it under the terms of the GNU General Public License version 2 as
>>   * published by the Free Software Foundation.
>> @@ -47,6 +51,7 @@
>>  static void w100_update_enable(void);
>>  static void w100_update_disable(void);
>>  static void calc_hsync(struct w100fb_par *par);
>> +static void w100_init_graphic_engine(struct w100fb_par *par);
>>  struct w100_pll_info *w100_get_xtal_table(unsigned int freq);
>>  
>>  /* Pseudo palette size */
>> @@ -248,6 +253,155 @@
>>  }
>>  
>>  
>> +static void w100_fifo_wait(int entries)
>> +{
>> +	union rbbm_status_u status;
>> +	int i;
>> +
>> +	for (i = 0; i < 2000000; i++) {
>> +		status.val = readl(remapped_regs + mmRBBM_STATUS);
>> +		if (status.f.cmdfifo_avail >= entries)
>> +			return;
>> +		udelay(1);
>> +	}
>> +	printk(KERN_ERR "w100fb: FIFO Timeout!\n");
>> +}
>> +
>> +
>> +static void w100_wait_complete()
>> +{
>> +	union rbbm_status_u status;
>> +	int i;
>> +
>> +	for (i = 0; i < 2000000; i++) {
>> +		status.val = readl(remapped_regs + mmRBBM_STATUS);
>> +		if (!status.f.gui_active)
>> +			return;
>> +		udelay(1);
>> +	}
>> +	printk(KERN_ERR "w100fb: Graphic engine timeout!\n");
>> +}
> 
> I presume that w100_wait_complete is a "wait for engine idle" function?
> 
>> +
>> +
>> +static void w100_init_graphic_engine(struct w100fb_par *par)
>> +{
>> +	union dp_gui_master_cntl_u gmc;
>> +	union dp_mix_u dp_mix;
>> +	union dp_datatype_u dp_datatype;
>> +	union dp_cntl_u dp_cntl;
>> +
>> +	w100_fifo_wait(4);
>> +	writel(W100_FB_BASE, remapped_regs + mmDST_OFFSET);
>> +	writel(par->xres, remapped_regs + mmDST_PITCH);
>> +	writel(W100_FB_BASE, remapped_regs + mmSRC_OFFSET);
>> +	writel(par->xres, remapped_regs + mmSRC_PITCH);
>> +
>> +	w100_fifo_wait(3);
>> +	writel(0, remapped_regs + mmSC_TOP_LEFT);
>> +	writel((par->yres << 16) | par->xres, remapped_regs + mmSC_BOTTOM_RIGHT);
>> +	writel(0x1fff1fff, remapped_regs + mmSRC_SC_BOTTOM_RIGHT);
>> +
>> +	w100_fifo_wait(4);
>> +	dp_cntl.val = 0;
>> +	dp_cntl.f.dst_x_dir = 1;
>> +	dp_cntl.f.dst_y_dir = 1;
>> +	dp_cntl.f.src_x_dir = 1;
>> +	dp_cntl.f.src_y_dir = 1;
>> +	dp_cntl.f.dst_major_x = 1;
>> +	dp_cntl.f.src_major_x = 1;
>> +	writel(dp_cntl.val, remapped_regs + mmDP_CNTL);
>> +
>> +	gmc.val = 0;
>> +	gmc.f.gmc_src_pitch_offset_cntl = 1;
>> +	gmc.f.gmc_dst_pitch_offset_cntl = 1;
>> +	gmc.f.gmc_src_clipping = 1;
>> +	gmc.f.gmc_dst_clipping = 1;
>> +	gmc.f.gmc_brush_datatype = GMC_BRUSH_NONE;
>> +	gmc.f.gmc_dst_datatype = 3; /* from DstType_16Bpp_444 */
>> +	gmc.f.gmc_src_datatype = SRC_DATATYPE_EQU_DST;
>> +	gmc.f.gmc_byte_pix_order = 1;
>> +	gmc.f.gmc_default_sel = 0;
>> +	gmc.f.gmc_rop3 = ROP3_SRCCOPY;
>> +	gmc.f.gmc_dp_src_source = DP_SRC_MEM_RECTANGULAR;
>> +	gmc.f.gmc_clr_cmp_fcn_dis = 1;
>> +	gmc.f.gmc_wr_msk_dis = 1;
>> +	gmc.f.gmc_dp_op = DP_OP_ROP;
>> +	writel(gmc.val, remapped_regs + mmDP_GUI_MASTER_CNTL);
>> +
>> +	dp_datatype.val = dp_mix.val = 0;
>> +	dp_datatype.f.dp_dst_datatype = gmc.f.gmc_dst_datatype;
>> +	dp_datatype.f.dp_brush_datatype = gmc.f.gmc_brush_datatype;
>> +	dp_datatype.f.dp_src2_type = 0;
>> +	dp_datatype.f.dp_src2_datatype = gmc.f.gmc_src_datatype;
>> +	dp_datatype.f.dp_src_datatype = gmc.f.gmc_src_datatype;
>> +	dp_datatype.f.dp_byte_pix_order = gmc.f.gmc_byte_pix_order;
>> +	writel(dp_datatype.val, remapped_regs + mmDP_DATATYPE);
>> +
>> +	dp_mix.f.dp_src_source = gmc.f.gmc_dp_src_source;
>> +	dp_mix.f.dp_src2_source = 1;
>> +	dp_mix.f.dp_rop3 = gmc.f.gmc_rop3;
>> +	dp_mix.f.dp_op = gmc.f.gmc_dp_op;
>> +	writel(dp_mix.val, remapped_regs + mmDP_MIX);
>> +}
>> +
>> +
>> +static void w100fb_fillrect(struct fb_info *info,
>> +                            const struct fb_fillrect *rect)
>> +{
>> +	union dp_gui_master_cntl_u gmc;
>> +
>> +	if (info->state != FBINFO_STATE_RUNNING)
>> +		return;
>> +	if (info->flags & FBINFO_HWACCEL_DISABLED) {
>> +		cfb_fillrect(info, rect);
>> +		return;
>> +	}
>> +
>> +	gmc.val = readl(remapped_regs + mmDP_GUI_MASTER_CNTL);
>> +	gmc.f.gmc_rop3 = ROP3_PATCOPY;
>> +	gmc.f.gmc_brush_datatype = GMC_BRUSH_SOLID_COLOR;
>> +	w100_fifo_wait(2);
>> +	writel(gmc.val, remapped_regs + mmDP_GUI_MASTER_CNTL);
>> +	writel(rect->color, remapped_regs + mmDP_BRUSH_FRGD_CLR);
>> +
>> +	w100_fifo_wait(2);
>> +	writel((rect->dy << 16) | (rect->dx & 0xffff), remapped_regs + mmDST_Y_X);
>> +	writel((rect->width << 16) | (rect->height & 0xffff),
>> +	       remapped_regs + mmDST_WIDTH_HEIGHT);
>> +
>> +	w100_wait_complete();
>> +}
> 
> It's probably more optimal if you need not wait for engine idle here so
> you can maximize the thoroughput of the GPU... If the framebuffer
> needs to be accessed directly, fbcon will call fbops->fb_sync().
> 
>> +
>> +
>> +static void w100fb_copyarea(struct fb_info *info,
>> +                            const struct fb_copyarea *area)
>> +{
>> +	u32 dx = area->dx, dy = area->dy, sx = area->sx, sy = area->sy;
>> +	u32 h = area->height, w = area->width;
>> +	union dp_gui_master_cntl_u gmc;
>> +
>> +	if (info->state != FBINFO_STATE_RUNNING)
>> +		return;
>> +	if (info->flags & FBINFO_HWACCEL_DISABLED) {
>> +		cfb_copyarea(info, area);
>> +		return;
>> +	}
>> +
>> +	gmc.val = readl(remapped_regs + mmDP_GUI_MASTER_CNTL);
>> +	gmc.f.gmc_rop3 = ROP3_SRCCOPY;
>> +	gmc.f.gmc_brush_datatype = GMC_BRUSH_NONE;
>> +	w100_fifo_wait(1);
>> +	writel(gmc.val, remapped_regs + mmDP_GUI_MASTER_CNTL);
>> +
>> +	w100_fifo_wait(3);
>> +	writel((sy << 16) | (sx & 0xffff), remapped_regs + mmSRC_Y_X);
>> +	writel((dy << 16) | (dx & 0xffff), remapped_regs + mmDST_Y_X);
>> +	writel((w << 16) | (h & 0xffff), remapped_regs + mmDST_WIDTH_HEIGHT);
>> +
>> +	w100_wait_complete();
>> +}
> 
> ... and here too...
> 
>> +
>> +
>>  /*
>>   *  Change the resolution by calling the appropriate hardware functions
>>   */
>> @@ -265,6 +419,7 @@
>>  	w100_init_lcd(par);
>>  	w100_set_dispregs(par);
>>  	w100_update_enable();
>> +	w100_init_graphic_engine(par);
>>  
>>  	calc_hsync(par);
>>  
>> @@ -394,8 +549,8 @@
>>  	.fb_set_par   = w100fb_set_par,
>>  	.fb_setcolreg = w100fb_setcolreg,
>>  	.fb_blank     = w100fb_blank,
>> -	.fb_fillrect  = cfb_fillrect,
>> -	.fb_copyarea  = cfb_copyarea,
>> +	.fb_fillrect  = w100fb_fillrect,
>> +	.fb_copyarea  = w100fb_copyarea,
>>  	.fb_imageblit = cfb_imageblit,
>>  };
> 
> You can wrap w100_wait_complete() as w100fb_sync() and add this to fb_ops, ie
> 
> .fb_sync = w100fb_sync.
> 
> 
>>  
>> @@ -543,7 +698,8 @@
>>  	}
>>  
>>  	info->fbops = &w100fb_ops;
>> -	info->flags = FBINFO_DEFAULT;
>> +	info->flags = FBINFO_DEFAULT + FBINFO_HWACCEL_COPYAREA +
>> +		FBINFO_HWACCEL_FILLRECT;

Although the result is the same, I prefer an OR (|) rather than
addition (+)... At first glance, an OR makes you think of bitsetting
while the addition is a bit confusing at first.


>>  	info->node = -1;
>>  	info->screen_base = remapped_fbuf + (W100_FB_BASE-MEM_WINDOW_BASE);
>>  	info->screen_size = REMAPPED_FB_LEN;
>> Index: linux-2.6.15/drivers/video/w100fb.h
>> ===================================================================
>> --- linux-2.6.15.orig/drivers/video/w100fb.h	2006-01-03 04:21:10.000000000 +0100
>> +++ linux-2.6.15/drivers/video/w100fb.h	2006-03-22 16:10:45.000000000 +0100
>> @@ -122,15 +122,32 @@
>>  /* Block DISPLAY End: */
>>  
>>  /* Block GFX Start: */
>> +#define mmDST_OFFSET          0x1004
>> +#define mmDST_PITCH           0x1008
>> +#define mmDST_Y_X             0x1038
>> +#define mmDST_WIDTH_HEIGHT    0x1198
>> +#define mmDP_GUI_MASTER_CNTL  0x106C
>>  #define mmBRUSH_OFFSET        0x108C
>>  #define mmBRUSH_Y_X           0x1074
>> +#define mmDP_BRUSH_FRGD_CLR   0x107C
>> +#define mmSRC_OFFSET          0x11AC
>> +#define mmSRC_PITCH           0x11B0
>> +#define mmSRC_Y_X             0x1034
>>  #define mmDEFAULT_PITCH_OFFSET      0x10A0
>>  #define mmDEFAULT_SC_BOTTOM_RIGHT   0x10A8
>>  #define mmDEFAULT2_SC_BOTTOM_RIGHT  0x10AC
>> +#define mmSC_TOP_LEFT         0x11BC
>> +#define mmSC_BOTTOM_RIGHT     0x11C0
>> +#define mmSRC_SC_BOTTOM_RIGHT 0x11C4
>>  #define mmGLOBAL_ALPHA        0x1210
>>  #define mmFILTER_COEF         0x1214
>>  #define mmMVC_CNTL_START      0x11E0
>>  #define mmE2_ARITHMETIC_CNTL  0x1220
>> +#define mmDP_CNTL             0x11C8
>> +#define mmDP_CNTL_DST_DIR     0x11CC
>> +#define mmDP_DATATYPE         0x12C4
>> +#define mmDP_MIX              0x12C8
>> +#define mmDP_WRITE_MSK        0x12CC
>>  #define mmENG_CNTL            0x13E8
>>  #define mmENG_PERF_CNT        0x13F0
>>  /* Block GFX End: */
>> @@ -179,6 +196,7 @@
>>  /* Block RBBM Start: */
>>  #define mmWAIT_UNTIL        0x1400
>>  #define mmISYNC_CNTL        0x1404
>> +#define mmRBBM_STATUS       0x0140
>>  #define mmRBBM_CNTL         0x0144
>>  #define mmNQWAIT_UNTIL      0x0150
>>  /* Block RBBM End: */
>> @@ -766,5 +784,145 @@
>>  	struct pwrmgt_cntl_t f;
>>  } __attribute__((packed));
>>  
>> +#define SRC_DATATYPE_EQU_DST	3
>> +
>> +#define ROP3_SRCCOPY	0xcc
>> +#define ROP3_PATCOPY	0xf0
>> +
>> +#define GMC_BRUSH_SOLID_COLOR	13
>> +#define GMC_BRUSH_NONE			15
>> +
>> +#define DP_SRC_MEM_RECTANGULAR	2
>> +
>> +#define DP_OP_ROP	0
>> +
>> +struct dp_gui_master_cntl_t {
>> +	unsigned long gmc_src_pitch_offset_cntl		 : 1;
>> +	unsigned long gmc_dst_pitch_offset_cntl		 : 1;
>> +	unsigned long gmc_src_clipping				 : 1;
>> +	unsigned long gmc_dst_clipping				 : 1;
>> +	unsigned long gmc_brush_datatype			 : 4;
>> +	unsigned long gmc_dst_datatype				 : 4;
>> +	unsigned long gmc_src_datatype				 : 3;
>> +	unsigned long gmc_byte_pix_order			 : 1;
>> +	unsigned long gmc_default_sel				 : 1;
>> +	unsigned long gmc_rop3						 : 8;
>> +	unsigned long gmc_dp_src_source				 : 3;
>> +	unsigned long gmc_clr_cmp_fcn_dis			 : 1;
>> +	unsigned long								 : 1;
>> +	unsigned long gmc_wr_msk_dis				 : 1;
>> +	unsigned long gmc_dp_op						 : 1;
>> +} __attribute__((packed));
>> +
>> +union dp_gui_master_cntl_u {
>> +	unsigned long val : 32;
>> +	struct dp_gui_master_cntl_t f;
>> +} __attribute__((packed));
>> +
>> +struct rbbm_status_t {
>> +	unsigned long cmdfifo_avail					 : 7;
>> +	unsigned long								 : 1;
>> +	unsigned long hirq_on_rbb					 : 1;
>> +	unsigned long cprq_on_rbb					 : 1;
>> +	unsigned long cfrq_on_rbb					 : 1;
>> +	unsigned long hirq_in_rtbuf					 : 1;
>> +	unsigned long cprq_in_rtbuf					 : 1;
>> +	unsigned long cfrq_in_rtbuf					 : 1;
>> +	unsigned long cf_pipe_busy					 : 1;
>> +	unsigned long eng_ev_busy					 : 1;
>> +	unsigned long cp_cmdstrm_busy				 : 1;
>> +	unsigned long e2_busy						 : 1;
>> +	unsigned long rb2d_busy						 : 1;
>> +	unsigned long rb3d_busy						 : 1;
>> +	unsigned long se_busy						 : 1;
>> +	unsigned long re_busy						 : 1;
>> +	unsigned long tam_busy						 : 1;
>> +	unsigned long tdm_busy						 : 1;
>> +	unsigned long pb_busy						 : 1;
>> +	unsigned long								 : 6;
>> +	unsigned long gui_active					 : 1;
>> +} __attribute__((packed));
>> +
>> +union rbbm_status_u {
>> +	unsigned long val : 32;
>> +	struct rbbm_status_t f;
>> +} __attribute__((packed));
>> +
>> +struct dp_datatype_t {
>> +	unsigned long dp_dst_datatype                : 4;
>> +	unsigned long                                : 4;
>> +	unsigned long dp_brush_datatype              : 4;
>> +	unsigned long dp_src2_type                   : 1;
>> +	unsigned long dp_src2_datatype               : 3;
>> +	unsigned long dp_src_datatype                : 3;
>> +	unsigned long                                : 11;
>> +	unsigned long dp_byte_pix_order              : 1;
>> +	unsigned long                                : 1;
>> +} __attribute__((packed));
>> +
>> +union dp_datatype_u {
>> +	unsigned long val : 32;
>> +	struct dp_datatype_t f;
>> +} __attribute__((packed));
>> +
>> +struct dp_mix_t {
>> +	unsigned long                                : 8;
>> +	unsigned long dp_src_source                  : 3;
>> +	unsigned long dp_src2_source                 : 3;
>> +	unsigned long                                : 2;
>> +	unsigned long dp_rop3                        : 8;
>> +	unsigned long dp_op                          : 1;
>> +	unsigned long                                : 7;
>> +} __attribute__((packed));
>> +
>> +union dp_mix_u {
>> +	unsigned long val : 32;
>> +	struct dp_mix_t f;
>> +} __attribute__((packed));
>> +
>> +struct eng_cntl_t {
>> +	unsigned long erc_reg_rd_ws                  : 1;
>> +	unsigned long erc_reg_wr_ws                  : 1;
>> +	unsigned long erc_idle_reg_wr                : 1;
>> +	unsigned long dis_engine_triggers            : 1;
>> +	unsigned long dis_rop_src_uses_dst_w_h       : 1;
>> +	unsigned long dis_src_uses_dst_dirmaj        : 1;
>> +	unsigned long                                : 6;
>> +	unsigned long force_3dclk_when_2dclk         : 1;
>> +	unsigned long                                : 19;
>> +} __attribute__((packed));
>> +
>> +union eng_cntl_u {
>> +	unsigned long val : 32;
>> +	struct eng_cntl_t f;
>> +} __attribute__((packed));
>> +
>> +struct dp_cntl_t {
>> +	unsigned long dst_x_dir                      : 1;
>> +	unsigned long dst_y_dir                      : 1;
>> +	unsigned long src_x_dir                      : 1;
>> +	unsigned long src_y_dir                      : 1;
>> +	unsigned long dst_major_x                    : 1;
>> +	unsigned long src_major_x                    : 1;
>> +	unsigned long                                : 26;
>> +} __attribute__((packed));
>> +
>> +union dp_cntl_u {
>> +	unsigned long val : 32;
>> +	struct dp_cntl_t f;
>> +} __attribute__((packed));
>> +
>> +struct dp_cntl_dst_dir_t {
>> +	unsigned long                                : 15;
>> +	unsigned long dst_y_dir                      : 1;
>> +	unsigned long                                : 15;
>> +	unsigned long dst_x_dir                      : 1;
>> +} __attribute__((packed));
>> +
>> +union dp_cntl_dst_dir_u {
>> +	unsigned long val : 32;
>> +	struct dp_cntl_dst_dir_t f;
>> +} __attribute__((packed));
>> +
>>  #endif
>>  
> 
> 
> 



-------------------------------------------------------
This SF.Net email is sponsored by xPML, a groundbreaking scripting language
that extends applications into web and mobile media. Attend the live webcast
and join the prime developer group breaking into this new coding territory!
http://sel.as-us.falkag.net/sel?cmd=lnk&kid=110944&bid=241720&dat=121642

  parent reply	other threads:[~2006-03-23 13:00 UTC|newest]

Thread overview: 8+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-03-23 11:29 [PATCH] Add acceleration support to ATI Imageon Mardy
2006-03-23 12:23 ` Geert Uytterhoeven
2006-03-23 13:22   ` Mardy
     [not found] ` <4422987A.7030900@pol.net>
2006-03-23 12:53   ` Antonino A. Daplas [this message]
2006-03-23 13:28     ` Mardy
2006-03-23 17:18       ` [PATCH] Add acceleration support to ATI Imageon (new patch) Mardy
2006-03-23 17:35         ` [PATCH2] w100fb.h: convert bitfields to u32 Mardy
2006-03-25  0:39         ` [PATCH] Add acceleration support to ATI Imageon (new patch) Antonino A. Daplas

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=44229A2E.1040109@pol.net \
    --to=adaplas@pol.net \
    --cc=linux-fbdev-devel@lists.sourceforge.net \
    --cc=mardy@users.sourceforge.net \
    --cc=rpurdie@rpsys.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).