From mboxrd@z Thu Jan 1 00:00:00 1970 From: Paul Mundt Subject: [PATCH] kyrofb support Date: Tue, 13 Jan 2004 20:33:06 -0500 Sender: linux-fbdev-devel-admin@lists.sourceforge.net Message-ID: <20040114013306.GB9515@linux-sh.org> Mime-Version: 1.0 Content-Type: multipart/signed; micalg=pgp-sha1; protocol="application/pgp-signature"; boundary="3lcZGd9BuhuYXNfi" Return-path: Received: from sc8-sf-mx1-b.sourceforge.net ([10.3.1.11] helo=sc8-sf-mx1.sourceforge.net) by sc8-sf-list1.sourceforge.net with esmtp (Exim 4.30) id 1AgZuX-0000TU-1G for linux-fbdev-devel@lists.sourceforge.net; Tue, 13 Jan 2004 17:33:33 -0800 Received: from smtp.golden.net ([199.166.210.31] helo=newsmtp.golden.net) by sc8-sf-mx1.sourceforge.net with esmtp (TLSv1:AES256-SHA:256) (Exim 4.30) id 1AgZuV-0005mZ-A9 for linux-fbdev-devel@lists.sourceforge.net; Tue, 13 Jan 2004 17:33:31 -0800 Content-Disposition: inline Errors-To: linux-fbdev-devel-admin@lists.sourceforge.net List-Unsubscribe: , List-Id: List-Post: List-Help: List-Subscribe: , List-Archive: To: linux-fbdev-devel@lists.sourceforge.net Cc: James Simmons , Andrew Morton --3lcZGd9BuhuYXNfi Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Content-Transfer-Encoding: quoted-printable This patch adds support for the Kyro graphics boards (STG4000/PowerVR 3/etc.) to 2.6.1. This is a direct port and substantial cleanup / rewrite of the 2.4 driver that's available in the sh64 tree at linux-sh.bkbits.net. Some of the overlay code and the STG4000 bits are still a bit ugly, so be forewarned. Andrew, please consider for -mm unless anyone has any glaring objections. 
drivers/video/Kconfig | 10=20 drivers/video/Makefile | 1=20 drivers/video/fbmem.c | 6=20 drivers/video/kyro/Makefile | 13=20 drivers/video/kyro/STG4000InitDevice.c | 328 ++++++++++++ drivers/video/kyro/STG4000Interface.h | 60 ++ drivers/video/kyro/STG4000OverlayDevice.c | 609 ++++++++++++++++++++++ drivers/video/kyro/STG4000Ramdac.c | 176 ++++++ drivers/video/kyro/STG4000Reg.h | 285 ++++++++++ drivers/video/kyro/STG4000VTG.c | 173 ++++++ drivers/video/kyro/fbdev.c | 812 +++++++++++++++++++++++++= +++++ include/video/kyro.h | 101 +++ 12 files changed, 2574 insertions(+) --- linux-2.6.1/drivers/video/Kconfig Thu Oct 9 09:42:00 2003 +++ fb--devel--2.6/drivers/video/Kconfig Wed Jan 7 17:21:30 2004 @@ -734,6 +734,16 @@ config FB_NEOMAGIC To compile this driver as a module, choose M here: the module will be called neofb. =20 +config FB_KYRO + tristate "IMG Kyro support" + depends on FB && PCI + help + Say Y here if you have a STG4000 / Kyro / PowerVR 3 based + graphics board. + + To compile this driver as a module, choose M here: the + module will be called kyrofb. 
+ config FB_3DFX tristate "3Dfx Banshee/Voodoo3 display support" depends on FB && PCI --- linux-2.6.1/drivers/video/Makefile Mon Sep 8 18:33:16 2003 +++ fb--devel--2.6/drivers/video/Makefile Wed Jan 7 17:15:24 2004 @@ -70,6 +70,7 @@ obj-$(CONFIG_FB_VIRTUAL) +=3D vfb obj-$(CONFIG_FB_HIT) +=3D hitfb.o cfbfillrect.o cfbcopyarea.o= cfbimgblt.o obj-$(CONFIG_FB_E1355) +=3D epson1355fb.o obj-$(CONFIG_FB_PVR2) +=3D pvr2fb.o cfbfillrect.o cfbcopyarea.= o cfbimgblt.o +obj-$(CONFIG_FB_KYRO) +=3D kyro/ cfbfillrect.o cfbcopyarea.o c= fbimgblt.o obj-$(CONFIG_FB_VOODOO1) +=3D sstfb.o cfbfillrect.o cfbcopyarea.o= cfbimgblt.o =20 obj-$(CONFIG_FB_FFB) +=3D ffb.o sbuslib.o cfbimgblt.o cfbcop= yarea.o --- linux-2.6.1/drivers/video/fbmem.c Mon Sep 8 18:33:17 2003 +++ fb--devel--2.6/drivers/video/fbmem.c Wed Jan 7 17:22:49 2004 @@ -159,6 +159,8 @@ extern int tcx_init(void); extern int tcx_setup(char*); extern int leo_init(void); extern int leo_setup(char*); +extern int kyrofb_init(void); +extern int kyrofb_setup(char*); =20 static struct { const char *name; @@ -360,6 +362,10 @@ static struct { #ifdef CONFIG_FB_VOODOO1 { "sstfb", sstfb_init, sstfb_setup }, #endif +#ifdef CONFIG_FB_KYRO + { "kyrofb", kyrofb_init, kyrofb_setup }, +#endif + /* * Generic drivers that don't use resource management (yet) */ --- /dev/null Fri Aug 30 19:31:37 2002 +++ fb--devel--2.6/drivers/video/kyro/Makefile Sat Jan 10 17:17:06 2004 @@ -0,0 +1,13 @@ +# +# Makefile for the Kyro framebuffer driver +# +# Note! Dependencies are done automagically by 'make dep', which also +# removes any old dependencies. DON'T put your own dependencies here +# unless it's something special (ie not a .c file). +# +# Note 2! The CFLAGS definitions are now in the main makefile... 
+ +obj-$(CONFIG_FB_KYRO) +=3D kyrofb.o + +kyrofb-objs :=3D STG4000Ramdac.o STG4000VTG.o STG4000OverlayDevice.o \ + STG4000InitDevice.o fbdev.o --- /dev/null Fri Aug 30 19:31:37 2002 +++ fb--devel--2.6/drivers/video/kyro/STG4000InitDevice.c Wed Jan 7 17:07:= 55 2004 @@ -0,0 +1,328 @@ +/* + * linux/drivers/video/kyro/STG4000InitDevice.c + * + * Copyright (C) 2000 Imagination Technologies Ltd + * Copyright (C) 2002 STMicroelectronics + * + * This file is subject to the terms and conditions of the GNU General Pub= lic + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include + +#include "STG4000Reg.h" + +/* SDRAM fixed settings */ +#define SDRAM_CFG_0 0x49A1 +#define SDRAM_CFG_1 0xA732 +#define SDRAM_CFG_2 0x31 +#define SDRAM_ARB_CFG 0xA0 +#define SDRAM_REFRESH 0x20 + +/* Reset values */ +#define PMX2_SOFTRESET_DAC_RST 0x0001 +#define PMX2_SOFTRESET_C1_RST 0x0004 +#define PMX2_SOFTRESET_C2_RST 0x0008 +#define PMX2_SOFTRESET_3D_RST 0x0010 +#define PMX2_SOFTRESET_VIDIN_RST 0x0020 +#define PMX2_SOFTRESET_TLB_RST 0x0040 +#define PMX2_SOFTRESET_SD_RST 0x0080 +#define PMX2_SOFTRESET_VGA_RST 0x0100 +#define PMX2_SOFTRESET_ROM_RST 0x0200 /* reserved bit, do not reset */ +#define PMX2_SOFTRESET_TA_RST 0x0400 +#define PMX2_SOFTRESET_REG_RST 0x4000 +#define PMX2_SOFTRESET_ALL 0x7fff + +/* Core clock freq */ +#define CORE_PLL_FREQ 1000000 + +/* Reference Clock freq */ +#define REF_FREQ 14318 + +/* PCI Registers */ +static u16 CorePllControl =3D 0x70; + +#define PCI_CONFIG_SUBSYS_ID 0x2e + +/* Misc */ +#define CORE_PLL_MODE_REG_0_7 3 +#define CORE_PLL_MODE_REG_8_15 2 +#define CORE_PLL_MODE_CONFIG_REG 1 +#define DAC_PLL_CONFIG_REG 0 + +#define STG_MAX_VCO 500000 +#define STG_MIN_VCO 100000 + +/* PLL Clock */ +#define STG4K3_PLL_SCALER 8 /* scale numbers by 2^8 for fixed poin= t calc */ +#define STG4K3_PLL_MIN_R 2 /* Minimum multiplier */ +#define STG4K3_PLL_MAX_R 33 /* Max */ +#define STG4K3_PLL_MIN_F 
2 /* Minimum divisor */ +#define STG4K3_PLL_MAX_F 513 /* Max */ +#define STG4K3_PLL_MIN_OD 0 /* Min output divider (shift) */ +#define STG4K3_PLL_MAX_OD 2 /* Max */ +#define STG4K3_PLL_MIN_VCO_SC (100000000 >> STG4K3_PLL_SCALER) /* Min = VCO rate */ +#define STG4K3_PLL_MAX_VCO_SC (500000000 >> STG4K3_PLL_SCALER) /* Max = VCO rate */ +#define STG4K3_PLL_MINR_VCO_SC (100000000 >> STG4K3_PLL_SCALER) /* Min = VCO rate (restricted) */ +#define STG4K3_PLL_MAXR_VCO_SC (500000000 >> STG4K3_PLL_SCALER) /* Max = VCO rate (restricted) */ +#define STG4K3_PLL_MINR_VCO 100000000 /* Min VCO rate (restricted) */ +#define STG4K3_PLL_MAX_VCO 500000000 /* Max VCO rate */ +#define STG4K3_PLL_MAXR_VCO 500000000 /* Max VCO rate (restricted) */ + +#define OS_DELAY(X) \ +{ \ +volatile u32 i,count=3D0; \ + for(i=3D0;i> 4; + dwChipSpeedIdx =3D (dwSubSysID & 0x180) >> 7; + + if (dwMemTypeIdx > 4 || dwChipSpeedIdx > 2) + return 0; + + /* Program SD-RAM interface */ + STG_WRITE_REG(SDRAMArbiterConf, adwSDRAMArgCfg0[dwMemTypeIdx]); + if (dwRevID < 5) { + STG_WRITE_REG(SDRAMConf0, 0x49A1); + STG_WRITE_REG(SDRAMConf1, adwSDRAMCfg1[dwMemTypeIdx]); + } else { + STG_WRITE_REG(SDRAMConf0, 0x4DF1); + STG_WRITE_REG(SDRAMConf1, adwSDRAMCfg2[dwMemTypeIdx]); + } + + STG_WRITE_REG(SDRAMConf2, 0x31); + STG_WRITE_REG(SDRAMRefresh, adwSDRAMRsh[dwChipSpeedIdx]); + + return adwChipSpeed[dwChipSpeedIdx] * 10000; +} + +/*-----------------------------------------------------------------------*/ + +u32 ProgramClock(u32 refClock, + u32 coreClock, + u32 * FOut, u32 * ROut, u32 * POut) +{ + u32 R =3D 0, F =3D 0, OD =3D 0, ODIndex =3D 0; + u32 ulBestR =3D 0, ulBestF =3D 0, ulBestOD =3D 0; + u32 ulBestVCO =3D 0, ulBestClk =3D 0, ulBestScore =3D 0; + u32 ulScore, ulPhaseScore, ulVcoScore; + u32 ulTmp =3D 0, ulVCO; + u32 ulScaleClockReq, ulMinClock, ulMaxClock; + u32 ODValues[] =3D { 1, 2, 0 }; + + /* Translate clock in Hz */ + coreClock *=3D 100; /* in Hz */ + refClock *=3D 1000; /* in Hz */ + + /* Work out acceptable 
clock + * The method calculates ~ +- 0.4% (1/256) + */ + ulMinClock =3D coreClock - (coreClock >> 8); + ulMaxClock =3D coreClock + (coreClock >> 8); + + /* Scale clock required for use in calculations */ + ulScaleClockReq =3D coreClock >> STG4K3_PLL_SCALER; + + /* Iterate through post divider values */ + for (ODIndex =3D 0; ODIndex < 3; ODIndex++) { + OD =3D ODValues[ODIndex]; + R =3D STG4K3_PLL_MIN_R; + + /* loop for pre-divider from min to max */ + while (R <=3D STG4K3_PLL_MAX_R) { + /* estimate required feedback multiplier */ + ulTmp =3D R * (ulScaleClockReq << OD); + + /* F =3D ClkRequired * R * (2^OD) / Fref */ + F =3D (u32)(ulTmp / (refClock >> STG4K3_PLL_SCALER)); + + /* compensate for accuracy */ + if (F > STG4K3_PLL_MIN_F) + F--; + + + /*=20 + * We should be close to our target frequency (if it's + * achievable with current OD & R) let's iterate + * through F for best fit + */ + while ((F >=3D STG4K3_PLL_MIN_F) && + (F <=3D STG4K3_PLL_MAX_F)) { + /* Calc VCO at full accuracy */ + ulVCO =3D refClock / R; + ulVCO =3D F * ulVCO; + + /*=20 + * Check it's within restricted VCO range + * unless of course the desired frequency is + * above the restricted range, then test + * against VCO limit + */ + if ((ulVCO >=3D STG4K3_PLL_MINR_VCO) && + ((ulVCO <=3D STG4K3_PLL_MAXR_VCO) || + ((coreClock > STG4K3_PLL_MAXR_VCO) + && (ulVCO <=3D STG4K3_PLL_MAX_VCO)))) { + ulTmp =3D (ulVCO >> OD); /* Clock =3D VCO / (2^OD) */ + + /* Is this clock good enough? 
*/ + if ((ulTmp >=3D ulMinClock) + && (ulTmp <=3D ulMaxClock)) { + ulPhaseScore =3D (((refClock / R) - (refClock / STG4K3_PLL_MAX_R))) = / ((refClock - (refClock / STG4K3_PLL_MAX_R)) >> 10); + + ulVcoScore =3D ((ulVCO - STG4K3_PLL_MINR_VCO)) / ((STG4K3_PLL_MAXR_V= CO - STG4K3_PLL_MINR_VCO) >> 10); + ulScore =3D ulPhaseScore + ulVcoScore;=20 + + if (!ulBestScore) { + ulBestVCO =3D ulVCO; + ulBestOD =3D OD; + ulBestF =3D F; + ulBestR =3D R; + ulBestClk =3D ulTmp; + ulBestScore =3D + ulScore; + } + /* is this better, ( aim for highest Score) */ + /*---------------------------------------------------------------------= ----- + Here we want to use a scoring system which wi= ll take account of both the + value at the phase comparater and the VCO outp= ut =20 + to do this we will use a cumulative score bet= ween the two = =20 + The way this ends up is that we choose the first= value in the loop anyway + but we shall keep this code in case new restrict= ions come into play + ------------------------------------------------= --------------------------*/ + if ((ulScore >=3D ulBestScore) && (OD > 0)) { + ulBestVCO =3D ulVCO; + ulBestOD =3D OD; + ulBestF =3D F; + ulBestR =3D R; + ulBestClk =3D ulTmp; + ulBestScore =3D + ulScore; + } + } + } + F++; + } + R++; + } + } + + /* + did we find anything? 
+ Then return RFOD + */ + if (ulBestScore) { + *ROut =3D ulBestR; + *FOut =3D ulBestF; + + if ((ulBestOD =3D=3D 2) || (ulBestOD =3D=3D 3)) { + *POut =3D 3; + } else + *POut =3D ulBestOD; + + } + + return (ulBestClk); +} + +int SetCoreClockPLL(volatile STG4000REG * pSTGReg, struct pci_dev *pDev) +{ + u32 F, R, P; + u16 core_pll =3D 0, sub; + u32 ulCoreClock; + u32 tmp; + u32 ulChipSpeed; + u8 rev; + + STG_WRITE_REG(IntMask, 0xFFFF); + + /* Disable Primary Core Thread0 */ + tmp =3D STG_READ_REG(Thread0Enable); + CLEAR_BIT(0); + STG_WRITE_REG(Thread0Enable, tmp); + + /* Disable Primary Core Thread1 */ + tmp =3D STG_READ_REG(Thread1Enable); + CLEAR_BIT(0); + STG_WRITE_REG(Thread1Enable, tmp); + + STG_WRITE_REG(SoftwareReset, + PMX2_SOFTRESET_REG_RST | PMX2_SOFTRESET_ROM_RST); + STG_WRITE_REG(SoftwareReset, + PMX2_SOFTRESET_REG_RST | PMX2_SOFTRESET_TA_RST | + PMX2_SOFTRESET_ROM_RST); + + /* Need to play around to reset TA */ + STG_WRITE_REG(TAConfiguration, 0); + STG_WRITE_REG(SoftwareReset, + PMX2_SOFTRESET_REG_RST | PMX2_SOFTRESET_ROM_RST); + STG_WRITE_REG(SoftwareReset, + PMX2_SOFTRESET_REG_RST | PMX2_SOFTRESET_TA_RST | + PMX2_SOFTRESET_ROM_RST); + + pci_read_config_word(pDev, PCI_CONFIG_SUBSYS_ID, &sub); + pci_read_config_byte(pDev, PCI_REVISION_ID, &rev); + + ulChipSpeed =3D InitSDRAMRegisters(pSTGReg, (u32)sub, (u32)rev); + + if (ulChipSpeed =3D=3D 0) + return -EINVAL; + + ulCoreClock =3D ProgramClock(REF_FREQ, CORE_PLL_FREQ, &F, &R, &P); + + core_pll |=3D ((P) | ((F - 2) << 2) | ((R - 2) << 11)); + + /* Set Core PLL Control to Core PLL Mode */ + + /* Send bits 0:7 of the Core PLL Mode register */ + tmp =3D ((CORE_PLL_MODE_REG_0_7 << 8) | (core_pll & 0x00FF)); + pci_write_config_word(pDev, CorePllControl, tmp); + /* Without some delay between the PCI config writes the clock does + not reliably set when the code is compiled -O3 + */ + OS_DELAY(1000000); + + tmp |=3D SET_BIT(14); + pci_write_config_word(pDev, CorePllControl, tmp); + OS_DELAY(1000000); + + /* Send 
bits 8:15 of the Core PLL Mode register */ + tmp =3D + ((CORE_PLL_MODE_REG_8_15 << 8) | ((core_pll & 0xFF00) >> 8)); + pci_write_config_word(pDev, CorePllControl, tmp); + OS_DELAY(1000000); + + tmp |=3D SET_BIT(14); + pci_write_config_word(pDev, CorePllControl, tmp); + OS_DELAY(1000000); + + STG_WRITE_REG(SoftwareReset, PMX2_SOFTRESET_ALL); + +#if 0 + /* Enable Primary Core Thread0 */ + tmp =3D ((STG_READ_REG(Thread0Enable)) | SET_BIT(0)); + STG_WRITE_REG(Thread0Enable, tmp); + + /* Enable Primary Core Thread1 */ + tmp =3D ((STG_READ_REG(Thread1Enable)) | SET_BIT(0)); + STG_WRITE_REG(Thread1Enable, tmp); +#endif + + return 0; +} --- /dev/null Fri Aug 30 19:31:37 2002 +++ fb--devel--2.6/drivers/video/kyro/STG4000Interface.h Sat Jan 10 20:46:4= 3 2004 @@ -0,0 +1,60 @@ +/* + * linux/drivers/video/kyro/STG4000Interface.h + * + * Copyright (C) 2002 STMicroelectronics + * + * This file is subject to the terms and conditions of the GNU General Pub= lic + * License. See the file COPYING in the main directory of this archive + * for more details. 
+ */ + +#ifndef _STG4000INTERFACE_H +#define _STG4000INTERFACE_H + +struct pci_dev; + +/* + * Ramdac Setup + */ +extern int InitialiseRamdac(volatile STG4000REG *pSTGReg, u32 displayDepth, + u32 displayWidth, u32 displayHeight, + s32 HSyncPolarity, s32 VSyncPolarity, + u32 *pixelClock); + +extern void DisableRamdacOutput(volatile STG4000REG * pSTGReg); +extern void EnableRamdacOutput(volatile STG4000REG * pSTGReg); + +/* + * Timing generator setup + */ +extern void DisableVGA(volatile STG4000REG * pSTGReg); +extern void StopVTG(volatile STG4000REG * pSTGReg); +extern void StartVTG(volatile STG4000REG * pSTGReg); +extern void SetupVTG(volatile STG4000REG * pSTGReg, + const struct kyrofb_info * pTiming); + +extern u32 ProgramClock(u32 refClock, u32 coreClock, u32 *FOut, u32 *ROut,= u32 *POut); +extern int SetCoreClockPLL(volatile STG4000REG * pSTGReg, struct pci_dev *= pDev); + +/* + * Overlay setup + */ +extern void ResetOverlayRegisters(volatile STG4000REG * pSTGReg); + +extern int CreateOverlaySurface(volatile STG4000REG * pSTGReg, + u32 ulWidth, u32 ulHeight, + int bLinear, + u32 ulOverlayOffset, + u32 * retStride, u32 * retUVStride); + +extern int SetOverlayBlendMode(volatile STG4000REG * pSTGReg, + OVRL_BLEND_MODE mode, + u32 ulAlpha, u32 ulColorKey); + +extern int SetOverlayViewPort(volatile STG4000REG * pSTGReg, + u32 left, u32 top, + u32 right, u32 bottom); + +extern void EnableOverlayPlane(volatile STG4000REG * pSTGReg); + +#endif /* _STG4000INTERFACE_H */ --- /dev/null Fri Aug 30 19:31:37 2002 +++ fb--devel--2.6/drivers/video/kyro/STG4000OverlayDevice.c Wed Jan 7 17:= 07:55 2004 @@ -0,0 +1,609 @@ +/* + * linux/drivers/video/kyro/STG4000OverlayDevice.c + * + * Copyright (C) 2000 Imagination Technologies Ltd + * Copyright (C) 2002 STMicroelectronics + * + * This file is subject to the terms and conditions of the GNU General Pub= lic + * License. See the file COPYING in the main directory of this archive + * for more details. 
+ */ + +#include +#include +#include + +#include "STG4000Reg.h" + +/* HW Defines */ + +#define STG4000_NO_SCALING 0x800 +#define STG4000_NO_DECIMATION 0xFFFFFFFF + +/* Primary surface */ +#define STG4000_PRIM_NUM_PIX 5 +#define STG4000_PRIM_ALIGN 4 +#define STG4000_PRIM_ADDR_BITS 20 + +#define STG4000_PRIM_MIN_WIDTH 640 +#define STG4000_PRIM_MAX_WIDTH 1600 +#define STG4000_PRIM_MIN_HEIGHT 480 +#define STG4000_PRIM_MAX_HEIGHT 1200 + +/* Overlay surface */ +#define STG4000_OVRL_NUM_PIX 4 +#define STG4000_OVRL_ALIGN 2 +#define STG4000_OVRL_ADDR_BITS 20 +#define STG4000_OVRL_NUM_MODES 5 + +#define STG4000_OVRL_MIN_WIDTH 0 +#define STG4000_OVRL_MAX_WIDTH 720 +#define STG4000_OVRL_MIN_HEIGHT 0 +#define STG4000_OVRL_MAX_HEIGHT 576 + +/* Decimation and Scaling */ +u32 adwDecim8[33] =3D { + 0xffffffff, 0xfffeffff, 0xffdffbff, 0xfefefeff, 0xfdf7efbf, + 0xfbdf7bdf, 0xf7bbddef, 0xeeeeeeef, 0xeeddbb77, 0xedb76db7, + 0xdb6db6db, 0xdb5b5b5b, 0xdab5ad6b, 0xd5ab55ab, 0xd555aaab, + 0xaaaaaaab, 0xaaaa5555, 0xaa952a55, 0xa94a5295, 0xa5252525, + 0xa4924925, 0x92491249, 0x91224489, 0x91111111, 0x90884211, + 0x88410821, 0x88102041, 0x81010101, 0x80800801, 0x80010001, + 0x80000001, 0x00000001, 0x00000000 +}; + +typedef struct _OVRL_SRC_DEST { + /*clipped on-screen pixel position of overlay */ + u32 ulDstX1; + u32 ulDstY1; + u32 ulDstX2; + u32 ulDstY2; + + /*clipped pixel pos of source data within buffer thses need to be 128 bit= word aligned */ + u32 ulSrcX1; + u32 ulSrcY1; + u32 ulSrcX2; + u32 ulSrcY2; + + /* on-screen pixel position of overlay */ + s32 lDstX1; + s32 lDstY1; + s32 lDstX2; + s32 lDstY2; +} OVRL_SRC_DEST; + +static u32 ovlWidth, ovlHeight, ovlStride; +static int ovlLinear; + +void ResetOverlayRegisters(volatile STG4000REG * pSTGReg) +{ + u32 tmp; + + /* Set Overlay address to default */ + tmp =3D STG_READ_REG(DACOverlayAddr); + CLEAR_BITS_FRM_TO(0, 20); + CLEAR_BIT(31); + STG_WRITE_REG(DACOverlayAddr, tmp); + + /* Set Overlay U address */ + tmp =3D 
STG_READ_REG(DACOverlayUAddr); + CLEAR_BITS_FRM_TO(0, 20); + STG_WRITE_REG(DACOverlayUAddr, tmp); + + /* Set Overlay V address */ + tmp =3D STG_READ_REG(DACOverlayVAddr); + CLEAR_BITS_FRM_TO(0, 20); + STG_WRITE_REG(DACOverlayVAddr, tmp); + + /* Set Overlay Size */ + tmp =3D STG_READ_REG(DACOverlaySize); + CLEAR_BITS_FRM_TO(0, 10); + CLEAR_BITS_FRM_TO(12, 31); + STG_WRITE_REG(DACOverlaySize, tmp); + + /* Set Overlay Vt Decimation */ + tmp =3D STG4000_NO_DECIMATION; + STG_WRITE_REG(DACOverlayVtDec, tmp); + + /* Set Overlay format to default value */ + tmp =3D STG_READ_REG(DACPixelFormat); + CLEAR_BITS_FRM_TO(4, 7); + CLEAR_BITS_FRM_TO(16, 22); + STG_WRITE_REG(DACPixelFormat, tmp); + + /* Set Vertical scaling to default */ + tmp =3D STG_READ_REG(DACVerticalScal); + CLEAR_BITS_FRM_TO(0, 11); + CLEAR_BITS_FRM_TO(16, 22); + tmp |=3D STG4000_NO_SCALING; /* Set to no scaling */ + STG_WRITE_REG(DACVerticalScal, tmp); + + /* Set Horizontal Scaling to default */ + tmp =3D STG_READ_REG(DACHorizontalScal); + CLEAR_BITS_FRM_TO(0, 11); + CLEAR_BITS_FRM_TO(16, 17); + tmp |=3D STG4000_NO_SCALING; /* Set to no scaling */ + STG_WRITE_REG(DACHorizontalScal, tmp); + + /* Set Blend mode to Alpha Blend */ + /* ????? 
SG 08/11/2001 Surely this isn't the alpha blend mode, + hopefully its overwrite + */ + tmp =3D STG_READ_REG(DACBlendCtrl); + CLEAR_BITS_FRM_TO(0, 30); + tmp =3D (GRAPHICS_MODE << 28); + STG_WRITE_REG(DACBlendCtrl, tmp); + +} + +/*------------------------------------------------------------------------= -*/ + +int CreateOverlaySurface(volatile STG4000REG * pSTGReg, + u32 inWidth, + u32 inHeight, + int bLinear, + u32 ulOverlayOffset, + u32 * retStride, u32 * retUVStride) +{ + u32 tmp; + u32 ulStride; + + if (inWidth > STG4000_OVRL_MAX_WIDTH || + inHeight > STG4000_OVRL_MAX_HEIGHT) { + return -EINVAL; + } + + /* Stride in 16 byte words - 16Bpp */ + if (bLinear) { + /* Format is 16bits so num 16 byte words is width/8 */ + if ((inWidth & 0x7) =3D=3D 0) { /* inWidth % 8 */ + ulStride =3D (inWidth / 8); + } else { + /* Round up to next 16byte boundary */ + ulStride =3D ((inWidth + 8) / 8); + } + } else { + /* Y component is 8bits so num 16 byte words is width/16 */ + if ((inWidth & 0xf) =3D=3D 0) { /* inWidth % 16 */ + ulStride =3D (inWidth / 16); + } else { + /* Round up to next 16byte boundary */ + ulStride =3D ((inWidth + 16) / 16); + } + } + + + /* Set Overlay address and Format mode */ + tmp =3D STG_READ_REG(DACOverlayAddr); + CLEAR_BITS_FRM_TO(0, 20); + if (bLinear) { + CLEAR_BIT(31); /* Overlay format to Linear */ + } else { + tmp |=3D SET_BIT(31); /* Overlay format to Planer */ + } + + /* Only bits 24:4 of the Overlay address */ + tmp |=3D (ulOverlayOffset >> 4); + STG_WRITE_REG(DACOverlayAddr, tmp); + + if (!bLinear) { + u32 uvSize =3D + (inWidth & 0x1) ? 
((inWidth + 1) / 2) : (inWidth / 2); /* U/V width is half of inWidth, rounded up */ + u32 uvStride; + u32 ulOffset; + /* one byte per U/V sample, so num 16 byte words is uvSize/16 */ + if ((uvSize & 0xf) =3D=3D 0) { /* uvSize % 16 */ + uvStride =3D (uvSize / 16); + } else { + /* Round up to next 16byte word */ + uvStride =3D ((uvSize + 16) / 16); + } + + ulOffset =3D ulOverlayOffset + (inHeight * (ulStride * 16)); + /* Align U,V data to 32byte boundary */ + if ((ulOffset & 0x1f) !=3D 0) + ulOffset =3D (ulOffset + 32L) & 0xffffffE0L; + + tmp =3D STG_READ_REG(DACOverlayUAddr); + CLEAR_BITS_FRM_TO(0, 20); + tmp |=3D (ulOffset >> 4); + STG_WRITE_REG(DACOverlayUAddr, tmp); + + ulOffset +=3D (inHeight / 2) * (uvStride * 16); + /* Align U,V data to 32byte boundary */ + if ((ulOffset & 0x1f) !=3D 0) + ulOffset =3D (ulOffset + 32L) & 0xffffffE0L; + + tmp =3D STG_READ_REG(DACOverlayVAddr); + CLEAR_BITS_FRM_TO(0, 20); + tmp |=3D (ulOffset >> 4); + STG_WRITE_REG(DACOverlayVAddr, tmp); + + *retUVStride =3D uvStride * 16; + } + + + /* Set Overlay YUV pixel format + * Make sure that LUT not used - ?????? 
+ */ + tmp =3D STG_READ_REG(DACPixelFormat); + /* Only support Planer or UYVY linear formats */ + CLEAR_BITS_FRM_TO(4, 9); + STG_WRITE_REG(DACPixelFormat, tmp); + + ovlWidth =3D inWidth; + ovlHeight =3D inHeight; + ovlStride =3D ulStride; + ovlLinear =3D bLinear; + *retStride =3D ulStride << 4; /* In bytes */ + + return 0; +} + +/*------------------------------------------------------------------------= -*/ + +int SetOverlayBlendMode(volatile STG4000REG * pSTGReg, + OVRL_BLEND_MODE mode, + u32 ulAlpha, u32 ulColorKey) +{ + u32 tmp; + + tmp =3D STG_READ_REG(DACBlendCtrl); + CLEAR_BITS_FRM_TO(28, 30); + tmp |=3D (mode << 28); + + switch (mode) { + case COLOR_KEY: + CLEAR_BITS_FRM_TO(0, 23); + tmp |=3D (ulColorKey & 0x00FFFFFF); + break; + + case GLOBAL_ALPHA: + CLEAR_BITS_FRM_TO(24, 27); + tmp |=3D ((ulAlpha & 0xF) << 24); + break; + + case CK_PIXEL_ALPHA: + CLEAR_BITS_FRM_TO(0, 23); + tmp |=3D (ulColorKey & 0x00FFFFFF); + break; + + case CK_GLOBAL_ALPHA: + CLEAR_BITS_FRM_TO(0, 23); + tmp |=3D (ulColorKey & 0x00FFFFFF); + CLEAR_BITS_FRM_TO(24, 27); + tmp |=3D ((ulAlpha & 0xF) << 24); + break; + + case GRAPHICS_MODE: + case PER_PIXEL_ALPHA: + break; + + default: + return -EINVAL; + } + + STG_WRITE_REG(DACBlendCtrl, tmp); + + return 0; +} + +/*------------------------------------------------------------------------= -*/ + +void EnableOverlayPlane(volatile STG4000REG * pSTGReg) +{ + u32 tmp; + /* Enable Overlay */ + tmp =3D STG_READ_REG(DACPixelFormat); + tmp |=3D SET_BIT(7); + STG_WRITE_REG(DACPixelFormat, tmp); + + /* Set video stream control */ + tmp =3D STG_READ_REG(DACStreamCtrl); + tmp |=3D SET_BIT(1); /* video stream */ + STG_WRITE_REG(DACStreamCtrl, tmp); +} + +/*------------------------------------------------------------------------= -*/ + +static u32 Overlap(u32 ulBits, u32 ulPattern) +{ + u32 ulCount =3D 0; + + while (ulBits) { + if (!(ulPattern & 1)) + ulCount++; + ulBits--; + ulPattern =3D ulPattern >> 1; + } + + return ulCount; + +} + +int 
SetOverlayViewPort(volatile STG4000REG * pSTGReg, + u32 left, u32 top, + u32 right, u32 bottom) +{ + OVRL_SRC_DEST srcDest; + + u32 ulSrcTop, ulSrcBottom; + u32 ulSrc, ulDest; + u32 ulFxScale, ulFxOffset; + u32 ulHeight, ulWidth; + u32 ulPattern; + u32 ulDecimate, ulDecimated; + u32 ulApplied; + u32 ulDacXScale, ulDacYScale; + u32 ulScale; + u32 ulLeft, ulRight; + u32 ulSrcLeft, ulSrcRight; + u32 ulScaleLeft, ulScaleRight; + u32 ulhDecim; + u32 ulsVal; + u32 ulVertDecFactor; + int bResult; + u32 ulClipOff =3D 0; + u32 ulBits =3D 0; + u32 ulsAdd =3D 0; + u32 tmp, ulStride; + u32 ulExcessPixels, ulClip, ulExtraLines; + + + srcDest.ulSrcX1 =3D 0; + srcDest.ulSrcY1 =3D 0; + srcDest.ulSrcX2 =3D ovlWidth - 1; + srcDest.ulSrcY2 =3D ovlHeight - 1; + + srcDest.ulDstX1 =3D left; + srcDest.ulDstY1 =3D top; + srcDest.ulDstX2 =3D right; + srcDest.ulDstY2 =3D bottom; + + srcDest.lDstX1 =3D srcDest.ulDstX1; + srcDest.lDstY1 =3D srcDest.ulDstY1; + srcDest.lDstX2 =3D srcDest.ulDstX2; + srcDest.lDstY2 =3D srcDest.ulDstY2; + + /************* Vertical decimation/scaling ******************/ + + /* Get Src Top and Bottom */ + ulSrcTop =3D srcDest.ulSrcY1; + ulSrcBottom =3D srcDest.ulSrcY2; + + ulSrc =3D ulSrcBottom - ulSrcTop; + ulDest =3D srcDest.lDstY2 - srcDest.lDstY1; /* on-screen overlay */ + + if (ulSrc <=3D 1) + return -EINVAL; + + /* First work out the position we are to display as offset from the=20 + * source of the buffer + */ + ulFxScale =3D (ulDest << 11) / ulSrc; /* fixed point scale factor */ + ulFxOffset =3D (srcDest.lDstY2 - srcDest.ulDstY2) << 11; + + ulSrcBottom =3D ulSrcBottom - (ulFxOffset / ulFxScale); + ulSrc =3D ulSrcBottom - ulSrcTop; + ulHeight =3D ulSrc; + + ulDest =3D srcDest.ulDstY2 - (srcDest.ulDstY1 - 1); + ulPattern =3D adwDecim8[ulBits]; + + /* At this point ulSrc represents the input decimator */ + if (ulSrc > ulDest) { + ulDecimate =3D ulSrc - ulDest; + ulBits =3D 0; + ulApplied =3D ulSrc / 32; + + while (((ulBits * ulApplied) + + Overlap((ulSrc % 32), 
+ adwDecim8[ulBits])) < ulDecimate) + ulBits++; + + ulPattern =3D adwDecim8[ulBits]; + ulDecimated =3D + (ulBits * ulApplied) + Overlap((ulSrc % 32), + ulPattern); + ulSrc =3D ulSrc - ulDecimated; /* the number number of lines that will g= o into the scaler */ + } + + if (ulBits && (ulBits !=3D 32)) { + ulVertDecFactor =3D (63 - ulBits) / (32 - ulBits); /* vertical decimatio= n factor scaled up to nearest integer */ + } else { + ulVertDecFactor =3D 1; + } + + ulDacYScale =3D ((ulSrc - 1) * 2048) / (ulDest + 1); + + tmp =3D STG_READ_REG(DACOverlayVtDec); /* Decimation */ + CLEAR_BITS_FRM_TO(0, 31); + tmp =3D ulPattern; + STG_WRITE_REG(DACOverlayVtDec, tmp); + + /***************** Horizontal decimation/scaling ************************= ***/ + + /*=20 + * Now we handle the horizontal case, this is a simplified verison of + * the vertical case in that we decimate by factors of 2. as we are + * working in words we should always be able to decimate by these + * factors. as we always have to have a buffer which is aligned to a + * whole number of 128 bit words, we must align the left side to the + * lowest to the next lowest 128 bit boundary, and the right hand edge + * to the next largets boundary, (in a similar way to how we didi it in + * PMX1) as the left and right hand edges are aligned to these + * boundaries normally this only becomes an issue when we are chopping + * of one of the sides We shall work out vertical stuff first + */ + ulSrc =3D srcDest.ulSrcX2 - srcDest.ulSrcX1; + ulDest =3D srcDest.lDstX2 - srcDest.lDstX1; +#ifdef _OLDCODE + ulLeft =3D srcDest.ulDstX1; + ulRight =3D srcDest.ulDstX2; +#else + if (srcDest.ulDstX1 > 2) { + ulLeft =3D srcDest.ulDstX1 + 2; + ulRight =3D srcDest.ulDstX2 + 1; + } else { + ulLeft =3D srcDest.ulDstX1; + ulRight =3D srcDest.ulDstX2 + 1; + } +#endif + /* first work out the position we are to display as offset from the sourc= e of the buffer */ + bResult =3D 1; + + do { + if (ulDest =3D=3D 0) + return -EINVAL; + + /* source 
pixels per dest pixel <<11 */ + ulFxScale =3D ((ulSrc - 1) << 11) / (ulDest); + + /* then number of destination pixels out we are */ + ulFxOffset =3D ulFxScale * ((srcDest.ulDstX1 - srcDest.lDstX1) + ulClipO= ff); + ulFxOffset >>=3D 11; + + /* this replaces the code which was making a decision as to use either u= lFxOffset or ulSrcX1 */ + ulSrcLeft =3D srcDest.ulSrcX1 + ulFxOffset; + + /* then number of destination pixels out we are */ + ulFxOffset =3D ulFxScale * (srcDest.lDstX2 - srcDest.ulDstX2);=09 + ulFxOffset >>=3D 11; + + ulSrcRight =3D srcDest.ulSrcX2 - ulFxOffset; + + /*=20 + * we must align these to our 128 bit boundaries. we shall + * round down the pixel pos to the nearest 8 pixels. + */ + ulScaleLeft =3D ulSrcLeft; + ulScaleRight =3D ulSrcRight; + + /* shift fxscale until it is in the range of the scaler */ + ulhDecim =3D 0; + ulScale =3D (((ulSrcRight - ulSrcLeft) - 1) << (11 - ulhDecim)) / (ulRig= ht - ulLeft + 2); + + while (ulScale > 0x800) { + ulhDecim++; + ulScale =3D (((ulSrcRight - ulSrcLeft) - 1) << (11 - ulhDecim)) / (ulRi= ght - ulLeft + 2); + } + + /*=20 + * to try and get the best values We first try and use + * src/dwdest for the scale factor, then we move onto src-1 + * + * we want to check to see if we will need to clip data, if so + * then we should clip our source so that we don't need to + */ + if (!ovlLinear) { + ulSrcLeft &=3D ~0x1f; + + /*=20 + * we must align the right hand edge to the next 32 + * pixel` boundary, must be on a 256 boundary so u, and + * v are 128 bit aligned + */ + ulSrcRight =3D (ulSrcRight + 0x1f) & ~0x1f; + } else { + ulSrcLeft &=3D ~0x7; + + /*=20 + * we must align the right hand edge to the next + * 8pixel` boundary + */ + ulSrcRight =3D (ulSrcRight + 0x7) & ~0x7; + } + + /* this is the input size line store needs to cope with */ + ulWidth =3D ulSrcRight - ulSrcLeft; + + /*=20 + * use unclipped value to work out scale factror this is the + * scale factor we want we shall now work out the horizonal + * 
decimation and scaling + */ + ulsVal =3D ((ulWidth / 8) >> ulhDecim); + + if ((ulWidth !=3D (ulsVal << ulhDecim) * 8)) + ulsAdd =3D 1; + + /* input pixels to scaler; */ + ulSrc =3D ulWidth >> ulhDecim; + + if (ulSrc <=3D 2) + return -EINVAL; + + ulExcessPixels =3D ((((ulScaleLeft - ulSrcLeft)) << (11 - ulhDecim)) / u= lScale); + + ulClip =3D (ulSrc << 11) / ulScale; + ulClip -=3D (ulRight - ulLeft); + ulClip +=3D ulExcessPixels; + + if (ulClip) + ulClip--; + + /* We may need to do more here if we really have a HW rev < 5 */ + } while (!bResult); + + ulExtraLines =3D (1 << ulhDecim) * ulVertDecFactor; + ulExtraLines +=3D 64; + ulHeight +=3D ulExtraLines; + + ulDacXScale =3D ulScale; + + + tmp =3D STG_READ_REG(DACVerticalScal); + CLEAR_BITS_FRM_TO(0, 11); + CLEAR_BITS_FRM_TO(16, 22); /* Vertical Scaling */ + + /* Calculate new output line stride, this is always the number of 422 + words in the line buffer, so it doesn't matter if the + mode is 420. Then set the vertical scale register. + */ + ulStride =3D (ulWidth >> (ulhDecim + 3)) + ulsAdd; + tmp |=3D ((ulStride << 16) | (ulDacYScale)); /* DAC_LS_CTRL =3D stride */ + STG_WRITE_REG(DACVerticalScal, tmp); + + /* Now set up the overlay size using the modified width and height + from decimate and scaling calculations + */ + tmp =3D STG_READ_REG(DACOverlaySize); + CLEAR_BITS_FRM_TO(0, 10); + CLEAR_BITS_FRM_TO(12, 31); + + if (ovlLinear) { + tmp |=3D + (ovlStride | ((ulHeight + 1) << 12) | + (((ulWidth / 8) - 1) << 23)); + } else { + tmp |=3D + (ovlStride | ((ulHeight + 1) << 12) | + (((ulWidth / 32) - 1) << 23)); + } + + STG_WRITE_REG(DACOverlaySize, tmp); + + /* Set Video Window Start */ + tmp =3D ((ulLeft << 16)) | (srcDest.ulDstY1); + STG_WRITE_REG(DACVidWinStart, tmp); + + /* Set Video Window End */ + tmp =3D ((ulRight) << 16) | (srcDest.ulDstY2); + STG_WRITE_REG(DACVidWinEnd, tmp); + + /* Finally set up the rest of the overlay regs in the order + done in the IMG driver + */ + tmp =3D STG_READ_REG(DACPixelFormat); + 
tmp =3D ((ulExcessPixels << 16) | tmp) & 0x7fffffff; + STG_WRITE_REG(DACPixelFormat, tmp); + + tmp =3D STG_READ_REG(DACHorizontalScal); + CLEAR_BITS_FRM_TO(0, 11); + CLEAR_BITS_FRM_TO(16, 17); + tmp |=3D ((ulhDecim << 16) | (ulDacXScale)); + STG_WRITE_REG(DACHorizontalScal, tmp); + + return 0; +} + --- /dev/null Fri Aug 30 19:31:37 2002 +++ fb--devel--2.6/drivers/video/kyro/STG4000Ramdac.c Sun Jan 11 00:26:04 2= 004 @@ -0,0 +1,176 @@ +/* + * linux/drivers/video/kyro/STG4000Ramdac.c + * + * Copyright (C) 2002 STMicroelectronics + * + * This file is subject to the terms and conditions of the GNU General Pub= lic + * License. See the file COPYING in the main directory of this archive + * for more details. + */ + +#include +#include +#include +#include