* [PATCH 02/12] RADEON: Move r100_*_*reg out of line
2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
2011-10-13 23:08 ` [PATCH 03/12] RADEON: drop inlines in r600_blit.c Andi Kleen
` (10 subsequent siblings)
11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, David Airlie
From: Andi Kleen <ak@linux.intel.com>
This shrinks the sizes of a lot of functions in the radeon driver
dramatically.
With a non-force-inline, -Os kernel this is the default anyway.
Cc: David Airlie <airlied@linux.ie>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
drivers/gpu/drm/radeon/r100.c | 40 ++++++++++++++++++++++++++++++++++++
drivers/gpu/drm/radeon/radeon.h | 43 +++-----------------------------------
2 files changed, 44 insertions(+), 39 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 7fcdbbb..9a1efac 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -3965,3 +3965,43 @@ int r100_init(struct radeon_device *rdev)
}
return 0;
}
+
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+ if (reg < rdev->rmmio_size)
+ return readl(((void __iomem *)rdev->rmmio) + reg);
+ else {
+ writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+ return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+ }
+}
+
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+ if (reg < rdev->rmmio_size)
+ writel(v, ((void __iomem *)rdev->rmmio) + reg);
+ else {
+ writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+ writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+ }
+}
+
+u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
+{
+ if (reg < rdev->rio_mem_size)
+ return ioread32(rdev->rio_mem + reg);
+ else {
+ iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
+ return ioread32(rdev->rio_mem + RADEON_MM_DATA);
+ }
+}
+
+void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+ if (reg < rdev->rio_mem_size)
+ iowrite32(v, rdev->rio_mem + reg);
+ else {
+ iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
+ iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
+ }
+}
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index c1e056b..8ac6cba 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1252,45 +1252,10 @@ int radeon_device_init(struct radeon_device *rdev,
void radeon_device_fini(struct radeon_device *rdev);
int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
-static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
-{
- if (reg < rdev->rmmio_size)
- return readl((rdev->rmmio) + reg);
- else {
- writel(reg, (rdev->rmmio) + RADEON_MM_INDEX);
- return readl((rdev->rmmio) + RADEON_MM_DATA);
- }
-}
-
-static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
-{
- if (reg < rdev->rmmio_size)
- writel(v, (rdev->rmmio) + reg);
- else {
- writel(reg, (rdev->rmmio) + RADEON_MM_INDEX);
- writel(v, (rdev->rmmio) + RADEON_MM_DATA);
- }
-}
-
-static inline u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
-{
- if (reg < rdev->rio_mem_size)
- return ioread32(rdev->rio_mem + reg);
- else {
- iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
- return ioread32(rdev->rio_mem + RADEON_MM_DATA);
- }
-}
-
-static inline void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
- if (reg < rdev->rio_mem_size)
- iowrite32(v, rdev->rio_mem + reg);
- else {
- iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
- iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
- }
-}
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg);
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
+void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
/*
* Cast helper
--
1.7.4.4
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH 03/12] RADEON: drop inlines in r600_blit.c
2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
2011-10-13 23:08 ` [PATCH 02/12] RADEON: Move r100_*_*reg out of line Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
2011-10-13 23:08 ` [PATCH 04/12] RADEON: Remove now unused functions in radeon driver Andi Kleen
` (9 subsequent siblings)
11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, David Airlie
From: Andi Kleen <ak@linux.intel.com>
Cc: David Airlie <airlied@linux.ie>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
drivers/gpu/drm/radeon/r600_blit.c | 24 ++++++++++++------------
1 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
index 7f10434..3c031a4 100644
--- a/drivers/gpu/drm/radeon/r600_blit.c
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -41,7 +41,7 @@
#define COLOR_5_6_5 0x8
#define COLOR_8_8_8_8 0x1a
-static inline void
+static void
set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
{
u32 cb_color_info;
@@ -99,7 +99,7 @@ set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64
ADVANCE_RING();
}
-static inline void
+static void
cp_set_surface_sync(drm_radeon_private_t *dev_priv,
u32 sync_type, u32 size, u64 mc_addr)
{
@@ -121,7 +121,7 @@ cp_set_surface_sync(drm_radeon_private_t *dev_priv,
ADVANCE_RING();
}
-static inline void
+static void
set_shaders(struct drm_device *dev)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
@@ -184,7 +184,7 @@ set_shaders(struct drm_device *dev)
R600_SH_ACTION_ENA, 512, gpu_addr);
}
-static inline void
+static void
set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
{
uint32_t sq_vtx_constant_word2;
@@ -220,7 +220,7 @@ set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
R600_VC_ACTION_ENA, 48, gpu_addr);
}
-static inline void
+static void
set_tex_resource(drm_radeon_private_t *dev_priv,
int format, int w, int h, int pitch, u64 gpu_addr)
{
@@ -258,7 +258,7 @@ set_tex_resource(drm_radeon_private_t *dev_priv,
}
-static inline void
+static void
set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
{
RING_LOCALS;
@@ -282,7 +282,7 @@ set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
ADVANCE_RING();
}
-static inline void
+static void
draw_auto(drm_radeon_private_t *dev_priv)
{
RING_LOCALS;
@@ -311,7 +311,7 @@ draw_auto(drm_radeon_private_t *dev_priv)
COMMIT_RING();
}
-static inline void
+static void
set_default_state(drm_radeon_private_t *dev_priv)
{
int i;
@@ -489,7 +489,7 @@ set_default_state(drm_radeon_private_t *dev_priv)
ADVANCE_RING();
}
-static inline uint32_t i2f(uint32_t input)
+static uint32_t i2f(uint32_t input)
{
u32 result, i, exponent, fraction;
@@ -515,7 +515,7 @@ static inline uint32_t i2f(uint32_t input)
}
-static inline int r600_nomm_get_vb(struct drm_device *dev)
+static int r600_nomm_get_vb(struct drm_device *dev)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
dev_priv->blit_vb = radeon_freelist_get(dev);
@@ -526,7 +526,7 @@ static inline int r600_nomm_get_vb(struct drm_device *dev)
return 0;
}
-static inline void r600_nomm_put_vb(struct drm_device *dev)
+static void r600_nomm_put_vb(struct drm_device *dev)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
@@ -534,7 +534,7 @@ static inline void r600_nomm_put_vb(struct drm_device *dev)
radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
}
-static inline void *r600_nomm_get_vb_ptr(struct drm_device *dev)
+static void *r600_nomm_get_vb_ptr(struct drm_device *dev)
{
drm_radeon_private_t *dev_priv = dev->dev_private;
return (((char *)dev->agp_buffer_map->handle +
--
1.7.4.4
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH 04/12] RADEON: Remove now unused functions in radeon driver
2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
2011-10-13 23:08 ` [PATCH 02/12] RADEON: Move r100_*_*reg out of line Andi Kleen
2011-10-13 23:08 ` [PATCH 03/12] RADEON: drop inlines in r600_blit.c Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
2011-10-13 23:08 ` [PATCH 05/12] FB_ATY: Move register accesses out of line Andi Kleen
` (8 subsequent siblings)
11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, David Airlie
From: Andi Kleen <ak@linux.intel.com>
With the dropped inlines, gcc starts warning about genuinely unused
functions. Remove r600_bpe_from_format, evergreen_cs_track_validate_cb,
and evergreen_cs_packet_next_is_pkt3_nop, which are all unused.
Cc: David Airlie <airlied@linux.ie>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
drivers/gpu/drm/radeon/evergreen_cs.c | 28 ----------------------------
drivers/gpu/drm/radeon/r600_cs.c | 19 -------------------
2 files changed, 0 insertions(+), 47 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 35dce99..7fdfa8e 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -122,12 +122,6 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track)
track->db_s_write_bo = NULL;
}
-static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
-{
- /* XXX fill in */
- return 0;
-}
-
static int evergreen_cs_track_check(struct radeon_cs_parser *p)
{
struct evergreen_cs_track *track = p->track;
@@ -236,28 +230,6 @@ static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
}
/**
- * evergreen_cs_packet_next_is_pkt3_nop() - test if next packet is packet3 nop for reloc
- * @parser: parser structure holding parsing context.
- *
- * Check next packet is relocation packet3, do bo validation and compute
- * GPU offset using the provided start.
- **/
-static int evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
-{
- struct radeon_cs_packet p3reloc;
- int r;
-
- r = evergreen_cs_packet_parse(p, &p3reloc, p->idx);
- if (r) {
- return 0;
- }
- if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
- return 0;
- }
- return 1;
-}
-
-/**
* evergreen_cs_packet_next_vline() - parse userspace VLINE packet
* @parser: parser structure holding parsing context.
*
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 7339c0b..0a2e023 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -223,25 +223,6 @@ static int fmt_get_nblocksy(u32 format, u32 h)
return (h + bh - 1) / bh;
}
-static int r600_bpe_from_format(u32 *bpe, u32 format)
-{
- unsigned res;
-
- if (format >= ARRAY_SIZE(color_formats_table))
- goto fail;
-
- res = color_formats_table[format].blocksize;
- if (res == 0)
- goto fail;
-
- *bpe = res;
- return 0;
-
-fail:
- *bpe = 16;
- return -EINVAL;
-}
-
struct array_mode_checker {
int array_mode;
u32 group_size;
--
1.7.4.4
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH 05/12] FB_ATY: Move register accesses out of line
2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
` (2 preceding siblings ...)
2011-10-13 23:08 ` [PATCH 04/12] RADEON: Remove now unused functions in radeon driver Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
2011-10-13 23:08 ` [PATCH 06/12] RADEON: Remove more bogus inlines in the radeon driver Andi Kleen
` (7 subsequent siblings)
11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, David Airlie, benh
From: Andi Kleen <ak@linux.intel.com>
This fixes size regressions like
radeon_set_suspend 1724 7873 +6149
radeon_reinitialize_M10 3974 9285 +5311
radeon_pm_disable_dynamic_mode 868 6125 +5257
radeon_pm_enable_dynamic_mode 985 6065 +5080
radeon_pm_start_mclk_sclk - 4614 +4614
radeon_write_mode 1252 5377 +4125
among others, compared to a non-force-inline kernel.
Cc: David Airlie <airlied@linux.ie>
Cc: benh@kernel.crashing.org
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
drivers/video/aty/radeon_accel.c | 88 ++++++++++++++++++++++++++++++++++
drivers/video/aty/radeonfb.h | 96 +++-----------------------------------
2 files changed, 95 insertions(+), 89 deletions(-)
diff --git a/drivers/video/aty/radeon_accel.c b/drivers/video/aty/radeon_accel.c
index a469a3d..0f1e367 100644
--- a/drivers/video/aty/radeon_accel.c
+++ b/drivers/video/aty/radeon_accel.c
@@ -326,3 +326,91 @@ void radeonfb_engine_init (struct radeonfb_info *rinfo)
radeon_engine_idle ();
}
+
+
+void _OUTREGP(struct radeonfb_info *rinfo, u32 addr, u32 val, u32 mask)
+{
+ unsigned long flags;
+ unsigned int tmp;
+
+ spin_lock_irqsave(&rinfo->reg_lock, flags);
+ tmp = INREG(addr);
+ tmp &= (mask);
+ tmp |= (val);
+ OUTREG(addr, tmp);
+ spin_unlock_irqrestore(&rinfo->reg_lock, flags);
+}
+
+
+/*
+ * Note about PLL register accesses:
+ *
+ * I have removed the spinlock on them on purpose. The driver now
+ * expects that it will only manipulate the PLL registers in normal
+ * task environment, where radeon_msleep() will be called, protected
+ * by a semaphore (currently the console semaphore) so that no conflict
+ * will happen on the PLL register index.
+ *
+ * With the latest changes to the VT layer, this is guaranteed for all
+ * calls except the actual drawing/blits which aren't supposed to use
+ * the PLL registers anyway
+ *
+ * This is very important for the workarounds to work properly. The only
+ * possible exception to this rule is the call to unblank(), which may
+ * be done at irq time if an oops is in progress.
+ */
+void radeon_pll_errata_after_index(struct radeonfb_info *rinfo)
+{
+ if (!(rinfo->errata & CHIP_ERRATA_PLL_DUMMYREADS))
+ return;
+
+ (void)INREG(CLOCK_CNTL_DATA);
+ (void)INREG(CRTC_GEN_CNTL);
+}
+
+void radeon_pll_errata_after_data(struct radeonfb_info *rinfo)
+{
+ if (rinfo->errata & CHIP_ERRATA_PLL_DELAY) {
+ /* we can't deal with posted writes here ... */
+ _radeon_msleep(rinfo, 5);
+ }
+ if (rinfo->errata & CHIP_ERRATA_R300_CG) {
+ u32 save, tmp;
+ save = INREG(CLOCK_CNTL_INDEX);
+ tmp = save & ~(0x3f | PLL_WR_EN);
+ OUTREG(CLOCK_CNTL_INDEX, tmp);
+ tmp = INREG(CLOCK_CNTL_DATA);
+ OUTREG(CLOCK_CNTL_INDEX, save);
+ }
+}
+
+u32 __INPLL(struct radeonfb_info *rinfo, u32 addr)
+{
+ u32 data;
+
+ OUTREG8(CLOCK_CNTL_INDEX, addr & 0x0000003f);
+ radeon_pll_errata_after_index(rinfo);
+ data = INREG(CLOCK_CNTL_DATA);
+ radeon_pll_errata_after_data(rinfo);
+ return data;
+}
+
+void __OUTPLL(struct radeonfb_info *rinfo, unsigned int index, u32 val)
+{
+
+ OUTREG8(CLOCK_CNTL_INDEX, (index & 0x0000003f) | 0x00000080);
+ radeon_pll_errata_after_index(rinfo);
+ OUTREG(CLOCK_CNTL_DATA, val);
+ radeon_pll_errata_after_data(rinfo);
+}
+
+void __OUTPLLP(struct radeonfb_info *rinfo, unsigned int index, u32 val, u32 mask)
+{
+ unsigned int tmp;
+
+ tmp = __INPLL(rinfo, index);
+ tmp &= (mask);
+ tmp |= (val);
+ __OUTPLL(rinfo, index, tmp);
+}
+
diff --git a/drivers/video/aty/radeonfb.h b/drivers/video/aty/radeonfb.h
index 7351e66..cde9c2e 100644
--- a/drivers/video/aty/radeonfb.h
+++ b/drivers/video/aty/radeonfb.h
@@ -393,98 +393,16 @@ static inline void _radeon_msleep(struct radeonfb_info *rinfo, unsigned long ms)
#define INREG(addr) readl((rinfo->mmio_base)+addr)
#define OUTREG(addr,val) writel(val, (rinfo->mmio_base)+addr)
-static inline void _OUTREGP(struct radeonfb_info *rinfo, u32 addr,
- u32 val, u32 mask)
-{
- unsigned long flags;
- unsigned int tmp;
-
- spin_lock_irqsave(&rinfo->reg_lock, flags);
- tmp = INREG(addr);
- tmp &= (mask);
- tmp |= (val);
- OUTREG(addr, tmp);
- spin_unlock_irqrestore(&rinfo->reg_lock, flags);
-}
+void _OUTREGP(struct radeonfb_info *rinfo, u32 addr, u32 val, u32 mask);
+void __OUTPLLP(struct radeonfb_info *rinfo, unsigned int index,
+ u32 val, u32 mask);
+void __OUTPLL(struct radeonfb_info *rinfo, unsigned int index, u32 val);
+u32 __INPLL(struct radeonfb_info *rinfo, u32 addr);
+void radeon_pll_errata_after_data(struct radeonfb_info *rinfo);
+void radeon_pll_errata_after_index(struct radeonfb_info *rinfo);
#define OUTREGP(addr,val,mask) _OUTREGP(rinfo, addr, val,mask)
-/*
- * Note about PLL register accesses:
- *
- * I have removed the spinlock on them on purpose. The driver now
- * expects that it will only manipulate the PLL registers in normal
- * task environment, where radeon_msleep() will be called, protected
- * by a semaphore (currently the console semaphore) so that no conflict
- * will happen on the PLL register index.
- *
- * With the latest changes to the VT layer, this is guaranteed for all
- * calls except the actual drawing/blits which aren't supposed to use
- * the PLL registers anyway
- *
- * This is very important for the workarounds to work properly. The only
- * possible exception to this rule is the call to unblank(), which may
- * be done at irq time if an oops is in progress.
- */
-static inline void radeon_pll_errata_after_index(struct radeonfb_info *rinfo)
-{
- if (!(rinfo->errata & CHIP_ERRATA_PLL_DUMMYREADS))
- return;
-
- (void)INREG(CLOCK_CNTL_DATA);
- (void)INREG(CRTC_GEN_CNTL);
-}
-
-static inline void radeon_pll_errata_after_data(struct radeonfb_info *rinfo)
-{
- if (rinfo->errata & CHIP_ERRATA_PLL_DELAY) {
- /* we can't deal with posted writes here ... */
- _radeon_msleep(rinfo, 5);
- }
- if (rinfo->errata & CHIP_ERRATA_R300_CG) {
- u32 save, tmp;
- save = INREG(CLOCK_CNTL_INDEX);
- tmp = save & ~(0x3f | PLL_WR_EN);
- OUTREG(CLOCK_CNTL_INDEX, tmp);
- tmp = INREG(CLOCK_CNTL_DATA);
- OUTREG(CLOCK_CNTL_INDEX, save);
- }
-}
-
-static inline u32 __INPLL(struct radeonfb_info *rinfo, u32 addr)
-{
- u32 data;
-
- OUTREG8(CLOCK_CNTL_INDEX, addr & 0x0000003f);
- radeon_pll_errata_after_index(rinfo);
- data = INREG(CLOCK_CNTL_DATA);
- radeon_pll_errata_after_data(rinfo);
- return data;
-}
-
-static inline void __OUTPLL(struct radeonfb_info *rinfo, unsigned int index,
- u32 val)
-{
-
- OUTREG8(CLOCK_CNTL_INDEX, (index & 0x0000003f) | 0x00000080);
- radeon_pll_errata_after_index(rinfo);
- OUTREG(CLOCK_CNTL_DATA, val);
- radeon_pll_errata_after_data(rinfo);
-}
-
-
-static inline void __OUTPLLP(struct radeonfb_info *rinfo, unsigned int index,
- u32 val, u32 mask)
-{
- unsigned int tmp;
-
- tmp = __INPLL(rinfo, index);
- tmp &= (mask);
- tmp |= (val);
- __OUTPLL(rinfo, index, tmp);
-}
-
-
#define INPLL(addr) __INPLL(rinfo, addr)
#define OUTPLL(index, val) __OUTPLL(rinfo, index, val)
#define OUTPLLP(index, val, mask) __OUTPLLP(rinfo, index, val, mask)
--
1.7.4.4
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH 06/12] RADEON: Remove more bogus inlines in the radeon driver.
2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
` (3 preceding siblings ...)
2011-10-13 23:08 ` [PATCH 05/12] FB_ATY: Move register accesses out of line Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
2011-10-13 23:08 ` [PATCH 07/12] RADEON: Move more code out of line Andi Kleen
` (6 subsequent siblings)
11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, airlied
From: Andi Kleen <ak@linux.intel.com>
Remove bogus inlines in evergreen and r100.
Cc: airlied@linux.ie
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
drivers/gpu/drm/radeon/evergreen.c | 4 +-
drivers/gpu/drm/radeon/evergreen_blit_kms.c | 2 +-
drivers/gpu/drm/radeon/r100.c | 106 +++++++++++++++++++++++++-
drivers/gpu/drm/radeon/r100_track.h | 110 ++-------------------------
4 files changed, 114 insertions(+), 108 deletions(-)
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index c4ffa14f..1c5cd09 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2586,7 +2586,7 @@ int evergreen_irq_set(struct radeon_device *rdev)
return 0;
}
-static inline void evergreen_irq_ack(struct radeon_device *rdev)
+static void evergreen_irq_ack(struct radeon_device *rdev)
{
u32 tmp;
@@ -2697,7 +2697,7 @@ void evergreen_irq_suspend(struct radeon_device *rdev)
r600_rlc_stop(rdev);
}
-static inline u32 evergreen_get_ih_wptr(struct radeon_device *rdev)
+static u32 evergreen_get_ih_wptr(struct radeon_device *rdev)
{
u32 wptr, tmp;
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 2eb2518..7eb78b3 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -584,7 +584,7 @@ set_default_state(struct radeon_device *rdev)
}
-static inline uint32_t i2f(uint32_t input)
+static uint32_t i2f(uint32_t input)
{
u32 result, i, exponent, fraction;
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 9a1efac..6720929 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -68,6 +68,108 @@ MODULE_FIRMWARE(FIRMWARE_R520);
* r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
*/
+int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
+ struct radeon_cs_packet *pkt,
+ unsigned idx,
+ unsigned reg)
+{
+ int r;
+ u32 tile_flags = 0;
+ u32 tmp;
+ struct radeon_cs_reloc *reloc;
+ u32 value;
+
+ r = r100_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+ idx, reg);
+ r100_cs_dump_packet(p, pkt);
+ return r;
+ }
+ value = radeon_get_ib_value(p, idx);
+ tmp = value & 0x003fffff;
+ tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
+
+ if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+ tile_flags |= RADEON_DST_TILE_MACRO;
+ if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+ if (reg == RADEON_SRC_PITCH_OFFSET) {
+ DRM_ERROR("Cannot src blit from microtiled surface\n");
+ r100_cs_dump_packet(p, pkt);
+ return -EINVAL;
+ }
+ tile_flags |= RADEON_DST_TILE_MICRO;
+ }
+
+ tmp |= tile_flags;
+ p->ib->ptr[idx] = (value & 0x3fc00000) | tmp;
+ return 0;
+}
+
+int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
+ struct radeon_cs_packet *pkt,
+ int idx)
+{
+ unsigned c, i;
+ struct radeon_cs_reloc *reloc;
+ struct r100_cs_track *track;
+ int r = 0;
+ volatile uint32_t *ib;
+ u32 idx_value;
+
+ ib = p->ib->ptr;
+ track = (struct r100_cs_track *)p->track;
+ c = radeon_get_ib_value(p, idx++) & 0x1F;
+ if (c > 16) {
+ DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
+ pkt->opcode);
+ r100_cs_dump_packet(p, pkt);
+ return -EINVAL;
+ }
+ track->num_arrays = c;
+ for (i = 0; i < (c - 1); i+=2, idx+=3) {
+ r = r100_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("No reloc for packet3 %d\n",
+ pkt->opcode);
+ r100_cs_dump_packet(p, pkt);
+ return r;
+ }
+ idx_value = radeon_get_ib_value(p, idx);
+ ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+
+ track->arrays[i + 0].esize = idx_value >> 8;
+ track->arrays[i + 0].robj = reloc->robj;
+ track->arrays[i + 0].esize &= 0x7F;
+ r = r100_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("No reloc for packet3 %d\n",
+ pkt->opcode);
+ r100_cs_dump_packet(p, pkt);
+ return r;
+ }
+ ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
+ track->arrays[i + 1].robj = reloc->robj;
+ track->arrays[i + 1].esize = idx_value >> 24;
+ track->arrays[i + 1].esize &= 0x7F;
+ }
+ if (c & 1) {
+ r = r100_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("No reloc for packet3 %d\n",
+ pkt->opcode);
+ r100_cs_dump_packet(p, pkt);
+ return r;
+ }
+ idx_value = radeon_get_ib_value(p, idx);
+ ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+ track->arrays[i + 0].robj = reloc->robj;
+ track->arrays[i + 0].esize = idx_value >> 8;
+ track->arrays[i + 0].esize &= 0x7F;
+ }
+ return r;
+}
+
void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
{
/* enable the pflip int */
@@ -588,7 +690,7 @@ void r100_irq_disable(struct radeon_device *rdev)
WREG32(R_000044_GEN_INT_STATUS, tmp);
}
-static inline uint32_t r100_irq_ack(struct radeon_device *rdev)
+static uint32_t r100_irq_ack(struct radeon_device *rdev)
{
uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
uint32_t irq_mask = RADEON_SW_INT_TEST |
@@ -3147,7 +3249,7 @@ void r100_bandwidth_update(struct radeon_device *rdev)
}
}
-static inline void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
+static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
{
DRM_ERROR("pitch %d\n", t->pitch);
DRM_ERROR("use_pitch %d\n", t->use_pitch);
diff --git a/drivers/gpu/drm/radeon/r100_track.h b/drivers/gpu/drm/radeon/r100_track.h
index 686f9dc..6a603b3 100644
--- a/drivers/gpu/drm/radeon/r100_track.h
+++ b/drivers/gpu/drm/radeon/r100_track.h
@@ -92,106 +92,10 @@ int r200_packet0_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt,
unsigned idx, unsigned reg);
-
-
-static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
- struct radeon_cs_packet *pkt,
- unsigned idx,
- unsigned reg)
-{
- int r;
- u32 tile_flags = 0;
- u32 tmp;
- struct radeon_cs_reloc *reloc;
- u32 value;
-
- r = r100_cs_packet_next_reloc(p, &reloc);
- if (r) {
- DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
- idx, reg);
- r100_cs_dump_packet(p, pkt);
- return r;
- }
- value = radeon_get_ib_value(p, idx);
- tmp = value & 0x003fffff;
- tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
-
- if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
- tile_flags |= RADEON_DST_TILE_MACRO;
- if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
- if (reg == RADEON_SRC_PITCH_OFFSET) {
- DRM_ERROR("Cannot src blit from microtiled surface\n");
- r100_cs_dump_packet(p, pkt);
- return -EINVAL;
- }
- tile_flags |= RADEON_DST_TILE_MICRO;
- }
-
- tmp |= tile_flags;
- p->ib->ptr[idx] = (value & 0x3fc00000) | tmp;
- return 0;
-}
-
-static inline int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
- struct radeon_cs_packet *pkt,
- int idx)
-{
- unsigned c, i;
- struct radeon_cs_reloc *reloc;
- struct r100_cs_track *track;
- int r = 0;
- volatile uint32_t *ib;
- u32 idx_value;
-
- ib = p->ib->ptr;
- track = (struct r100_cs_track *)p->track;
- c = radeon_get_ib_value(p, idx++) & 0x1F;
- if (c > 16) {
- DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
- pkt->opcode);
- r100_cs_dump_packet(p, pkt);
- return -EINVAL;
- }
- track->num_arrays = c;
- for (i = 0; i < (c - 1); i+=2, idx+=3) {
- r = r100_cs_packet_next_reloc(p, &reloc);
- if (r) {
- DRM_ERROR("No reloc for packet3 %d\n",
- pkt->opcode);
- r100_cs_dump_packet(p, pkt);
- return r;
- }
- idx_value = radeon_get_ib_value(p, idx);
- ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
-
- track->arrays[i + 0].esize = idx_value >> 8;
- track->arrays[i + 0].robj = reloc->robj;
- track->arrays[i + 0].esize &= 0x7F;
- r = r100_cs_packet_next_reloc(p, &reloc);
- if (r) {
- DRM_ERROR("No reloc for packet3 %d\n",
- pkt->opcode);
- r100_cs_dump_packet(p, pkt);
- return r;
- }
- ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
- track->arrays[i + 1].robj = reloc->robj;
- track->arrays[i + 1].esize = idx_value >> 24;
- track->arrays[i + 1].esize &= 0x7F;
- }
- if (c & 1) {
- r = r100_cs_packet_next_reloc(p, &reloc);
- if (r) {
- DRM_ERROR("No reloc for packet3 %d\n",
- pkt->opcode);
- r100_cs_dump_packet(p, pkt);
- return r;
- }
- idx_value = radeon_get_ib_value(p, idx);
- ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
- track->arrays[i + 0].robj = reloc->robj;
- track->arrays[i + 0].esize = idx_value >> 8;
- track->arrays[i + 0].esize &= 0x7F;
- }
- return r;
-}
+int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
+ struct radeon_cs_packet *pkt,
+ unsigned idx,
+ unsigned reg);
+int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
+ struct radeon_cs_packet *pkt,
+ int idx);
--
1.7.4.4
^ permalink raw reply related [flat|nested] 15+ messages in thread* [PATCH 07/12] RADEON: Move more code out of line
2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
` (4 preceding siblings ...)
2011-10-13 23:08 ` [PATCH 06/12] RADEON: Remove more bogus inlines in the radeon driver Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
2011-10-13 23:08 ` [PATCH 08/12] X86: Move alloc_intr_gate " Andi Kleen
` (5 subsequent siblings)
11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, airlied
From: Andi Kleen <ak@linux.intel.com>
With this patch I'm only about 50k larger with DRM debugging
enabled (why is that enabled by default?!?), and slightly
smaller without.
Cc: airlied@linux.ie
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
drivers/gpu/drm/radeon/r100.c | 39 ++++++++++++++++++++++++++
drivers/gpu/drm/radeon/r300_cmdbuf.c | 2 +-
drivers/gpu/drm/radeon/r600.c | 4 +-
drivers/gpu/drm/radeon/r600_blit_kms.c | 2 +-
drivers/gpu/drm/radeon/radeon.h | 39 +++++----------------------
drivers/gpu/drm/radeon/radeon_atombios.c | 4 +-
drivers/gpu/drm/radeon/radeon_irq.c | 2 +-
drivers/gpu/drm/radeon/radeon_legacy_tv.c | 2 +-
drivers/gpu/drm/radeon/radeon_object.c | 41 ++++++++++++++++++++++++++++
drivers/gpu/drm/radeon/radeon_object.h | 42 ++--------------------------
drivers/gpu/drm/radeon/radeon_state.c | 16 +++++-----
11 files changed, 106 insertions(+), 87 deletions(-)
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 6720929..1bbed1f 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -4107,3 +4107,42 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
}
}
+
+/* Better place? */
+u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
+{
+ struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
+ u32 pg_idx, pg_offset;
+ u32 idx_value = 0;
+ int new_page;
+
+ pg_idx = (idx * 4) / PAGE_SIZE;
+ pg_offset = (idx * 4) % PAGE_SIZE;
+
+ if (ibc->kpage_idx[0] == pg_idx)
+ return ibc->kpage[0][pg_offset/4];
+ if (ibc->kpage_idx[1] == pg_idx)
+ return ibc->kpage[1][pg_offset/4];
+
+ new_page = radeon_cs_update_pages(p, pg_idx);
+ if (new_page < 0) {
+ p->parser_error = new_page;
+ return 0;
+ }
+
+ idx_value = ibc->kpage[new_page][pg_offset/4];
+ return idx_value;
+}
+
+void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
+{
+#if DRM_DEBUG_CODE
+ if (rdev->cp.count_dw <= 0) {
+ DRM_ERROR("radeon: writting more dword to ring than expected !\n");
+ }
+#endif
+ rdev->cp.ring[rdev->cp.wptr++] = v;
+ rdev->cp.wptr &= rdev->cp.ptr_mask;
+ rdev->cp.count_dw--;
+ rdev->cp.ring_free_dw--;
+}
diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c
index c5c2742..1fe98b4 100644
--- a/drivers/gpu/drm/radeon/r300_cmdbuf.c
+++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c
@@ -791,7 +791,7 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
/**
* Emit the sequence to pacify R300.
*/
-static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
+static void r300_pacify(drm_radeon_private_t *dev_priv)
{
uint32_t cache_z, cache_3d, cache_2d;
RING_LOCALS;
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 720dd99..c85047f 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -3137,7 +3137,7 @@ int r600_irq_set(struct radeon_device *rdev)
return 0;
}
-static inline void r600_irq_ack(struct radeon_device *rdev)
+static void r600_irq_ack(struct radeon_device *rdev)
{
u32 tmp;
@@ -3238,7 +3238,7 @@ void r600_irq_disable(struct radeon_device *rdev)
r600_disable_interrupt_state(rdev);
}
-static inline u32 r600_get_ih_wptr(struct radeon_device *rdev)
+static u32 r600_get_ih_wptr(struct radeon_device *rdev)
{
u32 wptr, tmp;
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 9aa74c3..bbbafe6 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -450,7 +450,7 @@ set_default_state(struct radeon_device *rdev)
radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
}
-static inline uint32_t i2f(uint32_t input)
+static uint32_t i2f(uint32_t input)
{
u32 result, i, exponent, fraction;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8ac6cba..5ed7ef7 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -601,32 +601,7 @@ struct radeon_cs_parser {
extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx);
extern int radeon_cs_finish_pages(struct radeon_cs_parser *p);
-
-
-static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
-{
- struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
- u32 pg_idx, pg_offset;
- u32 idx_value = 0;
- int new_page;
-
- pg_idx = (idx * 4) / PAGE_SIZE;
- pg_offset = (idx * 4) % PAGE_SIZE;
-
- if (ibc->kpage_idx[0] == pg_idx)
- return ibc->kpage[0][pg_offset/4];
- if (ibc->kpage_idx[1] == pg_idx)
- return ibc->kpage[1][pg_offset/4];
-
- new_page = radeon_cs_update_pages(p, pg_idx);
- if (new_page < 0) {
- p->parser_error = new_page;
- return 0;
- }
-
- idx_value = ibc->kpage[new_page][pg_offset/4];
- return idx_value;
-}
+extern u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx);
struct radeon_cs_packet {
unsigned idx;
@@ -1378,19 +1353,19 @@ void radeon_atombios_fini(struct radeon_device *rdev);
/*
* RING helpers.
*/
+
+#if DRM_DEBUG_CODE == 0
static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
{
-#if DRM_DEBUG_CODE
- if (rdev->cp.count_dw <= 0) {
- DRM_ERROR("radeon: writting more dword to ring than expected !\n");
- }
-#endif
rdev->cp.ring[rdev->cp.wptr++] = v;
rdev->cp.wptr &= rdev->cp.ptr_mask;
rdev->cp.count_dw--;
rdev->cp.ring_free_dw--;
}
-
+#else
+/* With debugging this is just too big to inline */
+void radeon_ring_write(struct radeon_device *rdev, uint32_t v);
+#endif
/*
* ASICs macro.
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index bf2b615..08d0b94 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -62,7 +62,7 @@ union atom_supported_devices {
struct _ATOM_SUPPORTED_DEVICES_INFO_2d1 info_2d1;
};
-static inline struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rdev,
+static struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rdev,
uint8_t id)
{
struct atom_context *ctx = rdev->mode_info.atom_context;
@@ -228,7 +228,7 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev)
}
}
-static inline struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
+static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
u8 id)
{
struct atom_context *ctx = rdev->mode_info.atom_context;
diff --git a/drivers/gpu/drm/radeon/radeon_irq.c b/drivers/gpu/drm/radeon/radeon_irq.c
index 465746b..00da384 100644
--- a/drivers/gpu/drm/radeon/radeon_irq.c
+++ b/drivers/gpu/drm/radeon/radeon_irq.c
@@ -129,7 +129,7 @@ void radeon_disable_vblank(struct drm_device *dev, int crtc)
}
}
-static inline u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int)
+static u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int)
{
u32 irqs = RADEON_READ(RADEON_GEN_INT_STATUS);
u32 irq_mask = RADEON_SW_INT_TEST;
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_tv.c b/drivers/gpu/drm/radeon/radeon_legacy_tv.c
index c7b6cb4..b37ec0f 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_tv.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_tv.c
@@ -864,7 +864,7 @@ void radeon_legacy_tv_adjust_crtc_reg(struct drm_encoder *encoder,
*v_sync_strt_wid = tmp;
}
-static inline int get_post_div(int value)
+static int get_post_div(int value)
{
int post_div;
switch (value) {
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 976c3b1..1c85152 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -515,3 +515,44 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
}
return 0;
}
+
+int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
+{
+ int r;
+
+ r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
+ if (unlikely(r != 0))
+ return r;
+ spin_lock(&bo->tbo.bdev->fence_lock);
+ if (mem_type)
+ *mem_type = bo->tbo.mem.mem_type;
+ if (bo->tbo.sync_obj)
+ r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
+ spin_unlock(&bo->tbo.bdev->fence_lock);
+ ttm_bo_unreserve(&bo->tbo);
+ return r;
+}
+
+
+/**
+ * radeon_bo_reserve - reserve bo
+ * @bo: bo structure
+ * @no_wait: don't sleep while trying to reserve (return -EBUSY)
+ *
+ * Returns:
+ * -EBUSY: buffer is busy and @no_wait is true
+ * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
+ * a signal. Release all buffer reservations and return to user-space.
+ */
+int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait)
+{
+ int r;
+
+ r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
+ if (unlikely(r != 0)) {
+ if (r != -ERESTARTSYS)
+ dev_err(bo->rdev->dev, "%p reserve failed\n", bo);
+ return r;
+ }
+ return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index ede6c13..b07f0f9 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -52,28 +52,7 @@ static inline unsigned radeon_mem_type_to_domain(u32 mem_type)
return 0;
}
-/**
- * radeon_bo_reserve - reserve bo
- * @bo: bo structure
- * @no_wait: don't sleep while trying to reserve (return -EBUSY)
- *
- * Returns:
- * -EBUSY: buffer is busy and @no_wait is true
- * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
- * a signal. Release all buffer reservations and return to user-space.
- */
-static inline int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait)
-{
- int r;
-
- r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
- if (unlikely(r != 0)) {
- if (r != -ERESTARTSYS)
- dev_err(bo->rdev->dev, "%p reserve failed\n", bo);
- return r;
- }
- return 0;
-}
+int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait);
static inline void radeon_bo_unreserve(struct radeon_bo *bo)
{
@@ -118,23 +97,8 @@ static inline u64 radeon_bo_mmap_offset(struct radeon_bo *bo)
return bo->tbo.addr_space_offset;
}
-static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
- bool no_wait)
-{
- int r;
-
- r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
- if (unlikely(r != 0))
- return r;
- spin_lock(&bo->tbo.bdev->fence_lock);
- if (mem_type)
- *mem_type = bo->tbo.mem.mem_type;
- if (bo->tbo.sync_obj)
- r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
- spin_unlock(&bo->tbo.bdev->fence_lock);
- ttm_bo_unreserve(&bo->tbo);
- return r;
-}
+extern int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
+ bool no_wait);
extern int radeon_bo_create(struct radeon_device *rdev,
unsigned long size, int byte_align,
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index 92e7ea7..e8422ae 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -272,12 +272,12 @@ static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
return 0;
}
-static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
- dev_priv,
- struct drm_file *file_priv,
- drm_radeon_kcmd_buffer_t *
- cmdbuf,
- unsigned int *cmdsz)
+static int radeon_check_and_fixup_packet3(drm_radeon_private_t *
+ dev_priv,
+ struct drm_file *file_priv,
+ drm_radeon_kcmd_buffer_t *
+ cmdbuf,
+ unsigned int *cmdsz)
{
u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
u32 offset, narrays;
@@ -446,8 +446,8 @@ static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
* CP hardware state programming functions
*/
-static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
- struct drm_clip_rect * box)
+static void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
+ struct drm_clip_rect * box)
{
RING_LOCALS;
--
1.7.4.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 08/12] X86: Move alloc_intr_gate out of line
2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
` (5 preceding siblings ...)
2011-10-13 23:08 ` [PATCH 07/12] RADEON: Move more code out of line Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
2011-10-13 23:08 ` [PATCH 09/12] Don't use inline node_page_state for sysfs output functions Andi Kleen
` (4 subsequent siblings)
11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, x86
From: Andi Kleen <ak@linux.intel.com>
This saves about 2.5k text on a non force inline kernel.
Cc: x86@kernel.org
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/include/asm/desc.h | 17 +----------------
arch/x86/kernel/irqinit.c | 20 ++++++++++++++++++++
2 files changed, 21 insertions(+), 16 deletions(-)
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 41935fa..f7183e1 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -336,22 +336,7 @@ extern int first_system_vector;
/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */
extern unsigned long used_vectors[];
-static inline void alloc_system_vector(int vector)
-{
- if (!test_bit(vector, used_vectors)) {
- set_bit(vector, used_vectors);
- if (first_system_vector > vector)
- first_system_vector = vector;
- } else {
- BUG();
- }
-}
-
-static inline void alloc_intr_gate(unsigned int n, void *addr)
-{
- alloc_system_vector(n);
- set_intr_gate(n, addr);
-}
+void alloc_intr_gate(unsigned int n, void *addr);
/*
* This routine sets up an interrupt gate at directory privilege level 3.
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index b3300e6..b6e769b 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -162,6 +162,26 @@ void setup_vector_irq(int cpu)
__setup_vector_irq(cpu);
}
+static void alloc_system_vector(int vector)
+{
+ if (!test_bit(vector, used_vectors)) {
+ set_bit(vector, used_vectors);
+ if (first_system_vector > vector)
+ first_system_vector = vector;
+ } else
+ BUG();
+}
+
+/*
+ * This could be made __init if xen didn't abuse it in its
+ * suspend path!
+ */
+void alloc_intr_gate(unsigned int n, void *addr)
+{
+ alloc_system_vector(n);
+ set_intr_gate(n, addr);
+}
+
static void __init smp_intr_init(void)
{
#ifdef CONFIG_SMP
--
1.7.4.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 09/12] Don't use inline node_page_state for sysfs output functions
2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
` (6 preceding siblings ...)
2011-10-13 23:08 ` [PATCH 08/12] X86: Move alloc_intr_gate " Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
2011-10-13 23:08 ` [PATCH 10/12] REISERFS: reiserfs drop unnecessary inlines Andi Kleen
` (3 subsequent siblings)
11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
These are not time critical, and using an out of line function
saves about 2.5k text on a non force inline kernel.
I left the main hotpath user -- readahead -- inline for now.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
drivers/base/node.c | 72 +++++++++++++++++++++++++++-----------------------
1 files changed, 39 insertions(+), 33 deletions(-)
diff --git a/drivers/base/node.c b/drivers/base/node.c
index 793f796..0b8e7a2 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -58,6 +58,12 @@ static inline ssize_t node_read_cpulist(struct sys_device *dev,
static SYSDEV_ATTR(cpumap, S_IRUGO, node_read_cpumask, NULL);
static SYSDEV_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);
+/* Don't inline */
+static unsigned long my_node_page_state(int node, enum zone_stat_item item)
+{
+ return node_page_state(node, item);
+}
+
#define K(x) ((x) << (PAGE_SHIFT - 10))
static ssize_t node_read_meminfo(struct sys_device * dev,
struct sysdev_attribute *attr, char * buf)
@@ -82,16 +88,16 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
nid, K(i.totalram),
nid, K(i.freeram),
nid, K(i.totalram - i.freeram),
- nid, K(node_page_state(nid, NR_ACTIVE_ANON) +
- node_page_state(nid, NR_ACTIVE_FILE)),
- nid, K(node_page_state(nid, NR_INACTIVE_ANON) +
- node_page_state(nid, NR_INACTIVE_FILE)),
- nid, K(node_page_state(nid, NR_ACTIVE_ANON)),
- nid, K(node_page_state(nid, NR_INACTIVE_ANON)),
- nid, K(node_page_state(nid, NR_ACTIVE_FILE)),
- nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
- nid, K(node_page_state(nid, NR_UNEVICTABLE)),
- nid, K(node_page_state(nid, NR_MLOCK)));
+ nid, K(my_node_page_state(nid, NR_ACTIVE_ANON) +
+ my_node_page_state(nid, NR_ACTIVE_FILE)),
+ nid, K(my_node_page_state(nid, NR_INACTIVE_ANON) +
+ my_node_page_state(nid, NR_INACTIVE_FILE)),
+ nid, K(my_node_page_state(nid, NR_ACTIVE_ANON)),
+ nid, K(my_node_page_state(nid, NR_INACTIVE_ANON)),
+ nid, K(my_node_page_state(nid, NR_ACTIVE_FILE)),
+ nid, K(my_node_page_state(nid, NR_INACTIVE_FILE)),
+ nid, K(my_node_page_state(nid, NR_UNEVICTABLE)),
+ nid, K(my_node_page_state(nid, NR_MLOCK)));
#ifdef CONFIG_HIGHMEM
n += sprintf(buf + n,
@@ -123,30 +129,30 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
"Node %d AnonHugePages: %8lu kB\n"
#endif
,
- nid, K(node_page_state(nid, NR_FILE_DIRTY)),
- nid, K(node_page_state(nid, NR_WRITEBACK)),
- nid, K(node_page_state(nid, NR_FILE_PAGES)),
- nid, K(node_page_state(nid, NR_FILE_MAPPED)),
- nid, K(node_page_state(nid, NR_ANON_PAGES)
+ nid, K(my_node_page_state(nid, NR_FILE_DIRTY)),
+ nid, K(my_node_page_state(nid, NR_WRITEBACK)),
+ nid, K(my_node_page_state(nid, NR_FILE_PAGES)),
+ nid, K(my_node_page_state(nid, NR_FILE_MAPPED)),
+ nid, K(my_node_page_state(nid, NR_ANON_PAGES)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- + node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) *
+ + my_node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) *
HPAGE_PMD_NR
#endif
),
- nid, K(node_page_state(nid, NR_SHMEM)),
- nid, node_page_state(nid, NR_KERNEL_STACK) *
+ nid, K(my_node_page_state(nid, NR_SHMEM)),
+ nid, my_node_page_state(nid, NR_KERNEL_STACK) *
THREAD_SIZE / 1024,
- nid, K(node_page_state(nid, NR_PAGETABLE)),
- nid, K(node_page_state(nid, NR_UNSTABLE_NFS)),
- nid, K(node_page_state(nid, NR_BOUNCE)),
- nid, K(node_page_state(nid, NR_WRITEBACK_TEMP)),
- nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE) +
- node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
- nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)),
- nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE))
+ nid, K(my_node_page_state(nid, NR_PAGETABLE)),
+ nid, K(my_node_page_state(nid, NR_UNSTABLE_NFS)),
+ nid, K(my_node_page_state(nid, NR_BOUNCE)),
+ nid, K(my_node_page_state(nid, NR_WRITEBACK_TEMP)),
+ nid, K(my_node_page_state(nid, NR_SLAB_RECLAIMABLE) +
+ my_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
+ nid, K(my_node_page_state(nid, NR_SLAB_RECLAIMABLE)),
+ nid, K(my_node_page_state(nid, NR_SLAB_UNRECLAIMABLE))
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
, nid,
- K(node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) *
+ K(my_node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) *
HPAGE_PMD_NR)
#endif
);
@@ -167,12 +173,12 @@ static ssize_t node_read_numastat(struct sys_device * dev,
"interleave_hit %lu\n"
"local_node %lu\n"
"other_node %lu\n",
- node_page_state(dev->id, NUMA_HIT),
- node_page_state(dev->id, NUMA_MISS),
- node_page_state(dev->id, NUMA_FOREIGN),
- node_page_state(dev->id, NUMA_INTERLEAVE_HIT),
- node_page_state(dev->id, NUMA_LOCAL),
- node_page_state(dev->id, NUMA_OTHER));
+ my_node_page_state(dev->id, NUMA_HIT),
+ my_node_page_state(dev->id, NUMA_MISS),
+ my_node_page_state(dev->id, NUMA_FOREIGN),
+ my_node_page_state(dev->id, NUMA_INTERLEAVE_HIT),
+ my_node_page_state(dev->id, NUMA_LOCAL),
+ my_node_page_state(dev->id, NUMA_OTHER));
}
static SYSDEV_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
--
1.7.4.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 10/12] REISERFS: reiserfs drop unnecessary inlines
2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
` (7 preceding siblings ...)
2011-10-13 23:08 ` [PATCH 09/12] Don't use inline node_page_state for sysfs output functions Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
2011-10-13 23:08 ` [PATCH 11/12] i915: Move i915_read/write out of line Andi Kleen
` (2 subsequent siblings)
11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, viro, fweisbec
From: Andi Kleen <ak@linux.intel.com>
Drop some inlines to shrink code size when inlining is forced.
There is still some unfixed growth in:
balance_leaf 7190 8766 +1576
search_by_key 1963 3317 +1354
Cc: viro@zeniv.linux.org.uk
Cc: fweisbec@gmail.com
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
fs/reiserfs/do_balan.c | 14 +++++++-------
1 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 60c0804..8b3c44c 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -21,7 +21,7 @@
#include <linux/buffer_head.h>
#include <linux/kernel.h>
-static inline void buffer_info_init_left(struct tree_balance *tb,
+static void buffer_info_init_left(struct tree_balance *tb,
struct buffer_info *bi)
{
bi->tb = tb;
@@ -30,7 +30,7 @@ static inline void buffer_info_init_left(struct tree_balance *tb,
bi->bi_position = get_left_neighbor_position(tb, 0);
}
-static inline void buffer_info_init_right(struct tree_balance *tb,
+static void buffer_info_init_right(struct tree_balance *tb,
struct buffer_info *bi)
{
bi->tb = tb;
@@ -39,7 +39,7 @@ static inline void buffer_info_init_right(struct tree_balance *tb,
bi->bi_position = get_right_neighbor_position(tb, 0);
}
-static inline void buffer_info_init_tbS0(struct tree_balance *tb,
+static void buffer_info_init_tbS0(struct tree_balance *tb,
struct buffer_info *bi)
{
bi->tb = tb;
@@ -48,7 +48,7 @@ static inline void buffer_info_init_tbS0(struct tree_balance *tb,
bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
}
-static inline void buffer_info_init_bh(struct tree_balance *tb,
+static void buffer_info_init_bh(struct tree_balance *tb,
struct buffer_info *bi,
struct buffer_head *bh)
{
@@ -58,7 +58,7 @@ static inline void buffer_info_init_bh(struct tree_balance *tb,
bi->bi_position = 0;
}
-inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
+void do_balance_mark_leaf_dirty(struct tree_balance *tb,
struct buffer_head *bh, int flag)
{
journal_mark_dirty(tb->transaction_handle,
@@ -1967,7 +1967,7 @@ static void check_internal_levels(struct tree_balance *tb)
*/
-static inline void do_balance_starts(struct tree_balance *tb)
+static void do_balance_starts(struct tree_balance *tb)
{
/* use print_cur_tb() to see initial state of struct
tree_balance */
@@ -1983,7 +1983,7 @@ static inline void do_balance_starts(struct tree_balance *tb)
#endif
}
-static inline void do_balance_completed(struct tree_balance *tb)
+static void do_balance_completed(struct tree_balance *tb)
{
#ifdef CONFIG_REISERFS_CHECK
--
1.7.4.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* [PATCH 11/12] i915: Move i915_read/write out of line
2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
` (8 preceding siblings ...)
2011-10-13 23:08 ` [PATCH 10/12] REISERFS: reiserfs drop unnecessary inlines Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
2011-10-15 13:49 ` Daniel Vetter
2011-10-18 5:47 ` Ben Widawsky
2011-10-13 23:08 ` [PATCH 12/12] Force always inline for gcc 4.5 when optimizing for size Andi Kleen
2011-10-18 8:59 ` [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c David Airlie
11 siblings, 2 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, keithp
From: Andi Kleen <ak@linux.intel.com>
With the tracing code in there they are far too big to inline.
.text savings compared to a non force inline kernel:
i915_restore_display 4393 12036 +7643
i915_save_display 4295 11459 +7164
i915_handle_error 2979 6666 +3687
i915_driver_irq_handler 2923 5086 +2163
i915_ringbuffer_info 458 1661 +1203
i915_save_vga - 1200 +1200
i915_driver_irq_uninstall 453 1624 +1171
i915_driver_irq_postinstall 913 2078 +1165
ironlake_enable_drps 719 1872 +1153
i915_restore_vga - 1142 +1142
intel_display_capture_error_state 784 2030 +1246
intel_init_emon 719 2016 +1297
and more ...
[AK: these are older numbers, with the new SNB forcewake checks
it will be even worse]
Cc: keithp@keithp.com
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
drivers/gpu/drm/i915/i915_drv.c | 40 +++++++++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_drv.h | 22 ++------------------
2 files changed, 43 insertions(+), 19 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f07e425..c2de142 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -895,3 +895,43 @@ module_exit(i915_exit);
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL and additional rights");
+
+/* We give fast paths for the really cool registers */
+#define NEEDS_FORCE_WAKE(dev_priv, reg) \
+ (((dev_priv)->info->gen >= 6) && \
+ ((reg) < 0x40000) && \
+ ((reg) != FORCEWAKE))
+
+#define __i915_read(x, y) \
+u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
+ u##x val = 0; \
+ if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+ gen6_gt_force_wake_get(dev_priv); \
+ val = read##y(dev_priv->regs + reg); \
+ gen6_gt_force_wake_put(dev_priv); \
+ } else { \
+ val = read##y(dev_priv->regs + reg); \
+ } \
+ trace_i915_reg_rw(false, reg, val, sizeof(val)); \
+ return val; \
+}
+
+__i915_read(8, b)
+__i915_read(16, w)
+__i915_read(32, l)
+__i915_read(64, q)
+#undef __i915_read
+
+#define __i915_write(x, y) \
+void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
+ trace_i915_reg_rw(true, reg, val, sizeof(val)); \
+ if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+ __gen6_gt_wait_for_fifo(dev_priv); \
+ } \
+ write##y(val, dev_priv->regs + reg); \
+}
+__i915_write(8, b)
+__i915_write(16, w)
+__i915_write(32, l)
+__i915_write(64, q)
+#undef __i915_write
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7916bd9..7d171ea 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1354,18 +1354,7 @@ void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
((reg) != FORCEWAKE))
#define __i915_read(x, y) \
-static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
- u##x val = 0; \
- if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
- gen6_gt_force_wake_get(dev_priv); \
- val = read##y(dev_priv->regs + reg); \
- gen6_gt_force_wake_put(dev_priv); \
- } else { \
- val = read##y(dev_priv->regs + reg); \
- } \
- trace_i915_reg_rw(false, reg, val, sizeof(val)); \
- return val; \
-}
+ u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg);
__i915_read(8, b)
__i915_read(16, w)
@@ -1374,13 +1363,8 @@ __i915_read(64, q)
#undef __i915_read
#define __i915_write(x, y) \
-static inline void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
- trace_i915_reg_rw(true, reg, val, sizeof(val)); \
- if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
- __gen6_gt_wait_for_fifo(dev_priv); \
- } \
- write##y(val, dev_priv->regs + reg); \
-}
+ void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val);
+
__i915_write(8, b)
__i915_write(16, w)
__i915_write(32, l)
--
1.7.4.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 11/12] i915: Move i915_read/write out of line
2011-10-13 23:08 ` [PATCH 11/12] i915: Move i915_read/write out of line Andi Kleen
@ 2011-10-15 13:49 ` Daniel Vetter
2011-10-18 5:47 ` Ben Widawsky
1 sibling, 0 replies; 15+ messages in thread
From: Daniel Vetter @ 2011-10-15 13:49 UTC (permalink / raw)
To: Andi Kleen; +Cc: linux-kernel, akpm, Andi Kleen, dri-devel
On Thu, Oct 13, 2011 at 04:08:51PM -0700, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> With the tracing code in there they are far too big to inline.
>
> .text savings compared to a non force inline kernel:
>
> i915_restore_display 4393 12036 +7643
> i915_save_display 4295 11459 +7164
> i915_handle_error 2979 6666 +3687
> i915_driver_irq_handler 2923 5086 +2163
> i915_ringbuffer_info 458 1661 +1203
> i915_save_vga - 1200 +1200
> i915_driver_irq_uninstall 453 1624 +1171
> i915_driver_irq_postinstall 913 2078 +1165
> ironlake_enable_drps 719 1872 +1153
> i915_restore_vga - 1142 +1142
> intel_display_capture_error_state 784 2030 +1246
> intel_init_emon 719 2016 +1297
>
> and more ...
>
> [AK: these are older numbers, with the new SNB forcewake checks
> it will be even worse]
>
> Cc: keithp@keithp.com
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
--
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48
^ permalink raw reply [flat|nested] 15+ messages in thread
* Re: [PATCH 11/12] i915: Move i915_read/write out of line
2011-10-13 23:08 ` [PATCH 11/12] i915: Move i915_read/write out of line Andi Kleen
2011-10-15 13:49 ` Daniel Vetter
@ 2011-10-18 5:47 ` Ben Widawsky
1 sibling, 0 replies; 15+ messages in thread
From: Ben Widawsky @ 2011-10-18 5:47 UTC (permalink / raw)
To: keithp; +Cc: Andi Kleen, linux-kernel, dri-devel, akpm, Andi Kleen, intel-gfx
On Thu, 13 Oct 2011 16:08:51 -0700
Andi Kleen <andi@firstfloor.org> wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> With the tracing code in there they are far too big to inline.
>
> .text savings compared to a non force inline kernel:
>
> i915_restore_display 4393 12036 +7643
> i915_save_display 4295 11459 +7164
> i915_handle_error 2979 6666 +3687
> i915_driver_irq_handler 2923 5086 +2163
> i915_ringbuffer_info 458 1661 +1203
> i915_save_vga - 1200 +1200
> i915_driver_irq_uninstall 453 1624 +1171
> i915_driver_irq_postinstall 913 2078 +1165
> ironlake_enable_drps 719 1872 +1153
> i915_restore_vga - 1142 +1142
> intel_display_capture_error_state 784 2030 +1246
> intel_init_emon 719 2016 +1297
>
> and more ...
>
> [AK: these are older numbers, with the new SNB forcewake checks
> it will be even worse]
>
> Cc: keithp@keithp.com
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.c | 40 +++++++++++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/i915_drv.h | 22 ++------------------
> 2 files changed, 43 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index f07e425..c2de142 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -895,3 +895,43 @@ module_exit(i915_exit);
> MODULE_AUTHOR(DRIVER_AUTHOR);
> MODULE_DESCRIPTION(DRIVER_DESC);
> MODULE_LICENSE("GPL and additional rights");
> +
> +/* We give fast paths for the really cool registers */
> +#define NEEDS_FORCE_WAKE(dev_priv, reg) \
> + (((dev_priv)->info->gen >= 6) && \
> + ((reg) < 0x40000) && \
> + ((reg) != FORCEWAKE))
> +
> +#define __i915_read(x, y) \
> +u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
> + u##x val = 0; \
> + if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
> + gen6_gt_force_wake_get(dev_priv); \
> + val = read##y(dev_priv->regs + reg); \
> + gen6_gt_force_wake_put(dev_priv); \
> + } else { \
> + val = read##y(dev_priv->regs + reg); \
> + } \
> + trace_i915_reg_rw(false, reg, val, sizeof(val)); \
> + return val; \
> +}
> +
> +__i915_read(8, b)
> +__i915_read(16, w)
> +__i915_read(32, l)
> +__i915_read(64, q)
> +#undef __i915_read
> +
> +#define __i915_write(x, y) \
> +void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
> + trace_i915_reg_rw(true, reg, val, sizeof(val)); \
> + if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
> + __gen6_gt_wait_for_fifo(dev_priv); \
> + } \
> + write##y(val, dev_priv->regs + reg); \
> +}
> +__i915_write(8, b)
> +__i915_write(16, w)
> +__i915_write(32, l)
> +__i915_write(64, q)
> +#undef __i915_write
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7916bd9..7d171ea 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1354,18 +1354,7 @@ void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
> ((reg) != FORCEWAKE))
>
> #define __i915_read(x, y) \
> -static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
> - u##x val = 0; \
> - if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
> - gen6_gt_force_wake_get(dev_priv); \
> - val = read##y(dev_priv->regs + reg); \
> - gen6_gt_force_wake_put(dev_priv); \
> - } else { \
> - val = read##y(dev_priv->regs + reg); \
> - } \
> - trace_i915_reg_rw(false, reg, val, sizeof(val)); \
> - return val; \
> -}
> + u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg);
>
> __i915_read(8, b)
> __i915_read(16, w)
> @@ -1374,13 +1363,8 @@ __i915_read(64, q)
> #undef __i915_read
>
> #define __i915_write(x, y) \
> -static inline void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
> - trace_i915_reg_rw(true, reg, val, sizeof(val)); \
> - if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
> - __gen6_gt_wait_for_fifo(dev_priv); \
> - } \
> - write##y(val, dev_priv->regs + reg); \
> -}
> + void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val);
> +
> __i915_write(8, b)
> __i915_write(16, w)
> __i915_write(32, l)
Acked-by: Ben Widawsky <ben@bwidawsk.net>
The size increase caused by forcewake should have been reduced a bit by the
forcewake struct encapsulation patch I posted to the intel-gfx mailing list.
Keith, if you take this, could you also look into that patch?
<1315951648-5380-1-git-send-email-ben@bwidawsk.net>
^ permalink raw reply [flat|nested] 15+ messages in thread
* [PATCH 12/12] Force always inline for gcc 4.5 when optimizing for size
2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
` (9 preceding siblings ...)
2011-10-13 23:08 ` [PATCH 11/12] i915: Move i915_read/write out of line Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
2011-10-18 8:59 ` [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c David Airlie
11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
I found that gcc 4.5 didn't inline a lot of inline functions with
CONFIG_OPTIMIZE_INLINING and CONFIG_CC_OPTIMIZE_FOR_SIZE. It was quite
common for very small inlines to end up out of line, or worse, for static
inlines in include files to end up out of line with a separate copy in
every file using them.
This is handily visible in a function graph trace for might_fault:
10) | might_fault() {
10) | _cond_resched() {
10) | should_resched() {
10) | need_resched() {
10) 0.063 us | test_ti_thread_flag();
10) 0.643 us | }
10) 1.238 us | }
10) 1.845 us | }
10) 2.438 us | }
Note that all of these functions are very small and should definitely be
inlined into each other. In many cases even copy_from_user
ends up out of line now, which is really bad!
If I switch to -O2 it is not quite as bad, but since a lot
of people use -Os I tried to fix that case up.
So this patch forces inlining with gcc 4.5 with -Os.
Unfortunately it costs some code size with just this patch.
text data bss dec hex filename
11507035 1940276 1191936 14639247 df608f vmlinux-O2
10189858 1908124 1187840 13285822 cab9be vmlinux-Os-force
9808525 1940204 1187840 12936569 c56579 vmlinux-Os-orig
It turned out most of this was because of unnecessary inlines.
With a lot of inlines removed I now get:
11175824 1977200 1191936 14344960 dae300 vmlinux-inlines-removed-no-optimize
11642530 2018416 1191936 14852882 e2a312 vmlinux-master-no-optimize
11530439 2001264 1191936 14723639 e0aa37 vmlinux-master-optimize
which is significantly smaller.
I haven't tested earlier gcc 4.x versions, but they may need
the same treatment.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
include/linux/compiler-gcc.h | 5 ++++-
1 files changed, 4 insertions(+), 1 deletions(-)
diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 59e4028..e477a7c 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -44,9 +44,12 @@
/*
* Force always-inline if the user requests it so via the .config,
* or if gcc is too old:
+ * When optimizing for size on gcc 4.5 always force inlining too.
*/
#if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \
- !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
+ !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) || \
+ (defined(CONFIG_CC_OPTIMIZE_FOR_SIZE) && \
+ (__GNUC__ == 4 && __GNUC_MINOR__ == 5))
# define inline inline __attribute__((always_inline))
# define __inline__ __inline__ __attribute__((always_inline))
# define __inline __inline __attribute__((always_inline))
--
1.7.4.4
^ permalink raw reply related [flat|nested] 15+ messages in thread
* Re: [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c
2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
` (10 preceding siblings ...)
2011-10-13 23:08 ` [PATCH 12/12] Force always inline for gcc 4.5 when optimizing for size Andi Kleen
@ 2011-10-18 8:59 ` David Airlie
11 siblings, 0 replies; 15+ messages in thread
From: David Airlie @ 2011-10-18 8:59 UTC (permalink / raw)
To: Andi Kleen; +Cc: akpm, Andi Kleen, dri-devel, linux-kernel
>
> From: Andi Kleen <ak@linux.intel.com>
Hi Andi,
I've merged all the RADEON: patches to drm-next locally, with one minor change (moving some of the out-of-lines to a more appropriate place).
Thanks,
Dave.
^ permalink raw reply [flat|nested] 15+ messages in thread