public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c
@ 2011-10-13 23:08 Andi Kleen
  2011-10-13 23:08 ` [PATCH 02/12] RADEON: Move r100_*_*reg out of line Andi Kleen
                   ` (11 more replies)
  0 siblings, 12 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, David Airlie

From: Andi Kleen <ak@linux.intel.com>

Fixes

evergreen_cs_parse                          4080   23124  +19044

and others compared to a non force inline kernel.

Cc: David Airlie <airlied@linux.ie>

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/gpu/drm/radeon/evergreen_cs.c |    8 ++++----
 drivers/gpu/drm/radeon/r600_cs.c      |   24 ++++++++++++------------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index a134790..35dce99 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -122,7 +122,7 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track)
 	track->db_s_write_bo = NULL;
 }
 
-static inline int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
+static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
 {
 	/* XXX fill in */
 	return 0;
@@ -242,7 +242,7 @@ static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
  * Check next packet is relocation packet3, do bo validation and compute
  * GPU offset using the provided start.
  **/
-static inline int evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
+static int evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
 {
 	struct radeon_cs_packet p3reloc;
 	int r;
@@ -414,7 +414,7 @@ static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
  * if register is safe. If register is not flag as safe this function
  * will test it against a list of register needind special handling.
  */
-static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
+static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 {
 	struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
 	struct radeon_cs_reloc *reloc;
@@ -990,7 +990,7 @@ static inline int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u3
  * This function will check that the resource has valid field and that
  * the texture and mipmap bo object are big enough to cover this resource.
  */
-static inline int evergreen_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
+static int evergreen_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
 						   struct radeon_bo *texture,
 						   struct radeon_bo *mipmap)
 {
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index cf83aa0..7339c0b 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -162,7 +162,7 @@ static const struct gpu_formats color_formats_table[] = {
 	[V_038004_FMT_32_AS_32_32_32_32] = { 1, 1, 4, 0, CHIP_CEDAR},
 };
 
-static inline bool fmt_is_valid_color(u32 format)
+static bool fmt_is_valid_color(u32 format)
 {
 	if (format >= ARRAY_SIZE(color_formats_table))
 		return false;
@@ -173,7 +173,7 @@ static inline bool fmt_is_valid_color(u32 format)
 	return false;
 }
 
-static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family)
+static bool fmt_is_valid_texture(u32 format, enum radeon_family family)
 {
 	if (format >= ARRAY_SIZE(color_formats_table))
 		return false;
@@ -187,7 +187,7 @@ static inline bool fmt_is_valid_texture(u32 format, enum radeon_family family)
 	return false;
 }
 
-static inline int fmt_get_blocksize(u32 format)
+static int fmt_get_blocksize(u32 format)
 {
 	if (format >= ARRAY_SIZE(color_formats_table))
 		return 0;
@@ -195,7 +195,7 @@ static inline int fmt_get_blocksize(u32 format)
 	return color_formats_table[format].blocksize;
 }
 
-static inline int fmt_get_nblocksx(u32 format, u32 w)
+static int fmt_get_nblocksx(u32 format, u32 w)
 {
 	unsigned bw;
 
@@ -209,7 +209,7 @@ static inline int fmt_get_nblocksx(u32 format, u32 w)
 	return (w + bw - 1) / bw;
 }
 
-static inline int fmt_get_nblocksy(u32 format, u32 h)
+static int fmt_get_nblocksy(u32 format, u32 h)
 {
 	unsigned bh;
 
@@ -223,7 +223,7 @@ static inline int fmt_get_nblocksy(u32 format, u32 h)
 	return (h + bh - 1) / bh;
 }
 
-static inline int r600_bpe_from_format(u32 *bpe, u32 format)
+static int r600_bpe_from_format(u32 *bpe, u32 format)
 {
  	unsigned res;
 
@@ -252,7 +252,7 @@ struct array_mode_checker {
 };
 
 /* returns alignment in pixels for pitch/height/depth and bytes for base */
-static inline int r600_get_array_mode_alignment(struct array_mode_checker *values,
+static int r600_get_array_mode_alignment(struct array_mode_checker *values,
 						u32 *pitch_align,
 						u32 *height_align,
 						u32 *depth_align,
@@ -331,7 +331,7 @@ static void r600_cs_track_init(struct r600_cs_track *track)
 	track->db_depth_control = 0xFFFFFFFF;
 }
 
-static inline int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
+static int r600_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
 {
 	struct r600_cs_track *track = p->track;
 	u32 slice_tile_max, size, tmp;
@@ -737,7 +737,7 @@ static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
  * Check next packet is relocation packet3, do bo validation and compute
  * GPU offset using the provided start.
  **/
-static inline int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
+static int r600_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
 {
 	struct radeon_cs_packet p3reloc;
 	int r;
@@ -911,7 +911,7 @@ static int r600_cs_parse_packet0(struct radeon_cs_parser *p,
  * if register is safe. If register is not flag as safe this function
  * will test it against a list of register needind special handling.
  */
-static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
+static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 {
 	struct r600_cs_track *track = (struct r600_cs_track *)p->track;
 	struct radeon_cs_reloc *reloc;
@@ -1215,7 +1215,7 @@ static inline int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx
 	return 0;
 }
 
-static inline unsigned mip_minify(unsigned size, unsigned level)
+static unsigned mip_minify(unsigned size, unsigned level)
 {
 	unsigned val;
 
@@ -1285,7 +1285,7 @@ static void r600_texture_size(unsigned nfaces, unsigned blevel, unsigned llevel,
  * This function will check that the resource has valid field and that
  * the texture and mipmap bo object are big enough to cover this resource.
  */
-static inline int r600_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
+static int r600_check_texture_resource(struct radeon_cs_parser *p,  u32 idx,
 					      struct radeon_bo *texture,
 					      struct radeon_bo *mipmap,
 					      u64 base_offset,
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 02/12] RADEON: Move r100_*_*reg out of line
  2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
  2011-10-13 23:08 ` [PATCH 03/12] RADEON: drop inlines in r600_blit.c Andi Kleen
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, David Airlie

From: Andi Kleen <ak@linux.intel.com>

This shrinks the sizes of a lot of functions in the radeon driver
dramatically.

With a non force inline + -Os kernel this is default anyways.

Cc: David Airlie <airlied@linux.ie>

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/gpu/drm/radeon/r100.c   |   40 ++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon.h |   43 +++-----------------------------------
 2 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 7fcdbbb..9a1efac 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -3965,3 +3965,43 @@ int r100_init(struct radeon_device *rdev)
 	}
 	return 0;
 }
+
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
+{
+	if (reg < rdev->rmmio_size)
+		return readl(((void __iomem *)rdev->rmmio) + reg);
+	else {
+		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+		return readl(((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+	}
+}
+
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
+{
+	if (reg < rdev->rmmio_size)
+		writel(v, ((void __iomem *)rdev->rmmio) + reg);
+	else {
+		writel(reg, ((void __iomem *)rdev->rmmio) + RADEON_MM_INDEX);
+		writel(v, ((void __iomem *)rdev->rmmio) + RADEON_MM_DATA);
+	}
+}
+
+u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
+{
+	if (reg < rdev->rio_mem_size)
+		return ioread32(rdev->rio_mem + reg);
+	else {
+		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
+		return ioread32(rdev->rio_mem + RADEON_MM_DATA);
+	}
+}
+
+void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
+{
+	if (reg < rdev->rio_mem_size)
+		iowrite32(v, rdev->rio_mem + reg);
+	else {
+		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
+		iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
+	}
+}
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index c1e056b..8ac6cba 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1252,45 +1252,10 @@ int radeon_device_init(struct radeon_device *rdev,
 void radeon_device_fini(struct radeon_device *rdev);
 int radeon_gpu_wait_for_idle(struct radeon_device *rdev);
 
-static inline uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg)
-{
-	if (reg < rdev->rmmio_size)
-		return readl((rdev->rmmio) + reg);
-	else {
-		writel(reg, (rdev->rmmio) + RADEON_MM_INDEX);
-		return readl((rdev->rmmio) + RADEON_MM_DATA);
-	}
-}
-
-static inline void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v)
-{
-	if (reg < rdev->rmmio_size)
-		writel(v, (rdev->rmmio) + reg);
-	else {
-		writel(reg, (rdev->rmmio) + RADEON_MM_INDEX);
-		writel(v, (rdev->rmmio) + RADEON_MM_DATA);
-	}
-}
-
-static inline u32 r100_io_rreg(struct radeon_device *rdev, u32 reg)
-{
-	if (reg < rdev->rio_mem_size)
-		return ioread32(rdev->rio_mem + reg);
-	else {
-		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
-		return ioread32(rdev->rio_mem + RADEON_MM_DATA);
-	}
-}
-
-static inline void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
-{
-	if (reg < rdev->rio_mem_size)
-		iowrite32(v, rdev->rio_mem + reg);
-	else {
-		iowrite32(reg, rdev->rio_mem + RADEON_MM_INDEX);
-		iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
-	}
-}
+uint32_t r100_mm_rreg(struct radeon_device *rdev, uint32_t reg);
+void r100_mm_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
+u32 r100_io_rreg(struct radeon_device *rdev, u32 reg);
+void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v);
 
 /*
  * Cast helper
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 03/12] RADEON: drop inlines in r600_blit.c
  2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
  2011-10-13 23:08 ` [PATCH 02/12] RADEON: Move r100_*_*reg out of line Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
  2011-10-13 23:08 ` [PATCH 04/12] RADEON: Remove now unused functions in radeon driver Andi Kleen
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, David Airlie

From: Andi Kleen <ak@linux.intel.com>

Cc: David Airlie <airlied@linux.ie>

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/gpu/drm/radeon/r600_blit.c |   24 ++++++++++++------------
 1 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r600_blit.c b/drivers/gpu/drm/radeon/r600_blit.c
index 7f10434..3c031a4 100644
--- a/drivers/gpu/drm/radeon/r600_blit.c
+++ b/drivers/gpu/drm/radeon/r600_blit.c
@@ -41,7 +41,7 @@
 #define COLOR_5_6_5           0x8
 #define COLOR_8_8_8_8         0x1a
 
-static inline void
+static void
 set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64 gpu_addr)
 {
 	u32 cb_color_info;
@@ -99,7 +99,7 @@ set_render_target(drm_radeon_private_t *dev_priv, int format, int w, int h, u64
 	ADVANCE_RING();
 }
 
-static inline void
+static void
 cp_set_surface_sync(drm_radeon_private_t *dev_priv,
 		    u32 sync_type, u32 size, u64 mc_addr)
 {
@@ -121,7 +121,7 @@ cp_set_surface_sync(drm_radeon_private_t *dev_priv,
 	ADVANCE_RING();
 }
 
-static inline void
+static void
 set_shaders(struct drm_device *dev)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
@@ -184,7 +184,7 @@ set_shaders(struct drm_device *dev)
 			    R600_SH_ACTION_ENA, 512, gpu_addr);
 }
 
-static inline void
+static void
 set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
 {
 	uint32_t sq_vtx_constant_word2;
@@ -220,7 +220,7 @@ set_vtx_resource(drm_radeon_private_t *dev_priv, u64 gpu_addr)
 				    R600_VC_ACTION_ENA, 48, gpu_addr);
 }
 
-static inline void
+static void
 set_tex_resource(drm_radeon_private_t *dev_priv,
 		 int format, int w, int h, int pitch, u64 gpu_addr)
 {
@@ -258,7 +258,7 @@ set_tex_resource(drm_radeon_private_t *dev_priv,
 
 }
 
-static inline void
+static void
 set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
 {
 	RING_LOCALS;
@@ -282,7 +282,7 @@ set_scissors(drm_radeon_private_t *dev_priv, int x1, int y1, int x2, int y2)
 	ADVANCE_RING();
 }
 
-static inline void
+static void
 draw_auto(drm_radeon_private_t *dev_priv)
 {
 	RING_LOCALS;
@@ -311,7 +311,7 @@ draw_auto(drm_radeon_private_t *dev_priv)
 	COMMIT_RING();
 }
 
-static inline void
+static void
 set_default_state(drm_radeon_private_t *dev_priv)
 {
 	int i;
@@ -489,7 +489,7 @@ set_default_state(drm_radeon_private_t *dev_priv)
 	ADVANCE_RING();
 }
 
-static inline uint32_t i2f(uint32_t input)
+static uint32_t i2f(uint32_t input)
 {
 	u32 result, i, exponent, fraction;
 
@@ -515,7 +515,7 @@ static inline uint32_t i2f(uint32_t input)
 }
 
 
-static inline int r600_nomm_get_vb(struct drm_device *dev)
+static int r600_nomm_get_vb(struct drm_device *dev)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	dev_priv->blit_vb = radeon_freelist_get(dev);
@@ -526,7 +526,7 @@ static inline int r600_nomm_get_vb(struct drm_device *dev)
 	return 0;
 }
 
-static inline void r600_nomm_put_vb(struct drm_device *dev)
+static void r600_nomm_put_vb(struct drm_device *dev)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 
@@ -534,7 +534,7 @@ static inline void r600_nomm_put_vb(struct drm_device *dev)
 	radeon_cp_discard_buffer(dev, dev_priv->blit_vb->file_priv->master, dev_priv->blit_vb);
 }
 
-static inline void *r600_nomm_get_vb_ptr(struct drm_device *dev)
+static void *r600_nomm_get_vb_ptr(struct drm_device *dev)
 {
 	drm_radeon_private_t *dev_priv = dev->dev_private;
 	return (((char *)dev->agp_buffer_map->handle +
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 04/12] RADEON: Remove now unused functions in radeon driver
  2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
  2011-10-13 23:08 ` [PATCH 02/12] RADEON: Move r100_*_*reg out of line Andi Kleen
  2011-10-13 23:08 ` [PATCH 03/12] RADEON: drop inlines in r600_blit.c Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
  2011-10-13 23:08 ` [PATCH 05/12] FB_ATY: Move register accesses out of line Andi Kleen
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, David Airlie

From: Andi Kleen <ak@linux.intel.com>

With the dropped inlines gccs starts warning about genuinely unused
functions. Remove r600_bpe_from_format, evergreen_cs_track_validate_cb,
evergreen-cs_packet_next_is_pkt3_nop which are all unused.

Cc: David Airlie <airlied@linux.ie>

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/gpu/drm/radeon/evergreen_cs.c |   28 ----------------------------
 drivers/gpu/drm/radeon/r600_cs.c      |   19 -------------------
 2 files changed, 0 insertions(+), 47 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index 35dce99..7fdfa8e 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -122,12 +122,6 @@ static void evergreen_cs_track_init(struct evergreen_cs_track *track)
 	track->db_s_write_bo = NULL;
 }
 
-static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, int i)
-{
-	/* XXX fill in */
-	return 0;
-}
-
 static int evergreen_cs_track_check(struct radeon_cs_parser *p)
 {
 	struct evergreen_cs_track *track = p->track;
@@ -236,28 +230,6 @@ static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
 }
 
 /**
- * evergreen_cs_packet_next_is_pkt3_nop() - test if next packet is packet3 nop for reloc
- * @parser:		parser structure holding parsing context.
- *
- * Check next packet is relocation packet3, do bo validation and compute
- * GPU offset using the provided start.
- **/
-static int evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
-{
-	struct radeon_cs_packet p3reloc;
-	int r;
-
-	r = evergreen_cs_packet_parse(p, &p3reloc, p->idx);
-	if (r) {
-		return 0;
-	}
-	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
-		return 0;
-	}
-	return 1;
-}
-
-/**
  * evergreen_cs_packet_next_vline() - parse userspace VLINE packet
  * @parser:		parser structure holding parsing context.
  *
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 7339c0b..0a2e023 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -223,25 +223,6 @@ static int fmt_get_nblocksy(u32 format, u32 h)
 	return (h + bh - 1) / bh;
 }
 
-static int r600_bpe_from_format(u32 *bpe, u32 format)
-{
- 	unsigned res;
-
-	if (format >= ARRAY_SIZE(color_formats_table))
-		goto fail;
-
-	res = color_formats_table[format].blocksize;
-	if (res == 0)
-		goto fail;
-
-	*bpe = res;
-	return 0;
-
-fail:
-	*bpe = 16;
-	return -EINVAL;
-}
-
 struct array_mode_checker {
 	int array_mode;
 	u32 group_size;
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 05/12] FB_ATY: Move register accesses out of line
  2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
                   ` (2 preceding siblings ...)
  2011-10-13 23:08 ` [PATCH 04/12] RADEON: Remove now unused functions in radeon driver Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
  2011-10-13 23:08 ` [PATCH 06/12] RADEON: Remove more bogus inlines in the radeon driver Andi Kleen
                   ` (7 subsequent siblings)
  11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, David Airlie, benh

From: Andi Kleen <ak@linux.intel.com>

This fixes size regressions like

radeon_set_suspend                          1724    7873   +6149
radeon_reinitialize_M10                     3974    9285   +5311
radeon_pm_disable_dynamic_mode               868    6125   +5257
radeon_pm_enable_dynamic_mode                985    6065   +5080
radeon_pm_start_mclk_sclk                      -    4614   +4614
radeon_write_mode                           1252    5377   +4125

among others compared to a non force inline kernel.

Cc: David Airlie <airlied@linux.ie>
Cc: benh@kernel.crashing.org
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/video/aty/radeon_accel.c |   88 ++++++++++++++++++++++++++++++++++
 drivers/video/aty/radeonfb.h     |   96 +++-----------------------------------
 2 files changed, 95 insertions(+), 89 deletions(-)

diff --git a/drivers/video/aty/radeon_accel.c b/drivers/video/aty/radeon_accel.c
index a469a3d..0f1e367 100644
--- a/drivers/video/aty/radeon_accel.c
+++ b/drivers/video/aty/radeon_accel.c
@@ -326,3 +326,91 @@ void radeonfb_engine_init (struct radeonfb_info *rinfo)
 
 	radeon_engine_idle ();
 }
+
+
+void _OUTREGP(struct radeonfb_info *rinfo, u32 addr, u32 val, u32 mask)
+{
+	unsigned long flags;
+	unsigned int tmp;
+
+	spin_lock_irqsave(&rinfo->reg_lock, flags);
+	tmp = INREG(addr);
+	tmp &= (mask);
+	tmp |= (val);
+	OUTREG(addr, tmp);
+	spin_unlock_irqrestore(&rinfo->reg_lock, flags);
+}
+
+
+/*
+ * Note about PLL register accesses:
+ *
+ * I have removed the spinlock on them on purpose. The driver now
+ * expects that it will only manipulate the PLL registers in normal
+ * task environment, where radeon_msleep() will be called, protected
+ * by a semaphore (currently the console semaphore) so that no conflict
+ * will happen on the PLL register index.
+ *
+ * With the latest changes to the VT layer, this is guaranteed for all
+ * calls except the actual drawing/blits which aren't supposed to use
+ * the PLL registers anyway
+ *
+ * This is very important for the workarounds to work properly. The only
+ * possible exception to this rule is the call to unblank(), which may
+ * be done at irq time if an oops is in progress.
+ */
+void radeon_pll_errata_after_index(struct radeonfb_info *rinfo)
+{
+	if (!(rinfo->errata & CHIP_ERRATA_PLL_DUMMYREADS))
+		return;
+
+	(void)INREG(CLOCK_CNTL_DATA);
+	(void)INREG(CRTC_GEN_CNTL);
+}
+
+void radeon_pll_errata_after_data(struct radeonfb_info *rinfo)
+{
+	if (rinfo->errata & CHIP_ERRATA_PLL_DELAY) {
+		/* we can't deal with posted writes here ... */
+		_radeon_msleep(rinfo, 5);
+	}
+	if (rinfo->errata & CHIP_ERRATA_R300_CG) {
+		u32 save, tmp;
+		save = INREG(CLOCK_CNTL_INDEX);
+		tmp = save & ~(0x3f | PLL_WR_EN);
+		OUTREG(CLOCK_CNTL_INDEX, tmp);
+		tmp = INREG(CLOCK_CNTL_DATA);
+		OUTREG(CLOCK_CNTL_INDEX, save);
+	}
+}
+
+u32 __INPLL(struct radeonfb_info *rinfo, u32 addr)
+{
+	u32 data;
+
+	OUTREG8(CLOCK_CNTL_INDEX, addr & 0x0000003f);
+	radeon_pll_errata_after_index(rinfo);
+	data = INREG(CLOCK_CNTL_DATA);
+	radeon_pll_errata_after_data(rinfo);
+	return data;
+}
+
+void __OUTPLL(struct radeonfb_info *rinfo, unsigned int index, u32 val)
+{
+
+	OUTREG8(CLOCK_CNTL_INDEX, (index & 0x0000003f) | 0x00000080);
+	radeon_pll_errata_after_index(rinfo);
+	OUTREG(CLOCK_CNTL_DATA, val);
+	radeon_pll_errata_after_data(rinfo);
+}
+
+void __OUTPLLP(struct radeonfb_info *rinfo, unsigned int index, u32 val, u32 mask)
+{
+	unsigned int tmp;
+
+	tmp  = __INPLL(rinfo, index);
+	tmp &= (mask);
+	tmp |= (val);
+	__OUTPLL(rinfo, index, tmp);
+}
+
diff --git a/drivers/video/aty/radeonfb.h b/drivers/video/aty/radeonfb.h
index 7351e66..cde9c2e 100644
--- a/drivers/video/aty/radeonfb.h
+++ b/drivers/video/aty/radeonfb.h
@@ -393,98 +393,16 @@ static inline void _radeon_msleep(struct radeonfb_info *rinfo, unsigned long ms)
 #define INREG(addr)		readl((rinfo->mmio_base)+addr)
 #define OUTREG(addr,val)	writel(val, (rinfo->mmio_base)+addr)
 
-static inline void _OUTREGP(struct radeonfb_info *rinfo, u32 addr,
-		       u32 val, u32 mask)
-{
-	unsigned long flags;
-	unsigned int tmp;
-
-	spin_lock_irqsave(&rinfo->reg_lock, flags);
-	tmp = INREG(addr);
-	tmp &= (mask);
-	tmp |= (val);
-	OUTREG(addr, tmp);
-	spin_unlock_irqrestore(&rinfo->reg_lock, flags);
-}
+void _OUTREGP(struct radeonfb_info *rinfo, u32 addr, u32 val, u32 mask);
+void __OUTPLLP(struct radeonfb_info *rinfo, unsigned int index,
+			     u32 val, u32 mask);
+void __OUTPLL(struct radeonfb_info *rinfo, unsigned int index, u32 val);
+u32 __INPLL(struct radeonfb_info *rinfo, u32 addr);
+void radeon_pll_errata_after_data(struct radeonfb_info *rinfo);
+void radeon_pll_errata_after_index(struct radeonfb_info *rinfo);
 
 #define OUTREGP(addr,val,mask)	_OUTREGP(rinfo, addr, val,mask)
 
-/*
- * Note about PLL register accesses:
- *
- * I have removed the spinlock on them on purpose. The driver now
- * expects that it will only manipulate the PLL registers in normal
- * task environment, where radeon_msleep() will be called, protected
- * by a semaphore (currently the console semaphore) so that no conflict
- * will happen on the PLL register index.
- *
- * With the latest changes to the VT layer, this is guaranteed for all
- * calls except the actual drawing/blits which aren't supposed to use
- * the PLL registers anyway
- *
- * This is very important for the workarounds to work properly. The only
- * possible exception to this rule is the call to unblank(), which may
- * be done at irq time if an oops is in progress.
- */
-static inline void radeon_pll_errata_after_index(struct radeonfb_info *rinfo)
-{
-	if (!(rinfo->errata & CHIP_ERRATA_PLL_DUMMYREADS))
-		return;
-
-	(void)INREG(CLOCK_CNTL_DATA);
-	(void)INREG(CRTC_GEN_CNTL);
-}
-
-static inline void radeon_pll_errata_after_data(struct radeonfb_info *rinfo)
-{
-	if (rinfo->errata & CHIP_ERRATA_PLL_DELAY) {
-		/* we can't deal with posted writes here ... */
-		_radeon_msleep(rinfo, 5);
-	}
-	if (rinfo->errata & CHIP_ERRATA_R300_CG) {
-		u32 save, tmp;
-		save = INREG(CLOCK_CNTL_INDEX);
-		tmp = save & ~(0x3f | PLL_WR_EN);
-		OUTREG(CLOCK_CNTL_INDEX, tmp);
-		tmp = INREG(CLOCK_CNTL_DATA);
-		OUTREG(CLOCK_CNTL_INDEX, save);
-	}
-}
-
-static inline u32 __INPLL(struct radeonfb_info *rinfo, u32 addr)
-{
-	u32 data;
-
-	OUTREG8(CLOCK_CNTL_INDEX, addr & 0x0000003f);
-	radeon_pll_errata_after_index(rinfo);
-	data = INREG(CLOCK_CNTL_DATA);
-	radeon_pll_errata_after_data(rinfo);
-	return data;
-}
-
-static inline void __OUTPLL(struct radeonfb_info *rinfo, unsigned int index,
-			    u32 val)
-{
-
-	OUTREG8(CLOCK_CNTL_INDEX, (index & 0x0000003f) | 0x00000080);
-	radeon_pll_errata_after_index(rinfo);
-	OUTREG(CLOCK_CNTL_DATA, val);
-	radeon_pll_errata_after_data(rinfo);
-}
-
-
-static inline void __OUTPLLP(struct radeonfb_info *rinfo, unsigned int index,
-			     u32 val, u32 mask)
-{
-	unsigned int tmp;
-
-	tmp  = __INPLL(rinfo, index);
-	tmp &= (mask);
-	tmp |= (val);
-	__OUTPLL(rinfo, index, tmp);
-}
-
-
 #define INPLL(addr)			__INPLL(rinfo, addr)
 #define OUTPLL(index, val)		__OUTPLL(rinfo, index, val)
 #define OUTPLLP(index, val, mask)	__OUTPLLP(rinfo, index, val, mask)
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 06/12] RADEON: Remove more bogus inlines in the radeon driver.
  2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
                   ` (3 preceding siblings ...)
  2011-10-13 23:08 ` [PATCH 05/12] FB_ATY: Move register accesses out of line Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
  2011-10-13 23:08 ` [PATCH 07/12] RADEON: Move more code out of line Andi Kleen
                   ` (6 subsequent siblings)
  11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, airlied

From: Andi Kleen <ak@linux.intel.com>

Remove bogus inlines in evergreen and r100.

Cc: airlied@linux.ie
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/gpu/drm/radeon/evergreen.c          |    4 +-
 drivers/gpu/drm/radeon/evergreen_blit_kms.c |    2 +-
 drivers/gpu/drm/radeon/r100.c               |  106 +++++++++++++++++++++++++-
 drivers/gpu/drm/radeon/r100_track.h         |  110 ++-------------------------
 4 files changed, 114 insertions(+), 108 deletions(-)

diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index c4ffa14f..1c5cd09 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -2586,7 +2586,7 @@ int evergreen_irq_set(struct radeon_device *rdev)
 	return 0;
 }
 
-static inline void evergreen_irq_ack(struct radeon_device *rdev)
+static void evergreen_irq_ack(struct radeon_device *rdev)
 {
 	u32 tmp;
 
@@ -2697,7 +2697,7 @@ void evergreen_irq_suspend(struct radeon_device *rdev)
 	r600_rlc_stop(rdev);
 }
 
-static inline u32 evergreen_get_ih_wptr(struct radeon_device *rdev)
+static u32 evergreen_get_ih_wptr(struct radeon_device *rdev)
 {
 	u32 wptr, tmp;
 
diff --git a/drivers/gpu/drm/radeon/evergreen_blit_kms.c b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
index 2eb2518..7eb78b3 100644
--- a/drivers/gpu/drm/radeon/evergreen_blit_kms.c
+++ b/drivers/gpu/drm/radeon/evergreen_blit_kms.c
@@ -584,7 +584,7 @@ set_default_state(struct radeon_device *rdev)
 
 }
 
-static inline uint32_t i2f(uint32_t input)
+static uint32_t i2f(uint32_t input)
 {
 	u32 result, i, exponent, fraction;
 
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 9a1efac..6720929 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -68,6 +68,108 @@ MODULE_FIRMWARE(FIRMWARE_R520);
  * r100,rv100,rs100,rv200,rs200,r200,rv250,rs300,rv280
  */
 
+int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
+			    struct radeon_cs_packet *pkt,
+			    unsigned idx,
+			    unsigned reg)
+{
+	int r;
+	u32 tile_flags = 0;
+	u32 tmp;
+	struct radeon_cs_reloc *reloc;
+	u32 value;
+
+	r = r100_cs_packet_next_reloc(p, &reloc);
+	if (r) {
+		DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
+			  idx, reg);
+		r100_cs_dump_packet(p, pkt);
+		return r;
+	}
+	value = radeon_get_ib_value(p, idx);
+	tmp = value & 0x003fffff;
+	tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
+
+	if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+		tile_flags |= RADEON_DST_TILE_MACRO;
+	if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+		if (reg == RADEON_SRC_PITCH_OFFSET) {
+			DRM_ERROR("Cannot src blit from microtiled surface\n");
+			r100_cs_dump_packet(p, pkt);
+			return -EINVAL;
+		}
+		tile_flags |= RADEON_DST_TILE_MICRO;
+	}
+
+	tmp |= tile_flags;
+	p->ib->ptr[idx] = (value & 0x3fc00000) | tmp;
+	return 0;
+}
+
+int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
+			     struct radeon_cs_packet *pkt,
+			     int idx)
+{
+	unsigned c, i;
+	struct radeon_cs_reloc *reloc;
+	struct r100_cs_track *track;
+	int r = 0;
+	volatile uint32_t *ib;
+	u32 idx_value;
+
+	ib = p->ib->ptr;
+	track = (struct r100_cs_track *)p->track;
+	c = radeon_get_ib_value(p, idx++) & 0x1F;
+	if (c > 16) {
+	    DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
+		      pkt->opcode);
+	    r100_cs_dump_packet(p, pkt);
+	    return -EINVAL;
+	}
+	track->num_arrays = c;
+	for (i = 0; i < (c - 1); i+=2, idx+=3) {
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n",
+				  pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		idx_value = radeon_get_ib_value(p, idx);
+		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+
+		track->arrays[i + 0].esize = idx_value >> 8;
+		track->arrays[i + 0].robj = reloc->robj;
+		track->arrays[i + 0].esize &= 0x7F;
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n",
+				  pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
+		track->arrays[i + 1].robj = reloc->robj;
+		track->arrays[i + 1].esize = idx_value >> 24;
+		track->arrays[i + 1].esize &= 0x7F;
+	}
+	if (c & 1) {
+		r = r100_cs_packet_next_reloc(p, &reloc);
+		if (r) {
+			DRM_ERROR("No reloc for packet3 %d\n",
+					  pkt->opcode);
+			r100_cs_dump_packet(p, pkt);
+			return r;
+		}
+		idx_value = radeon_get_ib_value(p, idx);
+		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+		track->arrays[i + 0].robj = reloc->robj;
+		track->arrays[i + 0].esize = idx_value >> 8;
+		track->arrays[i + 0].esize &= 0x7F;
+	}
+	return r;
+}
+
 void r100_pre_page_flip(struct radeon_device *rdev, int crtc)
 {
 	/* enable the pflip int */
@@ -588,7 +690,7 @@ void r100_irq_disable(struct radeon_device *rdev)
 	WREG32(R_000044_GEN_INT_STATUS, tmp);
 }
 
-static inline uint32_t r100_irq_ack(struct radeon_device *rdev)
+static uint32_t r100_irq_ack(struct radeon_device *rdev)
 {
 	uint32_t irqs = RREG32(RADEON_GEN_INT_STATUS);
 	uint32_t irq_mask = RADEON_SW_INT_TEST |
@@ -3147,7 +3249,7 @@ void r100_bandwidth_update(struct radeon_device *rdev)
 	}
 }
 
-static inline void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
+static void r100_cs_track_texture_print(struct r100_cs_track_texture *t)
 {
 	DRM_ERROR("pitch                      %d\n", t->pitch);
 	DRM_ERROR("use_pitch                  %d\n", t->use_pitch);
diff --git a/drivers/gpu/drm/radeon/r100_track.h b/drivers/gpu/drm/radeon/r100_track.h
index 686f9dc..6a603b3 100644
--- a/drivers/gpu/drm/radeon/r100_track.h
+++ b/drivers/gpu/drm/radeon/r100_track.h
@@ -92,106 +92,10 @@ int r200_packet0_check(struct radeon_cs_parser *p,
 		       struct radeon_cs_packet *pkt,
 		       unsigned idx, unsigned reg);
 
-
-
-static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
-					  struct radeon_cs_packet *pkt,
-					  unsigned idx,
-					  unsigned reg)
-{
-	int r;
-	u32 tile_flags = 0;
-	u32 tmp;
-	struct radeon_cs_reloc *reloc;
-	u32 value;
-
-	r = r100_cs_packet_next_reloc(p, &reloc);
-	if (r) {
-		DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
-			  idx, reg);
-		r100_cs_dump_packet(p, pkt);
-		return r;
-	}
-	value = radeon_get_ib_value(p, idx);
-	tmp = value & 0x003fffff;
-	tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
-
-	if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
-		tile_flags |= RADEON_DST_TILE_MACRO;
-	if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
-		if (reg == RADEON_SRC_PITCH_OFFSET) {
-			DRM_ERROR("Cannot src blit from microtiled surface\n");
-			r100_cs_dump_packet(p, pkt);
-			return -EINVAL;
-		}
-		tile_flags |= RADEON_DST_TILE_MICRO;
-	}
-
-	tmp |= tile_flags;
-	p->ib->ptr[idx] = (value & 0x3fc00000) | tmp;
-	return 0;
-}
-
-static inline int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
-					   struct radeon_cs_packet *pkt,
-					   int idx)
-{
-	unsigned c, i;
-	struct radeon_cs_reloc *reloc;
-	struct r100_cs_track *track;
-	int r = 0;
-	volatile uint32_t *ib;
-	u32 idx_value;
-
-	ib = p->ib->ptr;
-	track = (struct r100_cs_track *)p->track;
-	c = radeon_get_ib_value(p, idx++) & 0x1F;
-	if (c > 16) {
-	    DRM_ERROR("Only 16 vertex buffers are allowed %d\n",
-		      pkt->opcode);
-	    r100_cs_dump_packet(p, pkt);
-	    return -EINVAL;
-	}
-	track->num_arrays = c;
-	for (i = 0; i < (c - 1); i+=2, idx+=3) {
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for packet3 %d\n",
-				  pkt->opcode);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
-		idx_value = radeon_get_ib_value(p, idx);
-		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
-
-		track->arrays[i + 0].esize = idx_value >> 8;
-		track->arrays[i + 0].robj = reloc->robj;
-		track->arrays[i + 0].esize &= 0x7F;
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for packet3 %d\n",
-				  pkt->opcode);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
-		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
-		track->arrays[i + 1].robj = reloc->robj;
-		track->arrays[i + 1].esize = idx_value >> 24;
-		track->arrays[i + 1].esize &= 0x7F;
-	}
-	if (c & 1) {
-		r = r100_cs_packet_next_reloc(p, &reloc);
-		if (r) {
-			DRM_ERROR("No reloc for packet3 %d\n",
-					  pkt->opcode);
-			r100_cs_dump_packet(p, pkt);
-			return r;
-		}
-		idx_value = radeon_get_ib_value(p, idx);
-		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
-		track->arrays[i + 0].robj = reloc->robj;
-		track->arrays[i + 0].esize = idx_value >> 8;
-		track->arrays[i + 0].esize &= 0x7F;
-	}
-	return r;
-}
+int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
+			    struct radeon_cs_packet *pkt,
+			    unsigned idx,
+			    unsigned reg);
+int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
+			     struct radeon_cs_packet *pkt,
+			     int idx);
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 07/12] RADEON: Move more code out of line
  2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
                   ` (4 preceding siblings ...)
  2011-10-13 23:08 ` [PATCH 06/12] RADEON: Remove more bogus inlines in the radeon driver Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
  2011-10-13 23:08 ` [PATCH 08/12] X86: Move alloc_intr_gate " Andi Kleen
                   ` (5 subsequent siblings)
  11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, airlied

From: Andi Kleen <ak@linux.intel.com>

With this patch I'm only about 50k larger with DRM debugging
enables (why is that enabled by default?!?), and slightly
smaller without.

Cc: airlied@linux.ie
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/gpu/drm/radeon/r100.c             |   39 ++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/r300_cmdbuf.c      |    2 +-
 drivers/gpu/drm/radeon/r600.c             |    4 +-
 drivers/gpu/drm/radeon/r600_blit_kms.c    |    2 +-
 drivers/gpu/drm/radeon/radeon.h           |   39 +++++----------------------
 drivers/gpu/drm/radeon/radeon_atombios.c  |    4 +-
 drivers/gpu/drm/radeon/radeon_irq.c       |    2 +-
 drivers/gpu/drm/radeon/radeon_legacy_tv.c |    2 +-
 drivers/gpu/drm/radeon/radeon_object.c    |   41 ++++++++++++++++++++++++++++
 drivers/gpu/drm/radeon/radeon_object.h    |   42 ++--------------------------
 drivers/gpu/drm/radeon/radeon_state.c     |   16 +++++-----
 11 files changed, 106 insertions(+), 87 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 6720929..1bbed1f 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -4107,3 +4107,42 @@ void r100_io_wreg(struct radeon_device *rdev, u32 reg, u32 v)
 		iowrite32(v, rdev->rio_mem + RADEON_MM_DATA);
 	}
 }
+
+/* Better place? */
+u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
+{
+	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
+	u32 pg_idx, pg_offset;
+	u32 idx_value = 0;
+	int new_page;
+
+	pg_idx = (idx * 4) / PAGE_SIZE;
+	pg_offset = (idx * 4) % PAGE_SIZE;
+
+	if (ibc->kpage_idx[0] == pg_idx)
+		return ibc->kpage[0][pg_offset/4];
+	if (ibc->kpage_idx[1] == pg_idx)
+		return ibc->kpage[1][pg_offset/4];
+
+	new_page = radeon_cs_update_pages(p, pg_idx);
+	if (new_page < 0) {
+		p->parser_error = new_page;
+		return 0;
+	}
+
+	idx_value = ibc->kpage[new_page][pg_offset/4];
+	return idx_value;
+}
+
+void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
+{
+#if DRM_DEBUG_CODE
+	if (rdev->cp.count_dw <= 0) {
+		DRM_ERROR("radeon: writting more dword to ring than expected !\n");
+	}
+#endif
+	rdev->cp.ring[rdev->cp.wptr++] = v;
+	rdev->cp.wptr &= rdev->cp.ptr_mask;
+	rdev->cp.count_dw--;
+	rdev->cp.ring_free_dw--;
+}
diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c
index c5c2742..1fe98b4 100644
--- a/drivers/gpu/drm/radeon/r300_cmdbuf.c
+++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c
@@ -791,7 +791,7 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
 /**
  * Emit the sequence to pacify R300.
  */
-static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
+static void r300_pacify(drm_radeon_private_t *dev_priv)
 {
 	uint32_t cache_z, cache_3d, cache_2d;
 	RING_LOCALS;
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 720dd99..c85047f 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -3137,7 +3137,7 @@ int r600_irq_set(struct radeon_device *rdev)
 	return 0;
 }
 
-static inline void r600_irq_ack(struct radeon_device *rdev)
+static void r600_irq_ack(struct radeon_device *rdev)
 {
 	u32 tmp;
 
@@ -3238,7 +3238,7 @@ void r600_irq_disable(struct radeon_device *rdev)
 	r600_disable_interrupt_state(rdev);
 }
 
-static inline u32 r600_get_ih_wptr(struct radeon_device *rdev)
+static u32 r600_get_ih_wptr(struct radeon_device *rdev)
 {
 	u32 wptr, tmp;
 
diff --git a/drivers/gpu/drm/radeon/r600_blit_kms.c b/drivers/gpu/drm/radeon/r600_blit_kms.c
index 9aa74c3..bbbafe6 100644
--- a/drivers/gpu/drm/radeon/r600_blit_kms.c
+++ b/drivers/gpu/drm/radeon/r600_blit_kms.c
@@ -450,7 +450,7 @@ set_default_state(struct radeon_device *rdev)
 	radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
 }
 
-static inline uint32_t i2f(uint32_t input)
+static uint32_t i2f(uint32_t input)
 {
 	u32 result, i, exponent, fraction;
 
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 8ac6cba..5ed7ef7 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -601,32 +601,7 @@ struct radeon_cs_parser {
 
 extern int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx);
 extern int radeon_cs_finish_pages(struct radeon_cs_parser *p);
-
-
-static inline u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
-{
-	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
-	u32 pg_idx, pg_offset;
-	u32 idx_value = 0;
-	int new_page;
-
-	pg_idx = (idx * 4) / PAGE_SIZE;
-	pg_offset = (idx * 4) % PAGE_SIZE;
-
-	if (ibc->kpage_idx[0] == pg_idx)
-		return ibc->kpage[0][pg_offset/4];
-	if (ibc->kpage_idx[1] == pg_idx)
-		return ibc->kpage[1][pg_offset/4];
-
-	new_page = radeon_cs_update_pages(p, pg_idx);
-	if (new_page < 0) {
-		p->parser_error = new_page;
-		return 0;
-	}
-
-	idx_value = ibc->kpage[new_page][pg_offset/4];
-	return idx_value;
-}
+extern u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx);
 
 struct radeon_cs_packet {
 	unsigned	idx;
@@ -1378,19 +1353,19 @@ void radeon_atombios_fini(struct radeon_device *rdev);
 /*
  * RING helpers.
  */
+
+#if DRM_DEBUG_CODE == 0
 static inline void radeon_ring_write(struct radeon_device *rdev, uint32_t v)
 {
-#if DRM_DEBUG_CODE
-	if (rdev->cp.count_dw <= 0) {
-		DRM_ERROR("radeon: writting more dword to ring than expected !\n");
-	}
-#endif
 	rdev->cp.ring[rdev->cp.wptr++] = v;
 	rdev->cp.wptr &= rdev->cp.ptr_mask;
 	rdev->cp.count_dw--;
 	rdev->cp.ring_free_dw--;
 }
-
+#else
+/* With debugging this is just too big to inline */
+void radeon_ring_write(struct radeon_device *rdev, uint32_t v);
+#endif
 
 /*
  * ASICs macro.
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index bf2b615..08d0b94 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -62,7 +62,7 @@ union atom_supported_devices {
 	struct _ATOM_SUPPORTED_DEVICES_INFO_2d1 info_2d1;
 };
 
-static inline struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rdev,
+static struct radeon_i2c_bus_rec radeon_lookup_i2c_gpio(struct radeon_device *rdev,
 							       uint8_t id)
 {
 	struct atom_context *ctx = rdev->mode_info.atom_context;
@@ -228,7 +228,7 @@ void radeon_atombios_i2c_init(struct radeon_device *rdev)
 	}
 }
 
-static inline struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
+static struct radeon_gpio_rec radeon_lookup_gpio(struct radeon_device *rdev,
 							u8 id)
 {
 	struct atom_context *ctx = rdev->mode_info.atom_context;
diff --git a/drivers/gpu/drm/radeon/radeon_irq.c b/drivers/gpu/drm/radeon/radeon_irq.c
index 465746b..00da384 100644
--- a/drivers/gpu/drm/radeon/radeon_irq.c
+++ b/drivers/gpu/drm/radeon/radeon_irq.c
@@ -129,7 +129,7 @@ void radeon_disable_vblank(struct drm_device *dev, int crtc)
 	}
 }
 
-static inline u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int)
+static u32 radeon_acknowledge_irqs(drm_radeon_private_t *dev_priv, u32 *r500_disp_int)
 {
 	u32 irqs = RADEON_READ(RADEON_GEN_INT_STATUS);
 	u32 irq_mask = RADEON_SW_INT_TEST;
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_tv.c b/drivers/gpu/drm/radeon/radeon_legacy_tv.c
index c7b6cb4..b37ec0f 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_tv.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_tv.c
@@ -864,7 +864,7 @@ void radeon_legacy_tv_adjust_crtc_reg(struct drm_encoder *encoder,
 	*v_sync_strt_wid = tmp;
 }
 
-static inline int get_post_div(int value)
+static int get_post_div(int value)
 {
 	int post_div;
 	switch (value) {
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 976c3b1..1c85152 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -515,3 +515,44 @@ int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	}
 	return 0;
 }
+
+int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
+{
+	int r;
+
+	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
+	if (unlikely(r != 0))
+		return r;
+	spin_lock(&bo->tbo.bdev->fence_lock);
+	if (mem_type)
+		*mem_type = bo->tbo.mem.mem_type;
+	if (bo->tbo.sync_obj)
+		r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
+	spin_unlock(&bo->tbo.bdev->fence_lock);
+	ttm_bo_unreserve(&bo->tbo);
+	return r;
+}
+
+
+/**
+ * radeon_bo_reserve - reserve bo
+ * @bo:		bo structure
+ * @no_wait:		don't sleep while trying to reserve (return -EBUSY)
+ *
+ * Returns:
+ * -EBUSY: buffer is busy and @no_wait is true
+ * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
+ * a signal. Release all buffer reservations and return to user-space.
+ */
+int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait)
+{
+	int r;
+
+	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS)
+			dev_err(bo->rdev->dev, "%p reserve failed\n", bo);
+		return r;
+	}
+	return 0;
+}
diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
index ede6c13..b07f0f9 100644
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@@ -52,28 +52,7 @@ static inline unsigned radeon_mem_type_to_domain(u32 mem_type)
 	return 0;
 }
 
-/**
- * radeon_bo_reserve - reserve bo
- * @bo:		bo structure
- * @no_wait:		don't sleep while trying to reserve (return -EBUSY)
- *
- * Returns:
- * -EBUSY: buffer is busy and @no_wait is true
- * -ERESTARTSYS: A wait for the buffer to become unreserved was interrupted by
- * a signal. Release all buffer reservations and return to user-space.
- */
-static inline int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait)
-{
-	int r;
-
-	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
-	if (unlikely(r != 0)) {
-		if (r != -ERESTARTSYS)
-			dev_err(bo->rdev->dev, "%p reserve failed\n", bo);
-		return r;
-	}
-	return 0;
-}
+int radeon_bo_reserve(struct radeon_bo *bo, bool no_wait);
 
 static inline void radeon_bo_unreserve(struct radeon_bo *bo)
 {
@@ -118,23 +97,8 @@ static inline u64 radeon_bo_mmap_offset(struct radeon_bo *bo)
 	return bo->tbo.addr_space_offset;
 }
 
-static inline int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
-					bool no_wait)
-{
-	int r;
-
-	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, 0);
-	if (unlikely(r != 0))
-		return r;
-	spin_lock(&bo->tbo.bdev->fence_lock);
-	if (mem_type)
-		*mem_type = bo->tbo.mem.mem_type;
-	if (bo->tbo.sync_obj)
-		r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
-	spin_unlock(&bo->tbo.bdev->fence_lock);
-	ttm_bo_unreserve(&bo->tbo);
-	return r;
-}
+extern int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type,
+			  bool no_wait);
 
 extern int radeon_bo_create(struct radeon_device *rdev,
 				unsigned long size, int byte_align,
diff --git a/drivers/gpu/drm/radeon/radeon_state.c b/drivers/gpu/drm/radeon/radeon_state.c
index 92e7ea7..e8422ae 100644
--- a/drivers/gpu/drm/radeon/radeon_state.c
+++ b/drivers/gpu/drm/radeon/radeon_state.c
@@ -272,12 +272,12 @@ static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
 	return 0;
 }
 
-static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
-						     dev_priv,
-						     struct drm_file *file_priv,
-						     drm_radeon_kcmd_buffer_t *
-						     cmdbuf,
-						     unsigned int *cmdsz)
+static int radeon_check_and_fixup_packet3(drm_radeon_private_t *
+					  dev_priv,
+					  struct drm_file *file_priv,
+					  drm_radeon_kcmd_buffer_t *
+					  cmdbuf,
+					  unsigned int *cmdsz)
 {
 	u32 *cmd = drm_buffer_pointer_to_dword(cmdbuf->buffer, 0);
 	u32 offset, narrays;
@@ -446,8 +446,8 @@ static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
  * CP hardware state programming functions
  */
 
-static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
-					     struct drm_clip_rect * box)
+static void radeon_emit_clip_rect(drm_radeon_private_t * dev_priv,
+				  struct drm_clip_rect * box)
 {
 	RING_LOCALS;
 
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 08/12] X86: Move alloc_intr_gate out of line
  2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
                   ` (5 preceding siblings ...)
  2011-10-13 23:08 ` [PATCH 07/12] RADEON: Move more code out of line Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
  2011-10-13 23:08 ` [PATCH 09/12] Don't use inline node_page_state for sysfs output functions Andi Kleen
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, x86

From: Andi Kleen <ak@linux.intel.com>

This saves about 2.5k text on a non force inline kernel.

Cc: x86@kernel.org
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/include/asm/desc.h |   17 +----------------
 arch/x86/kernel/irqinit.c   |   20 ++++++++++++++++++++
 2 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index 41935fa..f7183e1 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -336,22 +336,7 @@ extern int first_system_vector;
 /* used_vectors is BITMAP for irq is not managed by percpu vector_irq */
 extern unsigned long used_vectors[];
 
-static inline void alloc_system_vector(int vector)
-{
-	if (!test_bit(vector, used_vectors)) {
-		set_bit(vector, used_vectors);
-		if (first_system_vector > vector)
-			first_system_vector = vector;
-	} else {
-		BUG();
-	}
-}
-
-static inline void alloc_intr_gate(unsigned int n, void *addr)
-{
-	alloc_system_vector(n);
-	set_intr_gate(n, addr);
-}
+void alloc_intr_gate(unsigned int n, void *addr);
 
 /*
  * This routine sets up an interrupt gate at directory privilege level 3.
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index b3300e6..b6e769b 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -162,6 +162,26 @@ void setup_vector_irq(int cpu)
 	__setup_vector_irq(cpu);
 }
 
+static void alloc_system_vector(int vector)
+{
+	if (!test_bit(vector, used_vectors)) {
+		set_bit(vector, used_vectors);
+		if (first_system_vector > vector)
+			first_system_vector = vector;
+	} else
+		BUG();
+}
+
+/*
+ * This could be made __init if xen didn't abuse it in its
+ * suspend path!
+ */
+void alloc_intr_gate(unsigned int n, void *addr)
+{
+	alloc_system_vector(n);
+	set_intr_gate(n, addr);
+}
+
 static void __init smp_intr_init(void)
 {
 #ifdef CONFIG_SMP
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 09/12] Don't use inline node_page_state for sysfs output functions
  2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
                   ` (6 preceding siblings ...)
  2011-10-13 23:08 ` [PATCH 08/12] X86: Move alloc_intr_gate " Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
  2011-10-13 23:08 ` [PATCH 10/12] REISERFS: reiserfs drop unnecessary inlines Andi Kleen
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

These are not time critical, and using an out of line function
saves about 2.5k text on a non force inline kernel.

I left the main hotpath user -- readahead -- inline for now.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/base/node.c |   72 +++++++++++++++++++++++++++-----------------------
 1 files changed, 39 insertions(+), 33 deletions(-)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 793f796..0b8e7a2 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -58,6 +58,12 @@ static inline ssize_t node_read_cpulist(struct sys_device *dev,
 static SYSDEV_ATTR(cpumap,  S_IRUGO, node_read_cpumask, NULL);
 static SYSDEV_ATTR(cpulist, S_IRUGO, node_read_cpulist, NULL);
 
+/* Don't inline */
+static unsigned long my_node_page_state(int node, enum zone_stat_item item)
+{
+	return node_page_state(node, item);
+}
+
 #define K(x) ((x) << (PAGE_SHIFT - 10))
 static ssize_t node_read_meminfo(struct sys_device * dev,
 			struct sysdev_attribute *attr, char * buf)
@@ -82,16 +88,16 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 		       nid, K(i.totalram),
 		       nid, K(i.freeram),
 		       nid, K(i.totalram - i.freeram),
-		       nid, K(node_page_state(nid, NR_ACTIVE_ANON) +
-				node_page_state(nid, NR_ACTIVE_FILE)),
-		       nid, K(node_page_state(nid, NR_INACTIVE_ANON) +
-				node_page_state(nid, NR_INACTIVE_FILE)),
-		       nid, K(node_page_state(nid, NR_ACTIVE_ANON)),
-		       nid, K(node_page_state(nid, NR_INACTIVE_ANON)),
-		       nid, K(node_page_state(nid, NR_ACTIVE_FILE)),
-		       nid, K(node_page_state(nid, NR_INACTIVE_FILE)),
-		       nid, K(node_page_state(nid, NR_UNEVICTABLE)),
-		       nid, K(node_page_state(nid, NR_MLOCK)));
+		       nid, K(my_node_page_state(nid, NR_ACTIVE_ANON) +
+				my_node_page_state(nid, NR_ACTIVE_FILE)),
+		       nid, K(my_node_page_state(nid, NR_INACTIVE_ANON) +
+				my_node_page_state(nid, NR_INACTIVE_FILE)),
+		       nid, K(my_node_page_state(nid, NR_ACTIVE_ANON)),
+		       nid, K(my_node_page_state(nid, NR_INACTIVE_ANON)),
+		       nid, K(my_node_page_state(nid, NR_ACTIVE_FILE)),
+		       nid, K(my_node_page_state(nid, NR_INACTIVE_FILE)),
+		       nid, K(my_node_page_state(nid, NR_UNEVICTABLE)),
+		       nid, K(my_node_page_state(nid, NR_MLOCK)));
 
 #ifdef CONFIG_HIGHMEM
 	n += sprintf(buf + n,
@@ -123,30 +129,30 @@ static ssize_t node_read_meminfo(struct sys_device * dev,
 		       "Node %d AnonHugePages:  %8lu kB\n"
 #endif
 			,
-		       nid, K(node_page_state(nid, NR_FILE_DIRTY)),
-		       nid, K(node_page_state(nid, NR_WRITEBACK)),
-		       nid, K(node_page_state(nid, NR_FILE_PAGES)),
-		       nid, K(node_page_state(nid, NR_FILE_MAPPED)),
-		       nid, K(node_page_state(nid, NR_ANON_PAGES)
+		       nid, K(my_node_page_state(nid, NR_FILE_DIRTY)),
+		       nid, K(my_node_page_state(nid, NR_WRITEBACK)),
+		       nid, K(my_node_page_state(nid, NR_FILE_PAGES)),
+		       nid, K(my_node_page_state(nid, NR_FILE_MAPPED)),
+		       nid, K(my_node_page_state(nid, NR_ANON_PAGES)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-			+ node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) *
+			+ my_node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) *
 			HPAGE_PMD_NR
 #endif
 		       ),
-		       nid, K(node_page_state(nid, NR_SHMEM)),
-		       nid, node_page_state(nid, NR_KERNEL_STACK) *
+		       nid, K(my_node_page_state(nid, NR_SHMEM)),
+		       nid, my_node_page_state(nid, NR_KERNEL_STACK) *
 				THREAD_SIZE / 1024,
-		       nid, K(node_page_state(nid, NR_PAGETABLE)),
-		       nid, K(node_page_state(nid, NR_UNSTABLE_NFS)),
-		       nid, K(node_page_state(nid, NR_BOUNCE)),
-		       nid, K(node_page_state(nid, NR_WRITEBACK_TEMP)),
-		       nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE) +
-				node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
-		       nid, K(node_page_state(nid, NR_SLAB_RECLAIMABLE)),
-		       nid, K(node_page_state(nid, NR_SLAB_UNRECLAIMABLE))
+		       nid, K(my_node_page_state(nid, NR_PAGETABLE)),
+		       nid, K(my_node_page_state(nid, NR_UNSTABLE_NFS)),
+		       nid, K(my_node_page_state(nid, NR_BOUNCE)),
+		       nid, K(my_node_page_state(nid, NR_WRITEBACK_TEMP)),
+		       nid, K(my_node_page_state(nid, NR_SLAB_RECLAIMABLE) +
+				my_node_page_state(nid, NR_SLAB_UNRECLAIMABLE)),
+		       nid, K(my_node_page_state(nid, NR_SLAB_RECLAIMABLE)),
+		       nid, K(my_node_page_state(nid, NR_SLAB_UNRECLAIMABLE))
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 			, nid,
-			K(node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) *
+			K(my_node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) *
 			HPAGE_PMD_NR)
 #endif
 		       );
@@ -167,12 +173,12 @@ static ssize_t node_read_numastat(struct sys_device * dev,
 		       "interleave_hit %lu\n"
 		       "local_node %lu\n"
 		       "other_node %lu\n",
-		       node_page_state(dev->id, NUMA_HIT),
-		       node_page_state(dev->id, NUMA_MISS),
-		       node_page_state(dev->id, NUMA_FOREIGN),
-		       node_page_state(dev->id, NUMA_INTERLEAVE_HIT),
-		       node_page_state(dev->id, NUMA_LOCAL),
-		       node_page_state(dev->id, NUMA_OTHER));
+		       my_node_page_state(dev->id, NUMA_HIT),
+		       my_node_page_state(dev->id, NUMA_MISS),
+		       my_node_page_state(dev->id, NUMA_FOREIGN),
+		       my_node_page_state(dev->id, NUMA_INTERLEAVE_HIT),
+		       my_node_page_state(dev->id, NUMA_LOCAL),
+		       my_node_page_state(dev->id, NUMA_OTHER));
 }
 static SYSDEV_ATTR(numastat, S_IRUGO, node_read_numastat, NULL);
 
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 10/12] REISERFS: reiserfs drop unnecessary inlines
  2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
                   ` (7 preceding siblings ...)
  2011-10-13 23:08 ` [PATCH 09/12] Don't use inline node_page_state for sysfs output functions Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
  2011-10-13 23:08 ` [PATCH 11/12] i915: Move i915_read/write out of line Andi Kleen
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, viro, fweisbec

From: Andi Kleen <ak@linux.intel.com>

Drop some inlines to shrink code size with force inline

Still some unfixed growth in:

balance_leaf                                7190    8766   +1576
search_by_key                               1963    3317   +1354

Cc: viro@zeniv.linux.org.uk
Cc: fweisbec@gmail.com
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 fs/reiserfs/do_balan.c |   14 +++++++-------
 1 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/reiserfs/do_balan.c b/fs/reiserfs/do_balan.c
index 60c0804..8b3c44c 100644
--- a/fs/reiserfs/do_balan.c
+++ b/fs/reiserfs/do_balan.c
@@ -21,7 +21,7 @@
 #include <linux/buffer_head.h>
 #include <linux/kernel.h>
 
-static inline void buffer_info_init_left(struct tree_balance *tb,
+static void buffer_info_init_left(struct tree_balance *tb,
                                          struct buffer_info *bi)
 {
 	bi->tb          = tb;
@@ -30,7 +30,7 @@ static inline void buffer_info_init_left(struct tree_balance *tb,
 	bi->bi_position = get_left_neighbor_position(tb, 0);
 }
 
-static inline void buffer_info_init_right(struct tree_balance *tb,
+static void buffer_info_init_right(struct tree_balance *tb,
                                           struct buffer_info *bi)
 {
 	bi->tb          = tb;
@@ -39,7 +39,7 @@ static inline void buffer_info_init_right(struct tree_balance *tb,
 	bi->bi_position = get_right_neighbor_position(tb, 0);
 }
 
-static inline void buffer_info_init_tbS0(struct tree_balance *tb,
+static void buffer_info_init_tbS0(struct tree_balance *tb,
                                          struct buffer_info *bi)
 {
 	bi->tb          = tb;
@@ -48,7 +48,7 @@ static inline void buffer_info_init_tbS0(struct tree_balance *tb,
 	bi->bi_position = PATH_H_POSITION(tb->tb_path, 1);
 }
 
-static inline void buffer_info_init_bh(struct tree_balance *tb,
+static void buffer_info_init_bh(struct tree_balance *tb,
                                        struct buffer_info *bi,
                                        struct buffer_head *bh)
 {
@@ -58,7 +58,7 @@ static inline void buffer_info_init_bh(struct tree_balance *tb,
 	bi->bi_position = 0;
 }
 
-inline void do_balance_mark_leaf_dirty(struct tree_balance *tb,
+void do_balance_mark_leaf_dirty(struct tree_balance *tb,
 				       struct buffer_head *bh, int flag)
 {
 	journal_mark_dirty(tb->transaction_handle,
@@ -1967,7 +1967,7 @@ static void check_internal_levels(struct tree_balance *tb)
 
 */
 
-static inline void do_balance_starts(struct tree_balance *tb)
+static void do_balance_starts(struct tree_balance *tb)
 {
 	/* use print_cur_tb() to see initial state of struct
 	   tree_balance */
@@ -1983,7 +1983,7 @@ static inline void do_balance_starts(struct tree_balance *tb)
 #endif
 }
 
-static inline void do_balance_completed(struct tree_balance *tb)
+static void do_balance_completed(struct tree_balance *tb)
 {
 
 #ifdef CONFIG_REISERFS_CHECK
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 11/12] i915:  Move i915_read/write out of line
  2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
                   ` (8 preceding siblings ...)
  2011-10-13 23:08 ` [PATCH 10/12] REISERFS: reiserfs drop unnecessary inlines Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
  2011-10-15 13:49   ` Daniel Vetter
  2011-10-18  5:47   ` Ben Widawsky
  2011-10-13 23:08 ` [PATCH 12/12] Force always inline for gcc 4.5 when optimizing for size Andi Kleen
  2011-10-18  8:59 ` [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c David Airlie
  11 siblings, 2 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen, keithp

From: Andi Kleen <ak@linux.intel.com>

With the tracing code in there they are far too big to inline.

.text savings compared to a non force inline kernel:

i915_restore_display                        4393   12036   +7643
i915_save_display                           4295   11459   +7164
i915_handle_error                           2979    6666   +3687
i915_driver_irq_handler                     2923    5086   +2163
i915_ringbuffer_info                         458    1661   +1203
i915_save_vga                                  -    1200   +1200
i915_driver_irq_uninstall                    453    1624   +1171
i915_driver_irq_postinstall                  913    2078   +1165
ironlake_enable_drps                         719    1872   +1153
i915_restore_vga                               -    1142   +1142
intel_display_capture_error_state            784    2030   +1246
intel_init_emon                              719    2016   +1297

and more ...

[AK: these are older numbers, with the new SNB forcewake checks
it will be even worse]

Cc: keithp@keithp.com
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c |   40 +++++++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_drv.h |   22 ++------------------
 2 files changed, 43 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f07e425..c2de142 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -895,3 +895,43 @@ module_exit(i915_exit);
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
 MODULE_LICENSE("GPL and additional rights");
+
+/* We give fast paths for the really cool registers */
+#define NEEDS_FORCE_WAKE(dev_priv, reg) \
+	(((dev_priv)->info->gen >= 6) && \
+	((reg) < 0x40000) && \
+	((reg) != FORCEWAKE))
+
+#define __i915_read(x, y) \
+u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
+	u##x val = 0; \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		gen6_gt_force_wake_get(dev_priv); \
+		val = read##y(dev_priv->regs + reg); \
+		gen6_gt_force_wake_put(dev_priv); \
+	} else { \
+		val = read##y(dev_priv->regs + reg); \
+	} \
+	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
+	return val; \
+}
+
+__i915_read(8, b)
+__i915_read(16, w)
+__i915_read(32, l)
+__i915_read(64, q)
+#undef __i915_read
+
+#define __i915_write(x, y) \
+void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
+	trace_i915_reg_rw(true, reg, val, sizeof(val)); \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		__gen6_gt_wait_for_fifo(dev_priv); \
+	} \
+	write##y(val, dev_priv->regs + reg); \
+}
+__i915_write(8, b)
+__i915_write(16, w)
+__i915_write(32, l)
+__i915_write(64, q)
+#undef __i915_write
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 7916bd9..7d171ea 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1354,18 +1354,7 @@ void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
 	((reg) != FORCEWAKE))
 
 #define __i915_read(x, y) \
-static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
-	u##x val = 0; \
-	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-		gen6_gt_force_wake_get(dev_priv); \
-		val = read##y(dev_priv->regs + reg); \
-		gen6_gt_force_wake_put(dev_priv); \
-	} else { \
-		val = read##y(dev_priv->regs + reg); \
-	} \
-	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
-	return val; \
-}
+	u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg);
 
 __i915_read(8, b)
 __i915_read(16, w)
@@ -1374,13 +1363,8 @@ __i915_read(64, q)
 #undef __i915_read
 
 #define __i915_write(x, y) \
-static inline void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
-	trace_i915_reg_rw(true, reg, val, sizeof(val)); \
-	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
-		__gen6_gt_wait_for_fifo(dev_priv); \
-	} \
-	write##y(val, dev_priv->regs + reg); \
-}
+	void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val);
+
 __i915_write(8, b)
 __i915_write(16, w)
 __i915_write(32, l)
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH 12/12] Force always inline for gcc 4.5 when optimizing for size
  2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
                   ` (9 preceding siblings ...)
  2011-10-13 23:08 ` [PATCH 11/12] i915: Move i915_read/write out of line Andi Kleen
@ 2011-10-13 23:08 ` Andi Kleen
  2011-10-18  8:59 ` [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c David Airlie
  11 siblings, 0 replies; 15+ messages in thread
From: Andi Kleen @ 2011-10-13 23:08 UTC (permalink / raw)
  To: linux-kernel; +Cc: dri-devel, akpm, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

I found that gcc 4.5 didn't inline a lot of inlines with
CONFIG_OPTIMIZE_INLINING and CONFIG_CC_OPTIMIZE_FOR_SIZE. It was quite
common to have very small inlines to be out of line, or worse inline
statics in include files to be out of line with a copy for every file
using it too.

This is handily visible in a function graph trace for might_fault:

 10)               |    might_fault() {
 10)               |      _cond_resched() {
 10)               |        should_resched() {
 10)               |          need_resched() {
 10)   0.063 us    |            test_ti_thread_flag();
 10)   0.643 us    |          }
 10)   1.238 us    |        }
 10)   1.845 us    |      }
 10)   2.438 us    |    }

Note all of these functions are very small and should be definitely
inlined in each other. In many cases even copy_from_user
ends up out of line now which is really bad!

If I switch to -O2 it is also not quite as bad, but since a lot
of people use -Os I was trying to fix it up.

So this patch forces inlining with gcc 4.5 with -Os.

Unfortunately it costs some code size with just this patch.

   text	   data	    bss	    dec	    hex	filename
11507035	1940276	1191936	14639247	 df608f	vmlinux-O2
10189858	1908124	1187840	13285822	 cab9be	vmlinux-Os-force
9808525		1940204	1187840	12936569 	 c56579	vmlinux-Os-orig

It turned out most of this was because of unnecessary inlines.
With a lot of inlines removed I now get:

11175824        1977200 1191936 14344960         dae300 vmlinux-inlines-removed-no-optimize
11642530        2018416 1191936 14852882         e2a312 vmlinux-master-no-optimize
11530439        2001264 1191936 14723639         e0aa37 vmlinux-master-optimize

which is significantly smaller.

I haven't tested earlier gcc 4.x versions, but they may need
the same treatment.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 include/linux/compiler-gcc.h |    5 ++++-
 1 files changed, 4 insertions(+), 1 deletions(-)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 59e4028..e477a7c 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -44,9 +44,12 @@
 /*
  * Force always-inline if the user requests it so via the .config,
  * or if gcc is too old:
+ * When optimizing for size on gcc 4.5 always force inlining too.
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \
-    !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
+    !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4) || \
+    (defined(CONFIG_CC_OPTIMIZE_FOR_SIZE) && \
+		(__GNUC__ == 4 && __GNUC_MINOR__ == 5))
 # define inline		inline		__attribute__((always_inline))
 # define __inline__	__inline__	__attribute__((always_inline))
 # define __inline	__inline	__attribute__((always_inline))
-- 
1.7.4.4


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH 11/12] i915:  Move i915_read/write out of line
  2011-10-13 23:08 ` [PATCH 11/12] i915: Move i915_read/write out of line Andi Kleen
@ 2011-10-15 13:49   ` Daniel Vetter
  2011-10-18  5:47   ` Ben Widawsky
  1 sibling, 0 replies; 15+ messages in thread
From: Daniel Vetter @ 2011-10-15 13:49 UTC (permalink / raw)
  To: Andi Kleen; +Cc: linux-kernel, akpm, Andi Kleen, dri-devel

On Thu, Oct 13, 2011 at 04:08:51PM -0700, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
> 
> With the tracing code in there they are far too big to inline.
> 
> .text savings compared to a non force inline kernel:
> 
> i915_restore_display                        4393   12036   +7643
> i915_save_display                           4295   11459   +7164
> i915_handle_error                           2979    6666   +3687
> i915_driver_irq_handler                     2923    5086   +2163
> i915_ringbuffer_info                         458    1661   +1203
> i915_save_vga                                  -    1200   +1200
> i915_driver_irq_uninstall                    453    1624   +1171
> i915_driver_irq_postinstall                  913    2078   +1165
> ironlake_enable_drps                         719    1872   +1153
> i915_restore_vga                               -    1142   +1142
> intel_display_capture_error_state            784    2030   +1246
> intel_init_emon                              719    2016   +1297
> 
> and more ...
> 
> [AK: these are older numbers, with the new SNB forcewake checks
> it will be even worse]
> 
> Cc: keithp@keithp.com
> Signed-off-by: Andi Kleen <ak@linux.intel.com>

Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
-- 
Daniel Vetter
Mail: daniel@ffwll.ch
Mobile: +41 (0)79 365 57 48

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 11/12] i915:  Move i915_read/write out of line
  2011-10-13 23:08 ` [PATCH 11/12] i915: Move i915_read/write out of line Andi Kleen
  2011-10-15 13:49   ` Daniel Vetter
@ 2011-10-18  5:47   ` Ben Widawsky
  1 sibling, 0 replies; 15+ messages in thread
From: Ben Widawsky @ 2011-10-18  5:47 UTC (permalink / raw)
  To: keithp; +Cc: Andi Kleen, linux-kernel, dri-devel, akpm, Andi Kleen, intel-gfx

On Thu, 13 Oct 2011 16:08:51 -0700
Andi Kleen <andi@firstfloor.org> wrote:

> From: Andi Kleen <ak@linux.intel.com>
> 
> With the tracing code in there they are far too big to inline.
> 
> .text savings compared to a non force inline kernel:
> 
> i915_restore_display                        4393   12036   +7643
> i915_save_display                           4295   11459   +7164
> i915_handle_error                           2979    6666   +3687
> i915_driver_irq_handler                     2923    5086   +2163
> i915_ringbuffer_info                         458    1661   +1203
> i915_save_vga                                  -    1200   +1200
> i915_driver_irq_uninstall                    453    1624   +1171
> i915_driver_irq_postinstall                  913    2078   +1165
> ironlake_enable_drps                         719    1872   +1153
> i915_restore_vga                               -    1142   +1142
> intel_display_capture_error_state            784    2030   +1246
> intel_init_emon                              719    2016   +1297
> 
> and more ...
> 
> [AK: these are older numbers, with the new SNB forcewake checks
> it will be even worse]
> 
> Cc: keithp@keithp.com
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  drivers/gpu/drm/i915/i915_drv.c |   40 +++++++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/i915_drv.h |   22 ++------------------
>  2 files changed, 43 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index f07e425..c2de142 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -895,3 +895,43 @@ module_exit(i915_exit);
>  MODULE_AUTHOR(DRIVER_AUTHOR);
>  MODULE_DESCRIPTION(DRIVER_DESC);
>  MODULE_LICENSE("GPL and additional rights");
> +
> +/* We give fast paths for the really cool registers */
> +#define NEEDS_FORCE_WAKE(dev_priv, reg) \
> +	(((dev_priv)->info->gen >= 6) && \
> +	((reg) < 0x40000) && \
> +	((reg) != FORCEWAKE))
> +
> +#define __i915_read(x, y) \
> +u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
> +	u##x val = 0; \
> +	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
> +		gen6_gt_force_wake_get(dev_priv); \
> +		val = read##y(dev_priv->regs + reg); \
> +		gen6_gt_force_wake_put(dev_priv); \
> +	} else { \
> +		val = read##y(dev_priv->regs + reg); \
> +	} \
> +	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
> +	return val; \
> +}
> +
> +__i915_read(8, b)
> +__i915_read(16, w)
> +__i915_read(32, l)
> +__i915_read(64, q)
> +#undef __i915_read
> +
> +#define __i915_write(x, y) \
> +void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
> +	trace_i915_reg_rw(true, reg, val, sizeof(val)); \
> +	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
> +		__gen6_gt_wait_for_fifo(dev_priv); \
> +	} \
> +	write##y(val, dev_priv->regs + reg); \
> +}
> +__i915_write(8, b)
> +__i915_write(16, w)
> +__i915_write(32, l)
> +__i915_write(64, q)
> +#undef __i915_write
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 7916bd9..7d171ea 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1354,18 +1354,7 @@ void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
>  	((reg) != FORCEWAKE))
>  
>  #define __i915_read(x, y) \
> -static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
> -	u##x val = 0; \
> -	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
> -		gen6_gt_force_wake_get(dev_priv); \
> -		val = read##y(dev_priv->regs + reg); \
> -		gen6_gt_force_wake_put(dev_priv); \
> -	} else { \
> -		val = read##y(dev_priv->regs + reg); \
> -	} \
> -	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
> -	return val; \
> -}
> +	u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg);
>  
>  __i915_read(8, b)
>  __i915_read(16, w)
> @@ -1374,13 +1363,8 @@ __i915_read(64, q)
>  #undef __i915_read
>  
>  #define __i915_write(x, y) \
> -static inline void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
> -	trace_i915_reg_rw(true, reg, val, sizeof(val)); \
> -	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
> -		__gen6_gt_wait_for_fifo(dev_priv); \
> -	} \
> -	write##y(val, dev_priv->regs + reg); \
> -}
> +	void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val);
> +
>  __i915_write(8, b)
>  __i915_write(16, w)
>  __i915_write(32, l)

Acked-by: Ben Widawsky <ben@bwidawsk.net>

The forcewake increased size should have been fixed a bit with the
forcewake struct encapsulation patch I posted to intel-gfx mailing list.
Keith, if you take this, could you also look into that patch?
<1315951648-5380-1-git-send-email-ben@bwidawsk.net>

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c
  2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
                   ` (10 preceding siblings ...)
  2011-10-13 23:08 ` [PATCH 12/12] Force always inline for gcc 4.5 when optimizing for size Andi Kleen
@ 2011-10-18  8:59 ` David Airlie
  11 siblings, 0 replies; 15+ messages in thread
From: David Airlie @ 2011-10-18  8:59 UTC (permalink / raw)
  To: Andi Kleen; +Cc: akpm, Andi Kleen, dri-devel, linux-kernel


> 
> From: Andi Kleen <ak@linux.intel.com>

Hi Andi,

I've merged all the RADEON: patches to drm-next locally, with one minor change (moving some of the out-of-lines to a more appropriate place).

Thanks,
Dave.

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2011-10-18  9:00 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-10-13 23:08 [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c Andi Kleen
2011-10-13 23:08 ` [PATCH 02/12] RADEON: Move r100_*_*reg out of line Andi Kleen
2011-10-13 23:08 ` [PATCH 03/12] RADEON: drop inlines in r600_blit.c Andi Kleen
2011-10-13 23:08 ` [PATCH 04/12] RADEON: Remove now unused functions in radeon driver Andi Kleen
2011-10-13 23:08 ` [PATCH 05/12] FB_ATY: Move register accesses out of line Andi Kleen
2011-10-13 23:08 ` [PATCH 06/12] RADEON: Remove more bogus inlines in the radeon driver Andi Kleen
2011-10-13 23:08 ` [PATCH 07/12] RADEON: Move more code out of line Andi Kleen
2011-10-13 23:08 ` [PATCH 08/12] X86: Move alloc_intr_gate " Andi Kleen
2011-10-13 23:08 ` [PATCH 09/12] Don't use inline node_page_state for sysfs output functions Andi Kleen
2011-10-13 23:08 ` [PATCH 10/12] REISERFS: reiserfs drop unnecessary inlines Andi Kleen
2011-10-13 23:08 ` [PATCH 11/12] i915: Move i915_read/write out of line Andi Kleen
2011-10-15 13:49   ` Daniel Vetter
2011-10-18  5:47   ` Ben Widawsky
2011-10-13 23:08 ` [PATCH 12/12] Force always inline for gcc 4.5 when optimizing for size Andi Kleen
2011-10-18  8:59 ` [PATCH 01/12] RADEON: Drop inlines from evergreen_cs.c / r600_cs.c David Airlie

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox