public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/1] mtd: cfi_cmdset_0001: Factor out do_write_buffer_locked() to reduce stack frame
@ 2026-04-08 21:11 Andy Shevchenko
  2026-04-09  7:26 ` David Laight
  0 siblings, 1 reply; 4+ messages in thread
From: Andy Shevchenko @ 2026-04-08 21:11 UTC (permalink / raw)
  To: Andy Shevchenko, linux-mtd, linux-kernel
  Cc: Miquel Raynal, Richard Weinberger, Vignesh Raghavendra,
	Lukas Wunner, Andrew Morton

Compiler is not happy about used stack frame:

drivers/mtd/chips/cfi_cmdset_0001.c: In function 'do_write_buffer':
drivers/mtd/chips/cfi_cmdset_0001.c:1887:1: error: the frame size of 1296 bytes is larger than 1280 bytes [-Werror=frame-larger-than=]

Fix this by factoring out do_write_buffer_locked().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
v3: addressed set but unused variables when MTD_XIP=y (LKP)
v2: kept DIS/ENABLE_VPP paired

 drivers/mtd/chips/cfi_cmdset_0001.c | 88 +++++++++++++++++------------
 1 file changed, 51 insertions(+), 37 deletions(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 5a4d2e16a9d1..7733e076ad40 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -1154,7 +1154,8 @@ static void __xipram xip_enable(struct map_info *map, struct flchip *chip,
 
 static int __xipram xip_wait_for_operation(
 		struct map_info *map, struct flchip *chip,
-		unsigned long adr, unsigned int chip_op_time_max)
+		unsigned long adr, unsigned long inval_adr, int inval_len,
+		unsigned int chip_op_time, unsigned int chip_op_time_max)
 {
 	struct cfi_private *cfi = map->fldrv_priv;
 	struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
@@ -1276,8 +1277,7 @@ static int __xipram xip_wait_for_operation(
 #define XIP_INVAL_CACHED_RANGE(map, from, size)  \
 	INVALIDATE_CACHED_RANGE(map, from, size)
 
-#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec, usec_max) \
-	xip_wait_for_operation(map, chip, cmd_adr, usec_max)
+#define INVAL_CACHE_AND_WAIT xip_wait_for_operation
 
 #else
 
@@ -1720,42 +1720,24 @@ static int cfi_intelext_write_words (struct mtd_info *mtd, loff_t to , size_t le
 }
 
 
-static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
-				    unsigned long adr, const struct kvec **pvec,
-				    unsigned long *pvec_seek, int len)
+static int __xipram do_write_buffer_locked(struct map_info *map, struct flchip *chip,
+					   unsigned long cmd_adr, unsigned long adr,
+					   const struct kvec **pvec,
+					   unsigned long *pvec_seek, int len)
 {
 	struct cfi_private *cfi = map->fldrv_priv;
 	map_word status, write_cmd, datum;
-	unsigned long cmd_adr;
-	int ret, wbufsize, word_gap, words;
+	int ret, word_gap, words;
 	const struct kvec *vec;
 	unsigned long vec_seek;
 	unsigned long initial_adr;
 	int initial_len = len;
 
-	wbufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
-	adr += chip->start;
 	initial_adr = adr;
-	cmd_adr = adr & ~(wbufsize-1);
-
-	/* Sharp LH28F640BF chips need the first address for the
-	 * Page Buffer Program command. See Table 5 of
-	 * LH28F320BF, LH28F640BF, LH28F128BF Series (Appendix FUM00701) */
-	if (is_LH28F640BF(cfi))
-		cmd_adr = adr;
 
 	/* Let's determine this according to the interleave only once */
 	write_cmd = (cfi->cfiq->P_ID != P_ID_INTEL_PERFORMANCE) ? CMD(0xe8) : CMD(0xe9);
 
-	mutex_lock(&chip->mutex);
-	ret = get_chip(map, chip, cmd_adr, FL_WRITING);
-	if (ret) {
-		mutex_unlock(&chip->mutex);
-		return ret;
-	}
-
-	XIP_INVAL_CACHED_RANGE(map, initial_adr, initial_len);
-	ENABLE_VPP(map);
 	xip_disable(map, chip, cmd_adr);
 
 	/* §4.8 of the 28FxxxJ3A datasheet says "Any time SR.4 and/or SR.5 is set
@@ -1789,7 +1771,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 		xip_enable(map, chip, cmd_adr);
 		printk(KERN_ERR "%s: Chip not ready for buffer write. Xstatus = %lx, status = %lx\n",
 				map->name, Xstatus.x[0], status.x[0]);
-		goto out;
+		return ret;
 	}
 
 	/* Figure out the number of words to write */
@@ -1853,7 +1835,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 		chip->state = FL_STATUS;
 		xip_enable(map, chip, cmd_adr);
 		printk(KERN_ERR "%s: buffer write error (status timeout)\n", map->name);
-		goto out;
+		return ret;
 	}
 
 	/* check for errors */
@@ -1866,21 +1848,53 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 		map_write(map, CMD(0x70), cmd_adr);
 		xip_enable(map, chip, cmd_adr);
 
-		if (chipstatus & 0x02) {
-			ret = -EROFS;
-		} else if (chipstatus & 0x08) {
+		if (chipstatus & 0x02)
+			return -EROFS;
+
+		if (chipstatus & 0x08) {
 			printk(KERN_ERR "%s: buffer write error (bad VPP)\n", map->name);
-			ret = -EIO;
-		} else {
-			printk(KERN_ERR "%s: buffer write error (status 0x%lx)\n", map->name, chipstatus);
-			ret = -EINVAL;
+			return  -EIO;
 		}
 
-		goto out;
+		printk(KERN_ERR "%s: buffer write error (status 0x%lx)\n", map->name, chipstatus);
+		return -EINVAL;
 	}
 
 	xip_enable(map, chip, cmd_adr);
- out:	DISABLE_VPP(map);
+	return 0;
+}
+
+static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
+				    unsigned long adr, const struct kvec **pvec,
+				    unsigned long *pvec_seek, int len)
+{
+	struct cfi_private *cfi = map->fldrv_priv;
+	unsigned long cmd_adr;
+	int ret, wbufsize;
+
+	wbufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
+	adr += chip->start;
+	cmd_adr = adr & ~(wbufsize - 1);
+
+	/* Sharp LH28F640BF chips need the first address for the
+	 * Page Buffer Program command. See Table 5 of
+	 * LH28F320BF, LH28F640BF, LH28F128BF Series (Appendix FUM00701) */
+	if (is_LH28F640BF(cfi))
+		cmd_adr = adr;
+
+	mutex_lock(&chip->mutex);
+	ret = get_chip(map, chip, cmd_adr, FL_WRITING);
+	if (ret) {
+		mutex_unlock(&chip->mutex);
+		return ret;
+	}
+
+	XIP_INVAL_CACHED_RANGE(map, adr, len);
+	ENABLE_VPP(map);
+
+	ret = do_write_buffer_locked(map, chip, cmd_adr, adr, pvec, pvec_seek, len);
+
+	DISABLE_VPP(map);
 	put_chip(map, chip, cmd_adr);
 	mutex_unlock(&chip->mutex);
 	return ret;
-- 
2.50.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH v3 1/1] mtd: cfi_cmdset_0001: Factor out do_write_buffer_locked() to reduce stack frame
  2026-04-08 21:11 [PATCH v3 1/1] mtd: cfi_cmdset_0001: Factor out do_write_buffer_locked() to reduce stack frame Andy Shevchenko
@ 2026-04-09  7:26 ` David Laight
  2026-04-09  7:58   ` Lukas Wunner
  0 siblings, 1 reply; 4+ messages in thread
From: David Laight @ 2026-04-09  7:26 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: linux-mtd, linux-kernel, Miquel Raynal, Richard Weinberger,
	Vignesh Raghavendra, Lukas Wunner, Andrew Morton

On Wed,  8 Apr 2026 23:11:48 +0200
Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote:

> Compiler is not happy about used stack frame:
> 
> drivers/mtd/chips/cfi_cmdset_0001.c: In function 'do_write_buffer':
> drivers/mtd/chips/cfi_cmdset_0001.c:1887:1: error: the frame size of 1296 bytes is larger than 1280 bytes [-Werror=frame-larger-than=]
> 
> Fix this by factoring out do_write_buffer_locked().

Does this just split the large stack frame between two nested functions?
I'd also expect the compiler to inline do_write_buffer_locked() so it
makes little difference.
OTOH I can't immediately see where the large stack frame comes from.

	David

> 
> Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> ---
> v3: addressed set but unused variables when MTD_XIP=y (LKP)
> v2: kept DIS/ENABLE_VPP paired
> 
>  drivers/mtd/chips/cfi_cmdset_0001.c | 88 +++++++++++++++++------------
>  1 file changed, 51 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
> index 5a4d2e16a9d1..7733e076ad40 100644
> --- a/drivers/mtd/chips/cfi_cmdset_0001.c
> +++ b/drivers/mtd/chips/cfi_cmdset_0001.c
> @@ -1154,7 +1154,8 @@ static void __xipram xip_enable(struct map_info *map, struct flchip *chip,
>  
>  static int __xipram xip_wait_for_operation(
>  		struct map_info *map, struct flchip *chip,
> -		unsigned long adr, unsigned int chip_op_time_max)
> +		unsigned long adr, unsigned long inval_adr, int inval_len,
> +		unsigned int chip_op_time, unsigned int chip_op_time_max)
>  {
>  	struct cfi_private *cfi = map->fldrv_priv;
>  	struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
> @@ -1276,8 +1277,7 @@ static int __xipram xip_wait_for_operation(
>  #define XIP_INVAL_CACHED_RANGE(map, from, size)  \
>  	INVALIDATE_CACHED_RANGE(map, from, size)
>  
> -#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec, usec_max) \
> -	xip_wait_for_operation(map, chip, cmd_adr, usec_max)
> +#define INVAL_CACHE_AND_WAIT xip_wait_for_operation

Isn't that separate and unrelated?
The compiler might optimise away the parameters you are adding back.

	David

>  
>  #else
>  
> @@ -1720,42 +1720,24 @@ static int cfi_intelext_write_words (struct mtd_info *mtd, loff_t to , size_t le
>  }
>  
>  
> -static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
> -				    unsigned long adr, const struct kvec **pvec,
> -				    unsigned long *pvec_seek, int len)
> +static int __xipram do_write_buffer_locked(struct map_info *map, struct flchip *chip,
> +					   unsigned long cmd_adr, unsigned long adr,
> +					   const struct kvec **pvec,
> +					   unsigned long *pvec_seek, int len)
>  {
>  	struct cfi_private *cfi = map->fldrv_priv;
>  	map_word status, write_cmd, datum;
> -	unsigned long cmd_adr;
> -	int ret, wbufsize, word_gap, words;
> +	int ret, word_gap, words;
>  	const struct kvec *vec;
>  	unsigned long vec_seek;
>  	unsigned long initial_adr;
>  	int initial_len = len;
>  
> -	wbufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
> -	adr += chip->start;
>  	initial_adr = adr;
> -	cmd_adr = adr & ~(wbufsize-1);
> -
> -	/* Sharp LH28F640BF chips need the first address for the
> -	 * Page Buffer Program command. See Table 5 of
> -	 * LH28F320BF, LH28F640BF, LH28F128BF Series (Appendix FUM00701) */
> -	if (is_LH28F640BF(cfi))
> -		cmd_adr = adr;
>  
>  	/* Let's determine this according to the interleave only once */
>  	write_cmd = (cfi->cfiq->P_ID != P_ID_INTEL_PERFORMANCE) ? CMD(0xe8) : CMD(0xe9);
>  
> -	mutex_lock(&chip->mutex);
> -	ret = get_chip(map, chip, cmd_adr, FL_WRITING);
> -	if (ret) {
> -		mutex_unlock(&chip->mutex);
> -		return ret;
> -	}
> -
> -	XIP_INVAL_CACHED_RANGE(map, initial_adr, initial_len);
> -	ENABLE_VPP(map);
>  	xip_disable(map, chip, cmd_adr);
>  
>  	/* §4.8 of the 28FxxxJ3A datasheet says "Any time SR.4 and/or SR.5 is set
> @@ -1789,7 +1771,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
>  		xip_enable(map, chip, cmd_adr);
>  		printk(KERN_ERR "%s: Chip not ready for buffer write. Xstatus = %lx, status = %lx\n",
>  				map->name, Xstatus.x[0], status.x[0]);
> -		goto out;
> +		return ret;
>  	}
>  
>  	/* Figure out the number of words to write */
> @@ -1853,7 +1835,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
>  		chip->state = FL_STATUS;
>  		xip_enable(map, chip, cmd_adr);
>  		printk(KERN_ERR "%s: buffer write error (status timeout)\n", map->name);
> -		goto out;
> +		return ret;
>  	}
>  
>  	/* check for errors */
> @@ -1866,21 +1848,53 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
>  		map_write(map, CMD(0x70), cmd_adr);
>  		xip_enable(map, chip, cmd_adr);
>  
> -		if (chipstatus & 0x02) {
> -			ret = -EROFS;
> -		} else if (chipstatus & 0x08) {
> +		if (chipstatus & 0x02)
> +			return -EROFS;
> +
> +		if (chipstatus & 0x08) {
>  			printk(KERN_ERR "%s: buffer write error (bad VPP)\n", map->name);
> -			ret = -EIO;
> -		} else {
> -			printk(KERN_ERR "%s: buffer write error (status 0x%lx)\n", map->name, chipstatus);
> -			ret = -EINVAL;
> +			return  -EIO;
>  		}
>  
> -		goto out;
> +		printk(KERN_ERR "%s: buffer write error (status 0x%lx)\n", map->name, chipstatus);
> +		return -EINVAL;
>  	}
>  
>  	xip_enable(map, chip, cmd_adr);
> - out:	DISABLE_VPP(map);
> +	return 0;
> +}
> +
> +static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
> +				    unsigned long adr, const struct kvec **pvec,
> +				    unsigned long *pvec_seek, int len)
> +{
> +	struct cfi_private *cfi = map->fldrv_priv;
> +	unsigned long cmd_adr;
> +	int ret, wbufsize;
> +
> +	wbufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
> +	adr += chip->start;
> +	cmd_adr = adr & ~(wbufsize - 1);
> +
> +	/* Sharp LH28F640BF chips need the first address for the
> +	 * Page Buffer Program command. See Table 5 of
> +	 * LH28F320BF, LH28F640BF, LH28F128BF Series (Appendix FUM00701) */
> +	if (is_LH28F640BF(cfi))
> +		cmd_adr = adr;
> +
> +	mutex_lock(&chip->mutex);
> +	ret = get_chip(map, chip, cmd_adr, FL_WRITING);
> +	if (ret) {
> +		mutex_unlock(&chip->mutex);
> +		return ret;
> +	}
> +
> +	XIP_INVAL_CACHED_RANGE(map, adr, len);
> +	ENABLE_VPP(map);
> +
> +	ret = do_write_buffer_locked(map, chip, cmd_adr, adr, pvec, pvec_seek, len);
> +
> +	DISABLE_VPP(map);
>  	put_chip(map, chip, cmd_adr);
>  	mutex_unlock(&chip->mutex);
>  	return ret;


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3 1/1] mtd: cfi_cmdset_0001: Factor out do_write_buffer_locked() to reduce stack frame
  2026-04-09  7:26 ` David Laight
@ 2026-04-09  7:58   ` Lukas Wunner
  2026-04-09 11:28     ` David Laight
  0 siblings, 1 reply; 4+ messages in thread
From: Lukas Wunner @ 2026-04-09  7:58 UTC (permalink / raw)
  To: David Laight
  Cc: Andy Shevchenko, linux-mtd, linux-kernel, Miquel Raynal,
	Richard Weinberger, Vignesh Raghavendra, Andrew Morton

On Thu, Apr 09, 2026 at 08:26:11AM +0100, David Laight wrote:
> On Wed,  8 Apr 2026 23:11:48 +0200 Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote:
> > Compiler is not happy about used stack frame:
> > 
> > drivers/mtd/chips/cfi_cmdset_0001.c: In function 'do_write_buffer':
> > drivers/mtd/chips/cfi_cmdset_0001.c:1887:1: error: the frame size of 1296 bytes is larger than 1280 bytes [-Werror=frame-larger-than=]
> > 
> > Fix this by factoring out do_write_buffer_locked().
> 
> Does this just split the large stack frame between two nested functions?
> I'd also expect the compiler to inline do_write_buffer_locked() so it
> makes little difference.
> OTOH I can't immediately see where the large stack frame comes from.

The error occurs for an allmodconfig build on arm, which implies
CONFIG_KASAN_STACK=y and thus increases stack usage vis-à-vis a
"regular" build.

Stack usage is high here because of the three "map_word" types,
which can each be up to 256 unsigned longs (32 * 8), see the
definitions of MAX_MAP_LONGS, MAX_MAP_BANKWIDTH, map_word in
include/linux/mtd/map.h.

Possible solutions:

- Disable KASAN entirely for this file:
  https://lore.kernel.org/all/adX3SHYgazijahbG@wunner.de/

  Not always a good option, particularly for stuff like lib/maple_tree.c
  where the same issue exists in mas_wr_spanning_store() and KASAN would
  certainly be good to have for that one.

- Use heap instead of stack.

- Split function in smaller chunks and mark them "noinline".

Thanks,

Lukas

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v3 1/1] mtd: cfi_cmdset_0001: Factor out do_write_buffer_locked() to reduce stack frame
  2026-04-09  7:58   ` Lukas Wunner
@ 2026-04-09 11:28     ` David Laight
  0 siblings, 0 replies; 4+ messages in thread
From: David Laight @ 2026-04-09 11:28 UTC (permalink / raw)
  To: Lukas Wunner
  Cc: Andy Shevchenko, linux-mtd, linux-kernel, Miquel Raynal,
	Richard Weinberger, Vignesh Raghavendra, Andrew Morton

On Thu, 9 Apr 2026 09:58:28 +0200
Lukas Wunner <lukas@wunner.de> wrote:

> On Thu, Apr 09, 2026 at 08:26:11AM +0100, David Laight wrote:
> > On Wed,  8 Apr 2026 23:11:48 +0200 Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote:  
> > > Compiler is not happy about used stack frame:
> > > 
> > > drivers/mtd/chips/cfi_cmdset_0001.c: In function 'do_write_buffer':
> > > drivers/mtd/chips/cfi_cmdset_0001.c:1887:1: error: the frame size of 1296 bytes is larger than 1280 bytes [-Werror=frame-larger-than=]
> > > 
> > > Fix this by factoring out do_write_buffer_locked().  
> > 
> > Does this just split the large stack frame between two nested functions?
> > I'd also expect the compiler to inline do_write_buffer_locked() so it
> > makes little difference.
> > OTOH I can't immediately see where the large stack frame comes from.  
> 
> The error occurs for an allmodconfig build on arm, which implies
> CONFIG_KASAN_STACK=y and thus increases stack usage vis-à-vis a
> "regular" build.
> 
> Stack usage is high here because of the three "map_word" types,
> which can each be up to 256 unsigned longs (32 * 8), see the
> definitions of MAX_MAP_LONGS, MAX_MAP_BANKWIDTH, map_word in
> include/linux/mtd/map.h.

Ugg - that code is horrid.
Returning structures by value isn't really a good idea.

> 
> Possible solutions:
> 
> - Disable KASAN entirely for this file:
>   https://lore.kernel.org/all/adX3SHYgazijahbG@wunner.de/
> 
>   Not always a good option, particularly for stuff like lib/maple_tree.c
>   where the same issue exists in mas_wr_spanning_store() and KASAN would
>   certainly be good to have for that one.

I've peeked at that at least once.
Some big functions get inlined; IIRC one small function is basically:
	if (expr) a(args) else b(args);
and marking both a and b noinline would help a lot.

> 
> - Use heap instead of stack.
> 
> - Split function in smaller chunks and mark them "noinline".

That might make the code easier to read as well.

But looking at it, I think that a small amount of refactoring
(mostly moving the initial 'status' check before the command
is written) would mean that only one 'map_word' would be valid
at any one time.

I didn't look at what was really happening though.
I suspect it is similar to some code I've written for accessing serial
EEPROM where the control data is written one bit at a time, but the
data itself is read/written in 4 bit chunks (although the low-level hardware
did multiple 'nibble' accesses for wider transfers).
In any case it surely can't be necessary to have a 256+ byte structure
to hold the 8-bit command/status values.
(In my case the 8 bits got 'spread' across a 32bit word and written
(to the fgpa - helped because I was writing that end as well) as a single word.)

	David

> 
> Thanks,
> 
> Lukas
> 


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2026-04-09 11:28 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-08 21:11 [PATCH v3 1/1] mtd: cfi_cmdset_0001: Factor out do_write_buffer_locked() to reduce stack frame Andy Shevchenko
2026-04-09  7:26 ` David Laight
2026-04-09  7:58   ` Lukas Wunner
2026-04-09 11:28     ` David Laight

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox