public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH v3 1/1] mtd: cfi_cmdset_0001: Factor out do_write_buffer_locked() to reduce stack frame
@ 2026-04-08 21:11 Andy Shevchenko
  2026-04-09  7:26 ` David Laight
  0 siblings, 1 reply; 5+ messages in thread
From: Andy Shevchenko @ 2026-04-08 21:11 UTC (permalink / raw)
  To: Andy Shevchenko, linux-mtd, linux-kernel
  Cc: Miquel Raynal, Richard Weinberger, Vignesh Raghavendra,
	Lukas Wunner, Andrew Morton

Compiler is not happy about used stack frame:

drivers/mtd/chips/cfi_cmdset_0001.c: In function 'do_write_buffer':
drivers/mtd/chips/cfi_cmdset_0001.c:1887:1: error: the frame size of 1296 bytes is larger than 1280 bytes [-Werror=frame-larger-than=]

Fix this by factoring out do_write_buffer_locked().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
v3: addressed set but unused variables when MTD_XIP=y (LKP)
v2: kept DIS/ENABLE_VPP paired

 drivers/mtd/chips/cfi_cmdset_0001.c | 88 +++++++++++++++++------------
 1 file changed, 51 insertions(+), 37 deletions(-)

diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 5a4d2e16a9d1..7733e076ad40 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -1154,7 +1154,8 @@ static void __xipram xip_enable(struct map_info *map, struct flchip *chip,
 
 static int __xipram xip_wait_for_operation(
 		struct map_info *map, struct flchip *chip,
-		unsigned long adr, unsigned int chip_op_time_max)
+		unsigned long adr, unsigned long inval_adr, int inval_len,
+		unsigned int chip_op_time, unsigned int chip_op_time_max)
 {
 	struct cfi_private *cfi = map->fldrv_priv;
 	struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
@@ -1276,8 +1277,7 @@ static int __xipram xip_wait_for_operation(
 #define XIP_INVAL_CACHED_RANGE(map, from, size)  \
 	INVALIDATE_CACHED_RANGE(map, from, size)
 
-#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec, usec_max) \
-	xip_wait_for_operation(map, chip, cmd_adr, usec_max)
+#define INVAL_CACHE_AND_WAIT xip_wait_for_operation
 
 #else
 
@@ -1720,42 +1720,24 @@ static int cfi_intelext_write_words (struct mtd_info *mtd, loff_t to , size_t le
 }
 
 
-static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
-				    unsigned long adr, const struct kvec **pvec,
-				    unsigned long *pvec_seek, int len)
+static int __xipram do_write_buffer_locked(struct map_info *map, struct flchip *chip,
+					   unsigned long cmd_adr, unsigned long adr,
+					   const struct kvec **pvec,
+					   unsigned long *pvec_seek, int len)
 {
 	struct cfi_private *cfi = map->fldrv_priv;
 	map_word status, write_cmd, datum;
-	unsigned long cmd_adr;
-	int ret, wbufsize, word_gap, words;
+	int ret, word_gap, words;
 	const struct kvec *vec;
 	unsigned long vec_seek;
 	unsigned long initial_adr;
 	int initial_len = len;
 
-	wbufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
-	adr += chip->start;
 	initial_adr = adr;
-	cmd_adr = adr & ~(wbufsize-1);
-
-	/* Sharp LH28F640BF chips need the first address for the
-	 * Page Buffer Program command. See Table 5 of
-	 * LH28F320BF, LH28F640BF, LH28F128BF Series (Appendix FUM00701) */
-	if (is_LH28F640BF(cfi))
-		cmd_adr = adr;
 
 	/* Let's determine this according to the interleave only once */
 	write_cmd = (cfi->cfiq->P_ID != P_ID_INTEL_PERFORMANCE) ? CMD(0xe8) : CMD(0xe9);
 
-	mutex_lock(&chip->mutex);
-	ret = get_chip(map, chip, cmd_adr, FL_WRITING);
-	if (ret) {
-		mutex_unlock(&chip->mutex);
-		return ret;
-	}
-
-	XIP_INVAL_CACHED_RANGE(map, initial_adr, initial_len);
-	ENABLE_VPP(map);
 	xip_disable(map, chip, cmd_adr);
 
 	/* §4.8 of the 28FxxxJ3A datasheet says "Any time SR.4 and/or SR.5 is set
@@ -1789,7 +1771,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 		xip_enable(map, chip, cmd_adr);
 		printk(KERN_ERR "%s: Chip not ready for buffer write. Xstatus = %lx, status = %lx\n",
 				map->name, Xstatus.x[0], status.x[0]);
-		goto out;
+		return ret;
 	}
 
 	/* Figure out the number of words to write */
@@ -1853,7 +1835,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 		chip->state = FL_STATUS;
 		xip_enable(map, chip, cmd_adr);
 		printk(KERN_ERR "%s: buffer write error (status timeout)\n", map->name);
-		goto out;
+		return ret;
 	}
 
 	/* check for errors */
@@ -1866,21 +1848,53 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 		map_write(map, CMD(0x70), cmd_adr);
 		xip_enable(map, chip, cmd_adr);
 
-		if (chipstatus & 0x02) {
-			ret = -EROFS;
-		} else if (chipstatus & 0x08) {
+		if (chipstatus & 0x02)
+			return -EROFS;
+
+		if (chipstatus & 0x08) {
 			printk(KERN_ERR "%s: buffer write error (bad VPP)\n", map->name);
-			ret = -EIO;
-		} else {
-			printk(KERN_ERR "%s: buffer write error (status 0x%lx)\n", map->name, chipstatus);
-			ret = -EINVAL;
+			return  -EIO;
 		}
 
-		goto out;
+		printk(KERN_ERR "%s: buffer write error (status 0x%lx)\n", map->name, chipstatus);
+		return -EINVAL;
 	}
 
 	xip_enable(map, chip, cmd_adr);
- out:	DISABLE_VPP(map);
+	return 0;
+}
+
+static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
+				    unsigned long adr, const struct kvec **pvec,
+				    unsigned long *pvec_seek, int len)
+{
+	struct cfi_private *cfi = map->fldrv_priv;
+	unsigned long cmd_adr;
+	int ret, wbufsize;
+
+	wbufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
+	adr += chip->start;
+	cmd_adr = adr & ~(wbufsize - 1);
+
+	/* Sharp LH28F640BF chips need the first address for the
+	 * Page Buffer Program command. See Table 5 of
+	 * LH28F320BF, LH28F640BF, LH28F128BF Series (Appendix FUM00701) */
+	if (is_LH28F640BF(cfi))
+		cmd_adr = adr;
+
+	mutex_lock(&chip->mutex);
+	ret = get_chip(map, chip, cmd_adr, FL_WRITING);
+	if (ret) {
+		mutex_unlock(&chip->mutex);
+		return ret;
+	}
+
+	XIP_INVAL_CACHED_RANGE(map, adr, len);
+	ENABLE_VPP(map);
+
+	ret = do_write_buffer_locked(map, chip, cmd_adr, adr, pvec, pvec_seek, len);
+
+	DISABLE_VPP(map);
 	put_chip(map, chip, cmd_adr);
 	mutex_unlock(&chip->mutex);
 	return ret;
-- 
2.50.1


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v3 1/1] mtd: cfi_cmdset_0001: Factor out do_write_buffer_locked() to reduce stack frame
  2026-04-08 21:11 [PATCH v3 1/1] mtd: cfi_cmdset_0001: Factor out do_write_buffer_locked() to reduce stack frame Andy Shevchenko
@ 2026-04-09  7:26 ` David Laight
  2026-04-09  7:58   ` Lukas Wunner
  0 siblings, 1 reply; 5+ messages in thread
From: David Laight @ 2026-04-09  7:26 UTC (permalink / raw)
  To: Andy Shevchenko
  Cc: linux-mtd, linux-kernel, Miquel Raynal, Richard Weinberger,
	Vignesh Raghavendra, Lukas Wunner, Andrew Morton

On Wed,  8 Apr 2026 23:11:48 +0200
Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote:

> Compiler is not happy about used stack frame:
> 
> drivers/mtd/chips/cfi_cmdset_0001.c: In function 'do_write_buffer':
> drivers/mtd/chips/cfi_cmdset_0001.c:1887:1: error: the frame size of 1296 bytes is larger than 1280 bytes [-Werror=frame-larger-than=]
> 
> Fix this by factoring out do_write_buffer_locked().

Does this just split the large stack frame between two nested functions?
I'd also expect the compiler to inline do_write_buffer_locked() so it
makes little difference.
OTOH I can't immediately see where the large stack frame comes from.

	David

> 
> Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
> ---
> v3: addressed set but unused variables when MTD_XIP=y (LKP)
> v2: kept DIS/ENABLE_VPP paired
> 
>  drivers/mtd/chips/cfi_cmdset_0001.c | 88 +++++++++++++++++------------
>  1 file changed, 51 insertions(+), 37 deletions(-)
> 
> diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
> index 5a4d2e16a9d1..7733e076ad40 100644
> --- a/drivers/mtd/chips/cfi_cmdset_0001.c
> +++ b/drivers/mtd/chips/cfi_cmdset_0001.c
> @@ -1154,7 +1154,8 @@ static void __xipram xip_enable(struct map_info *map, struct flchip *chip,
>  
>  static int __xipram xip_wait_for_operation(
>  		struct map_info *map, struct flchip *chip,
> -		unsigned long adr, unsigned int chip_op_time_max)
> +		unsigned long adr, unsigned long inval_adr, int inval_len,
> +		unsigned int chip_op_time, unsigned int chip_op_time_max)
>  {
>  	struct cfi_private *cfi = map->fldrv_priv;
>  	struct cfi_pri_intelext *cfip = cfi->cmdset_priv;
> @@ -1276,8 +1277,7 @@ static int __xipram xip_wait_for_operation(
>  #define XIP_INVAL_CACHED_RANGE(map, from, size)  \
>  	INVALIDATE_CACHED_RANGE(map, from, size)
>  
> -#define INVAL_CACHE_AND_WAIT(map, chip, cmd_adr, inval_adr, inval_len, usec, usec_max) \
> -	xip_wait_for_operation(map, chip, cmd_adr, usec_max)
> +#define INVAL_CACHE_AND_WAIT xip_wait_for_operation

Isn't that separate and unrelated?
The compiler might optimise away the parameters you are adding back.

	David

>  
>  #else
>  
> @@ -1720,42 +1720,24 @@ static int cfi_intelext_write_words (struct mtd_info *mtd, loff_t to , size_t le
>  }
>  
>  
> -static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
> -				    unsigned long adr, const struct kvec **pvec,
> -				    unsigned long *pvec_seek, int len)
> +static int __xipram do_write_buffer_locked(struct map_info *map, struct flchip *chip,
> +					   unsigned long cmd_adr, unsigned long adr,
> +					   const struct kvec **pvec,
> +					   unsigned long *pvec_seek, int len)
>  {
>  	struct cfi_private *cfi = map->fldrv_priv;
>  	map_word status, write_cmd, datum;
> -	unsigned long cmd_adr;
> -	int ret, wbufsize, word_gap, words;
> +	int ret, word_gap, words;
>  	const struct kvec *vec;
>  	unsigned long vec_seek;
>  	unsigned long initial_adr;
>  	int initial_len = len;
>  
> -	wbufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
> -	adr += chip->start;
>  	initial_adr = adr;
> -	cmd_adr = adr & ~(wbufsize-1);
> -
> -	/* Sharp LH28F640BF chips need the first address for the
> -	 * Page Buffer Program command. See Table 5 of
> -	 * LH28F320BF, LH28F640BF, LH28F128BF Series (Appendix FUM00701) */
> -	if (is_LH28F640BF(cfi))
> -		cmd_adr = adr;
>  
>  	/* Let's determine this according to the interleave only once */
>  	write_cmd = (cfi->cfiq->P_ID != P_ID_INTEL_PERFORMANCE) ? CMD(0xe8) : CMD(0xe9);
>  
> -	mutex_lock(&chip->mutex);
> -	ret = get_chip(map, chip, cmd_adr, FL_WRITING);
> -	if (ret) {
> -		mutex_unlock(&chip->mutex);
> -		return ret;
> -	}
> -
> -	XIP_INVAL_CACHED_RANGE(map, initial_adr, initial_len);
> -	ENABLE_VPP(map);
>  	xip_disable(map, chip, cmd_adr);
>  
>  	/* §4.8 of the 28FxxxJ3A datasheet says "Any time SR.4 and/or SR.5 is set
> @@ -1789,7 +1771,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
>  		xip_enable(map, chip, cmd_adr);
>  		printk(KERN_ERR "%s: Chip not ready for buffer write. Xstatus = %lx, status = %lx\n",
>  				map->name, Xstatus.x[0], status.x[0]);
> -		goto out;
> +		return ret;
>  	}
>  
>  	/* Figure out the number of words to write */
> @@ -1853,7 +1835,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
>  		chip->state = FL_STATUS;
>  		xip_enable(map, chip, cmd_adr);
>  		printk(KERN_ERR "%s: buffer write error (status timeout)\n", map->name);
> -		goto out;
> +		return ret;
>  	}
>  
>  	/* check for errors */
> @@ -1866,21 +1848,53 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
>  		map_write(map, CMD(0x70), cmd_adr);
>  		xip_enable(map, chip, cmd_adr);
>  
> -		if (chipstatus & 0x02) {
> -			ret = -EROFS;
> -		} else if (chipstatus & 0x08) {
> +		if (chipstatus & 0x02)
> +			return -EROFS;
> +
> +		if (chipstatus & 0x08) {
>  			printk(KERN_ERR "%s: buffer write error (bad VPP)\n", map->name);
> -			ret = -EIO;
> -		} else {
> -			printk(KERN_ERR "%s: buffer write error (status 0x%lx)\n", map->name, chipstatus);
> -			ret = -EINVAL;
> +			return  -EIO;
>  		}
>  
> -		goto out;
> +		printk(KERN_ERR "%s: buffer write error (status 0x%lx)\n", map->name, chipstatus);
> +		return -EINVAL;
>  	}
>  
>  	xip_enable(map, chip, cmd_adr);
> - out:	DISABLE_VPP(map);
> +	return 0;
> +}
> +
> +static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
> +				    unsigned long adr, const struct kvec **pvec,
> +				    unsigned long *pvec_seek, int len)
> +{
> +	struct cfi_private *cfi = map->fldrv_priv;
> +	unsigned long cmd_adr;
> +	int ret, wbufsize;
> +
> +	wbufsize = cfi_interleave(cfi) << cfi->cfiq->MaxBufWriteSize;
> +	adr += chip->start;
> +	cmd_adr = adr & ~(wbufsize - 1);
> +
> +	/* Sharp LH28F640BF chips need the first address for the
> +	 * Page Buffer Program command. See Table 5 of
> +	 * LH28F320BF, LH28F640BF, LH28F128BF Series (Appendix FUM00701) */
> +	if (is_LH28F640BF(cfi))
> +		cmd_adr = adr;
> +
> +	mutex_lock(&chip->mutex);
> +	ret = get_chip(map, chip, cmd_adr, FL_WRITING);
> +	if (ret) {
> +		mutex_unlock(&chip->mutex);
> +		return ret;
> +	}
> +
> +	XIP_INVAL_CACHED_RANGE(map, adr, len);
> +	ENABLE_VPP(map);
> +
> +	ret = do_write_buffer_locked(map, chip, cmd_adr, adr, pvec, pvec_seek, len);
> +
> +	DISABLE_VPP(map);
>  	put_chip(map, chip, cmd_adr);
>  	mutex_unlock(&chip->mutex);
>  	return ret;


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3 1/1] mtd: cfi_cmdset_0001: Factor out do_write_buffer_locked() to reduce stack frame
  2026-04-09  7:26 ` David Laight
@ 2026-04-09  7:58   ` Lukas Wunner
  2026-04-09 11:28     ` David Laight
  0 siblings, 1 reply; 5+ messages in thread
From: Lukas Wunner @ 2026-04-09  7:58 UTC (permalink / raw)
  To: David Laight
  Cc: Andy Shevchenko, linux-mtd, linux-kernel, Miquel Raynal,
	Richard Weinberger, Vignesh Raghavendra, Andrew Morton

On Thu, Apr 09, 2026 at 08:26:11AM +0100, David Laight wrote:
> On Wed,  8 Apr 2026 23:11:48 +0200 Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote:
> > Compiler is not happy about used stack frame:
> > 
> > drivers/mtd/chips/cfi_cmdset_0001.c: In function 'do_write_buffer':
> > drivers/mtd/chips/cfi_cmdset_0001.c:1887:1: error: the frame size of 1296 bytes is larger than 1280 bytes [-Werror=frame-larger-than=]
> > 
> > Fix this by factoring out do_write_buffer_locked().
> 
> Does this just split the large stack frame between two nested functions?
> I'd also expect the compiler to inline do_write_buffer_locked() so it
> makes little difference.
> OTOH I can't immediately see where the large stack frame comes from.

The error occurs for an allmodconfig build on arm, which implies
CONFIG_KASAN_STACK=y and thus increases stack usage vis-à-vis a
"regular" build.

Stack usage is high here because of the three "map_word" types,
which can each be up to 256 unsigned longs (32 * 8), see the
definitions of MAX_MAP_LONGS, MAX_MAP_BANKWIDTH, map_word in
include/linux/mtd/map.h.

Possible solutions:

- Disable KASAN entirely for this file:
  https://lore.kernel.org/all/adX3SHYgazijahbG@wunner.de/

  Not always a good option, particularly for stuff like lib/maple_tree.c
  where the same issue exists in mas_wr_spanning_store() and KASAN would
  certainly be good to have for that one.

- Use heap instead of stack.

- Split function in smaller chunks and mark them "noinline".

Thanks,

Lukas

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3 1/1] mtd: cfi_cmdset_0001: Factor out do_write_buffer_locked() to reduce stack frame
  2026-04-09  7:58   ` Lukas Wunner
@ 2026-04-09 11:28     ` David Laight
  2026-04-14 12:38       ` Andy Shevchenko
  0 siblings, 1 reply; 5+ messages in thread
From: David Laight @ 2026-04-09 11:28 UTC (permalink / raw)
  To: Lukas Wunner
  Cc: Andy Shevchenko, linux-mtd, linux-kernel, Miquel Raynal,
	Richard Weinberger, Vignesh Raghavendra, Andrew Morton

On Thu, 9 Apr 2026 09:58:28 +0200
Lukas Wunner <lukas@wunner.de> wrote:

> On Thu, Apr 09, 2026 at 08:26:11AM +0100, David Laight wrote:
> > On Wed,  8 Apr 2026 23:11:48 +0200 Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote:  
> > > Compiler is not happy about used stack frame:
> > > 
> > > drivers/mtd/chips/cfi_cmdset_0001.c: In function 'do_write_buffer':
> > > drivers/mtd/chips/cfi_cmdset_0001.c:1887:1: error: the frame size of 1296 bytes is larger than 1280 bytes [-Werror=frame-larger-than=]
> > > 
> > > Fix this by factoring out do_write_buffer_locked().  
> > 
> > Does this just split the large stack frame between two nested functions?
> > I'd also expect the compiler to inline do_write_buffer_locked() so it
> > makes little difference.
> > OTOH I can't immediately see where the large stack frame comes from.  
> 
> The error occurs for an allmodconfig build on arm, which implies
> CONFIG_KASAN_STACK=y and thus increases stack usage vis-à-vis a
> "regular" build.
> 
> Stack usage is high here because of the three "map_word" types,
> which can each be up to 256 unsigned longs (32 * 8), see the
> definitions of MAX_MAP_LONGS, MAX_MAP_BANKWIDTH, map_word in
> include/linux/mtd/map.h.

Ugg - that code is horrid.
Returning structures by value isn't really a good idea.

> 
> Possible solutions:
> 
> - Disable KASAN entirely for this file:
>   https://lore.kernel.org/all/adX3SHYgazijahbG@wunner.de/
> 
>   Not always a good option, particularly for stuff like lib/maple_tree.c
>   where the same issue exists in mas_wr_spanning_store() and KASAN would
>   certainly be good to have for that one.

I've peeked at that at least once.
Some big functions get inlined; IIRC one small function is basically:
	if (expr) a(args) else b(args);
and marking both a and b noinline would help a lot.

> 
> - Use heap instead of stack.
> 
> - Split function in smaller chunks and mark them "noinline".

That might make the code easier to read as well.

But looking at it, I think that a small amount of refactoring
(mostly moving the initial 'status' check before the command
is written) would mean that only one 'map_word' would be valid
at any one time.

I didn't look at what was really happening though.
I suspect it is similar to some code I've written for accessing serial
EEPROM where the control data is written one bit at a time, but the
data itself is read/written in 4 bit chunks (although the low-level hardware
did multiple 'nibble' accesses for wider transfers).
In any case it surely can't be necessary to have a 256+ byte structure
to hold the 8-bit command/status values.
(In my case the 8 bits got 'spread' across a 32bit word and written
(to the fgpa - helped because I was writing that end as well) as a single word.)

	David

> 
> Thanks,
> 
> Lukas
> 


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v3 1/1] mtd: cfi_cmdset_0001: Factor out do_write_buffer_locked() to reduce stack frame
  2026-04-09 11:28     ` David Laight
@ 2026-04-14 12:38       ` Andy Shevchenko
  0 siblings, 0 replies; 5+ messages in thread
From: Andy Shevchenko @ 2026-04-14 12:38 UTC (permalink / raw)
  To: David Laight
  Cc: Lukas Wunner, linux-mtd, linux-kernel, Miquel Raynal,
	Richard Weinberger, Vignesh Raghavendra, Andrew Morton

On Thu, Apr 09, 2026 at 12:28:46PM +0100, David Laight wrote:
> On Thu, 9 Apr 2026 09:58:28 +0200
> Lukas Wunner <lukas@wunner.de> wrote:
> > On Thu, Apr 09, 2026 at 08:26:11AM +0100, David Laight wrote:
> > > On Wed,  8 Apr 2026 23:11:48 +0200 Andy Shevchenko <andriy.shevchenko@linux.intel.com> wrote:  
> > > > Compiler is not happy about used stack frame:
> > > > 
> > > > drivers/mtd/chips/cfi_cmdset_0001.c: In function 'do_write_buffer':
> > > > drivers/mtd/chips/cfi_cmdset_0001.c:1887:1: error: the frame size of 1296 bytes is larger than 1280 bytes [-Werror=frame-larger-than=]
> > > > 
> > > > Fix this by factoring out do_write_buffer_locked().  
> > > 
> > > Does this just split the large stack frame between two nested functions?
> > > I'd also expect the compiler to inline do_write_buffer_locked() so it
> > > makes little difference.
> > > OTOH I can't immediately see where the large stack frame comes from.  
> > 
> > The error occurs for an allmodconfig build on arm, which implies
> > CONFIG_KASAN_STACK=y and thus increases stack usage vis-à-vis a
> > "regular" build.
> > 
> > Stack usage is high here because of the three "map_word" types,
> > which can each be up to 256 unsigned longs (32 * 8), see the
> > definitions of MAX_MAP_LONGS, MAX_MAP_BANKWIDTH, map_word in
> > include/linux/mtd/map.h.
> 
> Ugg - that code is horrid.
> Returning structures by value isn't really a good idea.
> 
> > 
> > Possible solutions:
> > 
> > - Disable KASAN entirely for this file:
> >   https://lore.kernel.org/all/adX3SHYgazijahbG@wunner.de/
> > 
> >   Not always a good option, particularly for stuff like lib/maple_tree.c
> >   where the same issue exists in mas_wr_spanning_store() and KASAN would
> >   certainly be good to have for that one.
> 
> I've peeked at that at least once.
> Some big functions get inlined; IIRC one small function is basically:
> 	if (expr) a(args) else b(args);
> and marking both a and b noinline would help a lot.
> 
> > 
> > - Use heap instead of stack.
> > 
> > - Split function in smaller chunks and mark them "noinline".
> 
> That might make the code easier to read as well.
> 
> But looking at it, I think that a small amount of refactoring
> (mostly moving the initial 'status' check before the command
> is written) would mean that only one 'map_word' would be valid
> at any one time.
> 
> I didn't look at what was really happening though.
> I suspect it is similar to some code I've written for accessing serial
> EEPROM where the control data is written one bit at a time, but the
> data itself is read/written in 4 bit chunks (although the low-level hardware
> did multiple 'nibble' accesses for wider transfers).
> In any case it surely can't be necessary to have a 256+ byte structure
> to hold the 8-bit command/status values.
> (In my case the 8 bits got 'spread' across a 32bit word and written
> (to the fgpa - helped because I was writing that end as well) as a single word.)

Okay, I leave this to maintainers to decide what to do with my contribution.
Dunno if this refactoring helps doing better one in the future (like David
suggested) or should be rewritten completely. In my opinion, smaller functions
are always easier to follow.

-- 
With Best Regards,
Andy Shevchenko



^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2026-04-14 12:38 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-04-08 21:11 [PATCH v3 1/1] mtd: cfi_cmdset_0001: Factor out do_write_buffer_locked() to reduce stack frame Andy Shevchenko
2026-04-09  7:26 ` David Laight
2026-04-09  7:58   ` Lukas Wunner
2026-04-09 11:28     ` David Laight
2026-04-14 12:38       ` Andy Shevchenko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox