[PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap()
@ 2024-07-28  0:47 Sasha Levin
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 02/16] fs: remove accidental overflow during wraparound check Sasha Levin
                   ` (14 more replies)
  0 siblings, 15 replies; 22+ messages in thread
From: Sasha Levin @ 2024-07-28  0:47 UTC (permalink / raw)
  To: linux-kernel, stable; +Cc: Richard Fitzgerald, Mark Brown, Sasha Levin, gregkh

From: Richard Fitzgerald <rf@opensource.cirrus.com>

[ Upstream commit c3820641da87442251e0c00b6874ef1022da8f58 ]

- Use kunit_kcalloc() to allocate the defaults table so that it will be
  freed when the test case ends.
- kfree() the buf and *data buffers on the error paths.
- Use kunit_add_action_or_reset() instead of kunit_add_action() so that
  if it fails it will call regmap_exit().

Signed-off-by: Richard Fitzgerald <rf@opensource.cirrus.com>
Link: https://msgid.link/r/20240411103724.54063-1-rf@opensource.cirrus.com
Signed-off-by: Mark Brown <broonie@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/base/regmap/regmap-kunit.c | 72 +++++++++++++++++++-----------
 1 file changed, 45 insertions(+), 27 deletions(-)

diff --git a/drivers/base/regmap/regmap-kunit.c b/drivers/base/regmap/regmap-kunit.c
index be32cd4e84da4..292e86f601978 100644
--- a/drivers/base/regmap/regmap-kunit.c
+++ b/drivers/base/regmap/regmap-kunit.c
@@ -145,9 +145,9 @@ static struct regmap *gen_regmap(struct kunit *test,
 	const struct regmap_test_param *param = test->param_value;
 	struct regmap_test_priv *priv = test->priv;
 	unsigned int *buf;
-	struct regmap *ret;
+	struct regmap *ret = ERR_PTR(-ENOMEM);
 	size_t size;
-	int i;
+	int i, error;
 	struct reg_default *defaults;
 
 	config->cache_type = param->cache;
@@ -172,15 +172,17 @@ static struct regmap *gen_regmap(struct kunit *test,
 
 	*data = kzalloc(sizeof(**data), GFP_KERNEL);
 	if (!(*data))
-		return ERR_PTR(-ENOMEM);
+		goto out_free;
 	(*data)->vals = buf;
 
 	if (config->num_reg_defaults) {
-		defaults = kcalloc(config->num_reg_defaults,
-				   sizeof(struct reg_default),
-				   GFP_KERNEL);
+		defaults = kunit_kcalloc(test,
+					 config->num_reg_defaults,
+					 sizeof(struct reg_default),
+					 GFP_KERNEL);
 		if (!defaults)
-			return ERR_PTR(-ENOMEM);
+			goto out_free;
+
 		config->reg_defaults = defaults;
 
 		for (i = 0; i < config->num_reg_defaults; i++) {
@@ -190,12 +192,19 @@ static struct regmap *gen_regmap(struct kunit *test,
 	}
 
 	ret = regmap_init_ram(priv->dev, config, *data);
-	if (IS_ERR(ret)) {
-		kfree(buf);
-		kfree(*data);
-	} else {
-		kunit_add_action(test, regmap_exit_action, ret);
-	}
+	if (IS_ERR(ret))
+		goto out_free;
+
+	/* This calls regmap_exit() on failure, which frees buf and *data */
+	error = kunit_add_action_or_reset(test, regmap_exit_action, ret);
+	if (error)
+		ret = ERR_PTR(error);
+
+	return ret;
+
+out_free:
+	kfree(buf);
+	kfree(*data);
 
 	return ret;
 }
@@ -1497,9 +1506,9 @@ static struct regmap *gen_raw_regmap(struct kunit *test,
 	struct regmap_test_priv *priv = test->priv;
 	const struct regmap_test_param *param = test->param_value;
 	u16 *buf;
-	struct regmap *ret;
+	struct regmap *ret = ERR_PTR(-ENOMEM);
 	size_t size = (config->max_register + 1) * config->reg_bits / 8;
-	int i;
+	int i, error;
 	struct reg_default *defaults;
 
 	config->cache_type = param->cache;
@@ -1515,15 +1524,16 @@ static struct regmap *gen_raw_regmap(struct kunit *test,
 
 	*data = kzalloc(sizeof(**data), GFP_KERNEL);
 	if (!(*data))
-		return ERR_PTR(-ENOMEM);
+		goto out_free;
 	(*data)->vals = (void *)buf;
 
 	config->num_reg_defaults = config->max_register + 1;
-	defaults = kcalloc(config->num_reg_defaults,
-			   sizeof(struct reg_default),
-			   GFP_KERNEL);
+	defaults = kunit_kcalloc(test,
+				 config->num_reg_defaults,
+				 sizeof(struct reg_default),
+				 GFP_KERNEL);
 	if (!defaults)
-		return ERR_PTR(-ENOMEM);
+		goto out_free;
 	config->reg_defaults = defaults;
 
 	for (i = 0; i < config->num_reg_defaults; i++) {
@@ -1536,7 +1546,8 @@ static struct regmap *gen_raw_regmap(struct kunit *test,
 			defaults[i].def = be16_to_cpu(buf[i]);
 			break;
 		default:
-			return ERR_PTR(-EINVAL);
+			ret = ERR_PTR(-EINVAL);
+			goto out_free;
 		}
 	}
 
@@ -1548,12 +1559,19 @@ static struct regmap *gen_raw_regmap(struct kunit *test,
 		config->num_reg_defaults = 0;
 
 	ret = regmap_init_raw_ram(priv->dev, config, *data);
-	if (IS_ERR(ret)) {
-		kfree(buf);
-		kfree(*data);
-	} else {
-		kunit_add_action(test, regmap_exit_action, ret);
-	}
+	if (IS_ERR(ret))
+		goto out_free;
+
+	/* This calls regmap_exit() on failure, which frees buf and *data */
+	error = kunit_add_action_or_reset(test, regmap_exit_action, ret);
+	if (error)
+		ret = ERR_PTR(error);
+
+	return ret;
+
+out_free:
+	kfree(buf);
+	kfree(*data);
 
 	return ret;
 }
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH AUTOSEL 6.10 02/16] fs: remove accidental overflow during wraparound check
  2024-07-28  0:47 [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap() Sasha Levin
@ 2024-07-28  0:47 ` Sasha Levin
  2024-07-29 11:54   ` Jan Kara
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 03/16] gpio: prevent potential speculation leaks in gpio_device_get_desc() Sasha Levin
                   ` (13 subsequent siblings)
  14 siblings, 1 reply; 22+ messages in thread
From: Sasha Levin @ 2024-07-28  0:47 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Justin Stitt, linux-hardening, Kees Cook, Jan Kara,
	Christian Brauner, Sasha Levin, viro, nathan, linux-fsdevel, llvm

From: Justin Stitt <justinstitt@google.com>

[ Upstream commit 23cc6ef6fd453b13502caae23130844e7d6ed0fe ]

Running syzkaller with the newly enabled signed integer overflow
sanitizer produces this report:

[  195.401651] ------------[ cut here ]------------
[  195.404808] UBSAN: signed-integer-overflow in ../fs/open.c:321:15
[  195.408739] 9223372036854775807 + 562984447377399 cannot be represented in type 'loff_t' (aka 'long long')
[  195.414683] CPU: 1 PID: 703 Comm: syz-executor.0 Not tainted 6.8.0-rc2-00039-g14de58dbe653-dirty #11
[  195.420138] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[  195.425804] Call Trace:
[  195.427360]  <TASK>
[  195.428791]  dump_stack_lvl+0x93/0xd0
[  195.431150]  handle_overflow+0x171/0x1b0
[  195.433640]  vfs_fallocate+0x459/0x4f0
...
[  195.490053] ------------[ cut here ]------------
[  195.493146] UBSAN: signed-integer-overflow in ../fs/open.c:321:61
[  195.497030] 9223372036854775807 + 562984447377399 cannot be represented in type 'loff_t' (aka 'long long)
[  195.502940] CPU: 1 PID: 703 Comm: syz-executor.0 Not tainted 6.8.0-rc2-00039-g14de58dbe653-dirty #11
[  195.508395] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
[  195.514075] Call Trace:
[  195.515636]  <TASK>
[  195.517000]  dump_stack_lvl+0x93/0xd0
[  195.519255]  handle_overflow+0x171/0x1b0
[  195.521677]  vfs_fallocate+0x4cb/0x4f0
[  195.524033]  __x64_sys_fallocate+0xb2/0xf0

Historically, the signed integer overflow sanitizer did not work in the
kernel due to its interaction with `-fwrapv` but this has since been
changed [1] in the newest version of Clang. It was re-enabled in the
kernel with Commit 557f8c582a9ba8ab ("ubsan: Reintroduce signed overflow
sanitizer").

Let's use the check_add_overflow helper to first verify the addition
stays within the bounds of its type (long long); then we can use that
sum for the following check.

Link: https://github.com/llvm/llvm-project/pull/82432 [1]
Closes: https://github.com/KSPP/linux/issues/356
Cc: linux-hardening@vger.kernel.org
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Justin Stitt <justinstitt@google.com>
Link: https://lore.kernel.org/r/20240513-b4-sio-vfs_fallocate-v2-1-db415872fb16@google.com
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/open.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/fs/open.c b/fs/open.c
index 278b3edcda444..1dd123ba34ee9 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -247,6 +247,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
 	struct inode *inode = file_inode(file);
 	long ret;
+	loff_t sum;
 
 	if (offset < 0 || len <= 0)
 		return -EINVAL;
@@ -319,8 +320,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
 		return -ENODEV;
 
-	/* Check for wrap through zero too */
-	if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
+	/* Check for wraparound */
+	if (check_add_overflow(offset, len, &sum))
+		return -EFBIG;
+
+	if (sum > inode->i_sb->s_maxbytes)
 		return -EFBIG;
 
 	if (!file->f_op->fallocate)
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH AUTOSEL 6.10 02/16] fs: remove accidental overflow during wraparound check
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 02/16] fs: remove accidental overflow during wraparound check Sasha Levin
@ 2024-07-29 11:54   ` Jan Kara
  2024-08-10  9:10     ` Sasha Levin
  0 siblings, 1 reply; 22+ messages in thread
From: Jan Kara @ 2024-07-29 11:54 UTC (permalink / raw)
  To: Sasha Levin
  Cc: linux-kernel, stable, Justin Stitt, linux-hardening, Kees Cook,
	Jan Kara, Christian Brauner, viro, nathan, linux-fsdevel, llvm

On Sat 27-07-24 20:47:19, Sasha Levin wrote:
> From: Justin Stitt <justinstitt@google.com>
> 
> [ Upstream commit 23cc6ef6fd453b13502caae23130844e7d6ed0fe ]

Sasha, this commit is only about silencing false-positive UBSAN warning.
Not sure if it is really a stable material...

								Honza

> 
> Running syzkaller with the newly enabled signed integer overflow
> sanitizer produces this report:
> 
> [  195.401651] ------------[ cut here ]------------
> [  195.404808] UBSAN: signed-integer-overflow in ../fs/open.c:321:15
> [  195.408739] 9223372036854775807 + 562984447377399 cannot be represented in type 'loff_t' (aka 'long long')
> [  195.414683] CPU: 1 PID: 703 Comm: syz-executor.0 Not tainted 6.8.0-rc2-00039-g14de58dbe653-dirty #11
> [  195.420138] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
> [  195.425804] Call Trace:
> [  195.427360]  <TASK>
> [  195.428791]  dump_stack_lvl+0x93/0xd0
> [  195.431150]  handle_overflow+0x171/0x1b0
> [  195.433640]  vfs_fallocate+0x459/0x4f0
> ...
> [  195.490053] ------------[ cut here ]------------
> [  195.493146] UBSAN: signed-integer-overflow in ../fs/open.c:321:61
> [  195.497030] 9223372036854775807 + 562984447377399 cannot be represented in type 'loff_t' (aka 'long long)
> [  195.502940] CPU: 1 PID: 703 Comm: syz-executor.0 Not tainted 6.8.0-rc2-00039-g14de58dbe653-dirty #11
> [  195.508395] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.3-debian-1.16.3-2 04/01/2014
> [  195.514075] Call Trace:
> [  195.515636]  <TASK>
> [  195.517000]  dump_stack_lvl+0x93/0xd0
> [  195.519255]  handle_overflow+0x171/0x1b0
> [  195.521677]  vfs_fallocate+0x4cb/0x4f0
> [  195.524033]  __x64_sys_fallocate+0xb2/0xf0
> 
> Historically, the signed integer overflow sanitizer did not work in the
> kernel due to its interaction with `-fwrapv` but this has since been
> changed [1] in the newest version of Clang. It was re-enabled in the
> kernel with Commit 557f8c582a9ba8ab ("ubsan: Reintroduce signed overflow
> sanitizer").
> 
> Let's use the check_add_overflow helper to first verify the addition
> stays within the bounds of its type (long long); then we can use that
> sum for the following check.
> 
> Link: https://github.com/llvm/llvm-project/pull/82432 [1]
> Closes: https://github.com/KSPP/linux/issues/356
> Cc: linux-hardening@vger.kernel.org
> Reviewed-by: Kees Cook <keescook@chromium.org>
> Signed-off-by: Justin Stitt <justinstitt@google.com>
> Link: https://lore.kernel.org/r/20240513-b4-sio-vfs_fallocate-v2-1-db415872fb16@google.com
> Reviewed-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Christian Brauner <brauner@kernel.org>
> Signed-off-by: Sasha Levin <sashal@kernel.org>
> ---
>  fs/open.c | 8 ++++++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/open.c b/fs/open.c
> index 278b3edcda444..1dd123ba34ee9 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -247,6 +247,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
>  {
>  	struct inode *inode = file_inode(file);
>  	long ret;
> +	loff_t sum;
>  
>  	if (offset < 0 || len <= 0)
>  		return -EINVAL;
> @@ -319,8 +320,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
>  	if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
>  		return -ENODEV;
>  
> -	/* Check for wrap through zero too */
> -	if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
> +	/* Check for wraparound */
> +	if (check_add_overflow(offset, len, &sum))
> +		return -EFBIG;
> +
> +	if (sum > inode->i_sb->s_maxbytes)
>  		return -EFBIG;
>  
>  	if (!file->f_op->fallocate)
> -- 
> 2.43.0
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH AUTOSEL 6.10 02/16] fs: remove accidental overflow during wraparound check
  2024-07-29 11:54   ` Jan Kara
@ 2024-08-10  9:10     ` Sasha Levin
  0 siblings, 0 replies; 22+ messages in thread
From: Sasha Levin @ 2024-08-10  9:10 UTC (permalink / raw)
  To: Jan Kara
  Cc: linux-kernel, stable, Justin Stitt, linux-hardening, Kees Cook,
	Christian Brauner, viro, nathan, linux-fsdevel, llvm

On Mon, Jul 29, 2024 at 01:54:18PM +0200, Jan Kara wrote:
>On Sat 27-07-24 20:47:19, Sasha Levin wrote:
>> From: Justin Stitt <justinstitt@google.com>
>>
>> [ Upstream commit 23cc6ef6fd453b13502caae23130844e7d6ed0fe ]
>
>Sasha, this commit is only about silencing false-positive UBSAN warning.
>Not sure if it is really a stable material...

I'll drop it, thanks!

-- 
Thanks,
Sasha

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH AUTOSEL 6.10 03/16] gpio: prevent potential speculation leaks in gpio_device_get_desc()
  2024-07-28  0:47 [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap() Sasha Levin
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 02/16] fs: remove accidental overflow during wraparound check Sasha Levin
@ 2024-07-28  0:47 ` Sasha Levin
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 04/16] hwmon: corsair-psu: add USB id of HX1200i Series 2023 psu Sasha Levin
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 22+ messages in thread
From: Sasha Levin @ 2024-07-28  0:47 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Hagar Hemdan, Bartosz Golaszewski, Sasha Levin, linus.walleij,
	brgl, linux-gpio

From: Hagar Hemdan <hagarhem@amazon.com>

[ Upstream commit d795848ecce24a75dfd46481aee066ae6fe39775 ]

Userspace may trigger a speculative read of an address outside the gpio
descriptor array.
Users can do that by calling gpio_ioctl() with an offset out of range.
Offset is copied from user and then used as an array index to get
the gpio descriptor without sanitization in gpio_device_get_desc().

This change ensures that the offset is sanitized by using
array_index_nospec() to mitigate any possibility of speculative
information leaks.

This bug was discovered and resolved using Coverity Static Analysis
Security Testing (SAST) by Synopsys, Inc.

Signed-off-by: Hagar Hemdan <hagarhem@amazon.com>
Link: https://lore.kernel.org/r/20240523085332.1801-1-hagarhem@amazon.com
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/gpio/gpiolib.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index fa62367ee9290..1a9aadd4c803c 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -17,6 +17,7 @@
 #include <linux/list.h>
 #include <linux/lockdep.h>
 #include <linux/module.h>
+#include <linux/nospec.h>
 #include <linux/of.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/seq_file.h>
@@ -198,7 +199,7 @@ gpio_device_get_desc(struct gpio_device *gdev, unsigned int hwnum)
 	if (hwnum >= gdev->ngpio)
 		return ERR_PTR(-EINVAL);
 
-	return &gdev->descs[hwnum];
+	return &gdev->descs[array_index_nospec(hwnum, gdev->ngpio)];
 }
 EXPORT_SYMBOL_GPL(gpio_device_get_desc);
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH AUTOSEL 6.10 04/16] hwmon: corsair-psu: add USB id of HX1200i Series 2023 psu
  2024-07-28  0:47 [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap() Sasha Levin
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 02/16] fs: remove accidental overflow during wraparound check Sasha Levin
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 03/16] gpio: prevent potential speculation leaks in gpio_device_get_desc() Sasha Levin
@ 2024-07-28  0:47 ` Sasha Levin
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 05/16] Revert "rcu-tasks: Fix synchronize_rcu_tasks() VS zap_pid_ns_processes()" Sasha Levin
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 22+ messages in thread
From: Sasha Levin @ 2024-07-28  0:47 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Wilken Gottwalt, Guenter Roeck, Sasha Levin, jdelvare, corbet,
	linux-hwmon, linux-doc

From: Wilken Gottwalt <wilken.gottwalt@posteo.net>

[ Upstream commit b9c15c96ccb47ad860af2e075c5f3c90c4cd1730 ]

Add the usb id of the HX1200i Series 2023. Update the documentation
accordingly. Also fix the version comments, there are no Series 2022
products. That are legacy or first version products going back many
many years.

Signed-off-by: Wilken Gottwalt <wilken.gottwalt@posteo.net>
Link: https://lore.kernel.org/r/ZlAZs4u0dU7JxtDf@monster.localdomain
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 Documentation/hwmon/corsair-psu.rst | 6 +++---
 drivers/hwmon/corsair-psu.c         | 7 ++++---
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/Documentation/hwmon/corsair-psu.rst b/Documentation/hwmon/corsair-psu.rst
index 16db34d464dd6..7ed794087f848 100644
--- a/Documentation/hwmon/corsair-psu.rst
+++ b/Documentation/hwmon/corsair-psu.rst
@@ -15,11 +15,11 @@ Supported devices:
 
   Corsair HX850i
 
-  Corsair HX1000i (Series 2022 and 2023)
+  Corsair HX1000i (Legacy and Series 2023)
 
-  Corsair HX1200i
+  Corsair HX1200i (Legacy and Series 2023)
 
-  Corsair HX1500i (Series 2022 and 2023)
+  Corsair HX1500i (Legacy and Series 2023)
 
   Corsair RM550i
 
diff --git a/drivers/hwmon/corsair-psu.c b/drivers/hwmon/corsair-psu.c
index 2c7c92272fe39..f8f22b8a67cdf 100644
--- a/drivers/hwmon/corsair-psu.c
+++ b/drivers/hwmon/corsair-psu.c
@@ -875,15 +875,16 @@ static const struct hid_device_id corsairpsu_idtable[] = {
 	{ HID_USB_DEVICE(0x1b1c, 0x1c04) }, /* Corsair HX650i */
 	{ HID_USB_DEVICE(0x1b1c, 0x1c05) }, /* Corsair HX750i */
 	{ HID_USB_DEVICE(0x1b1c, 0x1c06) }, /* Corsair HX850i */
-	{ HID_USB_DEVICE(0x1b1c, 0x1c07) }, /* Corsair HX1000i Series 2022 */
-	{ HID_USB_DEVICE(0x1b1c, 0x1c08) }, /* Corsair HX1200i */
+	{ HID_USB_DEVICE(0x1b1c, 0x1c07) }, /* Corsair HX1000i Legacy */
+	{ HID_USB_DEVICE(0x1b1c, 0x1c08) }, /* Corsair HX1200i Legacy */
 	{ HID_USB_DEVICE(0x1b1c, 0x1c09) }, /* Corsair RM550i */
 	{ HID_USB_DEVICE(0x1b1c, 0x1c0a) }, /* Corsair RM650i */
 	{ HID_USB_DEVICE(0x1b1c, 0x1c0b) }, /* Corsair RM750i */
 	{ HID_USB_DEVICE(0x1b1c, 0x1c0c) }, /* Corsair RM850i */
 	{ HID_USB_DEVICE(0x1b1c, 0x1c0d) }, /* Corsair RM1000i */
 	{ HID_USB_DEVICE(0x1b1c, 0x1c1e) }, /* Corsair HX1000i Series 2023 */
-	{ HID_USB_DEVICE(0x1b1c, 0x1c1f) }, /* Corsair HX1500i Series 2022 and 2023 */
+	{ HID_USB_DEVICE(0x1b1c, 0x1c1f) }, /* Corsair HX1500i Legacy and Series 2023 */
+	{ HID_USB_DEVICE(0x1b1c, 0x1c23) }, /* Corsair HX1200i Series 2023 */
 	{ },
 };
 MODULE_DEVICE_TABLE(hid, corsairpsu_idtable);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH AUTOSEL 6.10 05/16] Revert "rcu-tasks: Fix synchronize_rcu_tasks() VS zap_pid_ns_processes()"
  2024-07-28  0:47 [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap() Sasha Levin
                   ` (2 preceding siblings ...)
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 04/16] hwmon: corsair-psu: add USB id of HX1200i Series 2023 psu Sasha Levin
@ 2024-07-28  0:47 ` Sasha Levin
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 06/16] platform/chrome: cros_ec_lpc: Add a new quirk for ACPI id Sasha Levin
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 22+ messages in thread
From: Sasha Levin @ 2024-07-28  0:47 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Frederic Weisbecker, Oleg Nesterov, Paul E . McKenney,
	Sasha Levin, neeraj.upadhyay, joel, josh, boqun.feng, urezki,
	akpm, j.granados, cyphar, rongtao, willy, rcu

From: Frederic Weisbecker <frederic@kernel.org>

[ Upstream commit 9855c37edf0009cc276cecfee09f7e76e2380212 ]

This reverts commit 28319d6dc5e2ffefa452c2377dd0f71621b5bff0. The race
it fixed was subject to conditions that don't exist anymore since:

	1612160b9127 ("rcu-tasks: Eliminate deadlocks involving do_exit() and RCU tasks")

This latter commit removes the use of SRCU that used to cover the
RCU-tasks blind spot on exit between the tasklist's removal and the
final preemption disabling. The task is now placed instead into a
temporary list inside which voluntary sleeps are accounted as RCU-tasks
quiescent states. This would disarm the deadlock initially reported
against PID namespace exit.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 include/linux/rcupdate.h |  2 --
 kernel/pid_namespace.c   | 17 -----------------
 kernel/rcu/tasks.h       | 16 +++-------------
 3 files changed, 3 insertions(+), 32 deletions(-)

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index dfd2399f2cde0..61cb3de236af1 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -209,7 +209,6 @@ void synchronize_rcu_tasks_rude(void);
 
 #define rcu_note_voluntary_context_switch(t) rcu_tasks_qs(t, false)
 void exit_tasks_rcu_start(void);
-void exit_tasks_rcu_stop(void);
 void exit_tasks_rcu_finish(void);
 #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */
 #define rcu_tasks_classic_qs(t, preempt) do { } while (0)
@@ -218,7 +217,6 @@ void exit_tasks_rcu_finish(void);
 #define call_rcu_tasks call_rcu
 #define synchronize_rcu_tasks synchronize_rcu
 static inline void exit_tasks_rcu_start(void) { }
-static inline void exit_tasks_rcu_stop(void) { }
 static inline void exit_tasks_rcu_finish(void) { }
 #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */
 
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 25f3cf679b358..bdf0087d64423 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -249,24 +249,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 		set_current_state(TASK_INTERRUPTIBLE);
 		if (pid_ns->pid_allocated == init_pids)
 			break;
-		/*
-		 * Release tasks_rcu_exit_srcu to avoid following deadlock:
-		 *
-		 * 1) TASK A unshare(CLONE_NEWPID)
-		 * 2) TASK A fork() twice -> TASK B (child reaper for new ns)
-		 *    and TASK C
-		 * 3) TASK B exits, kills TASK C, waits for TASK A to reap it
-		 * 4) TASK A calls synchronize_rcu_tasks()
-		 *                   -> synchronize_srcu(tasks_rcu_exit_srcu)
-		 * 5) *DEADLOCK*
-		 *
-		 * It is considered safe to release tasks_rcu_exit_srcu here
-		 * because we assume the current task can not be concurrently
-		 * reaped at this point.
-		 */
-		exit_tasks_rcu_stop();
 		schedule();
-		exit_tasks_rcu_start();
 	}
 	__set_current_state(TASK_RUNNING);
 
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index e1bf33018e6d5..4dc56b6e27c04 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -858,7 +858,7 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp)
 //	not know to synchronize with this RCU Tasks grace period) have
 //	completed exiting.  The synchronize_rcu() in rcu_tasks_postgp()
 //	will take care of any tasks stuck in the non-preemptible region
-//	of do_exit() following its call to exit_tasks_rcu_stop().
+//	of do_exit() following its call to exit_tasks_rcu_finish().
 // check_all_holdout_tasks(), repeatedly until holdout list is empty:
 //	Scans the holdout list, attempting to identify a quiescent state
 //	for each task on the list.  If there is a quiescent state, the
@@ -1220,7 +1220,7 @@ void exit_tasks_rcu_start(void)
  * Remove the task from the "yet another list" because do_exit() is now
  * non-preemptible, allowing synchronize_rcu() to wait beyond this point.
  */
-void exit_tasks_rcu_stop(void)
+void exit_tasks_rcu_finish(void)
 {
 	unsigned long flags;
 	struct rcu_tasks_percpu *rtpcp;
@@ -1231,22 +1231,12 @@ void exit_tasks_rcu_stop(void)
 	raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
 	list_del_init(&t->rcu_tasks_exit_list);
 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
-}
 
-/*
- * Contribute to protect against tasklist scan blind spot while the
- * task is exiting and may be removed from the tasklist. See
- * corresponding synchronize_srcu() for further details.
- */
-void exit_tasks_rcu_finish(void)
-{
-	exit_tasks_rcu_stop();
-	exit_tasks_rcu_finish_trace(current);
+	exit_tasks_rcu_finish_trace(t);
 }
 
 #else /* #ifdef CONFIG_TASKS_RCU */
 void exit_tasks_rcu_start(void) { }
-void exit_tasks_rcu_stop(void) { }
 void exit_tasks_rcu_finish(void) { exit_tasks_rcu_finish_trace(current); }
 #endif /* #else #ifdef CONFIG_TASKS_RCU */
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH AUTOSEL 6.10 06/16] platform/chrome: cros_ec_lpc: Add a new quirk for ACPI id
  2024-07-28  0:47 [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap() Sasha Levin
                   ` (3 preceding siblings ...)
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 05/16] Revert "rcu-tasks: Fix synchronize_rcu_tasks() VS zap_pid_ns_processes()" Sasha Levin
@ 2024-07-28  0:47 ` Sasha Levin
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 07/16] rcutorture: Fix rcu_torture_fwd_cb_cr() data race Sasha Levin
                   ` (9 subsequent siblings)
  14 siblings, 0 replies; 22+ messages in thread
From: Sasha Levin @ 2024-07-28  0:47 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Ben Walsh, Dustin L . Howett, Tzung-Bi Shih, Sasha Levin, bleung,
	chrome-platform

From: Ben Walsh <ben@jubnut.com>

[ Upstream commit 040159e0912c31fe959d8671f9700bda105ab63a ]

Framework Laptops' ACPI exposes the EC with id "PNP0C09". But
"PNP0C09" is part of the ACPI standard; there are lots of computers
with EC chips with this id, and most of them don't support the cros_ec
protocol.

The driver could find the ACPI device by having "PNP0C09" in the
acpi_match_table, but this would match devices which don't support the
cros_ec protocol. Instead, add a new quirk "CROS_EC_LPC_QUIRK_ACPI_ID"
which allows the id to be specified. This quirk is applied after the
DMI check shows that the device is supported.

Tested-by: Dustin L. Howett <dustin@howett.net>
Signed-off-by: Ben Walsh <ben@jubnut.com>
Link: https://lore.kernel.org/r/20240605063351.14836-4-ben@jubnut.com
Signed-off-by: Tzung-Bi Shih <tzungbi@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/platform/chrome/cros_ec_lpc.c | 50 ++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 12 deletions(-)

diff --git a/drivers/platform/chrome/cros_ec_lpc.c b/drivers/platform/chrome/cros_ec_lpc.c
index ddfbfec44f4cc..43e0914256a3c 100644
--- a/drivers/platform/chrome/cros_ec_lpc.c
+++ b/drivers/platform/chrome/cros_ec_lpc.c
@@ -39,6 +39,11 @@ static bool cros_ec_lpc_acpi_device_found;
  * be used as the base port for EC mapped memory.
  */
 #define CROS_EC_LPC_QUIRK_REMAP_MEMORY              BIT(0)
+/*
+ * Indicates that lpc_driver_data.quirk_acpi_id should be used to find
+ * the ACPI device.
+ */
+#define CROS_EC_LPC_QUIRK_ACPI_ID                   BIT(1)
 
 /**
  * struct lpc_driver_data - driver data attached to a DMI device ID to indicate
@@ -46,10 +51,12 @@ static bool cros_ec_lpc_acpi_device_found;
  * @quirks: a bitfield composed of quirks from CROS_EC_LPC_QUIRK_*
  * @quirk_mmio_memory_base: The first I/O port addressing EC mapped memory (used
  *                          when quirk ...REMAP_MEMORY is set.)
+ * @quirk_acpi_id: An ACPI HID to be used to find the ACPI device.
  */
 struct lpc_driver_data {
 	u32 quirks;
 	u16 quirk_mmio_memory_base;
+	const char *quirk_acpi_id;
 };
 
 /**
@@ -374,6 +381,26 @@ static void cros_ec_lpc_acpi_notify(acpi_handle device, u32 value, void *data)
 		pm_system_wakeup();
 }
 
+static acpi_status cros_ec_lpc_parse_device(acpi_handle handle, u32 level,
+					    void *context, void **retval)
+{
+	*(struct acpi_device **)context = acpi_fetch_acpi_dev(handle);
+	return AE_CTRL_TERMINATE;
+}
+
+static struct acpi_device *cros_ec_lpc_get_device(const char *id)
+{
+	struct acpi_device *adev = NULL;
+	acpi_status status = acpi_get_devices(id, cros_ec_lpc_parse_device,
+					      &adev, NULL);
+	if (ACPI_FAILURE(status)) {
+		pr_warn(DRV_NAME ": Looking for %s failed\n", id);
+		return NULL;
+	}
+
+	return adev;
+}
+
 static int cros_ec_lpc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -401,6 +428,16 @@ static int cros_ec_lpc_probe(struct platform_device *pdev)
 
 		if (quirks & CROS_EC_LPC_QUIRK_REMAP_MEMORY)
 			ec_lpc->mmio_memory_base = driver_data->quirk_mmio_memory_base;
+
+		if (quirks & CROS_EC_LPC_QUIRK_ACPI_ID) {
+			adev = cros_ec_lpc_get_device(driver_data->quirk_acpi_id);
+			if (!adev) {
+				dev_err(dev, "failed to get ACPI device '%s'",
+					driver_data->quirk_acpi_id);
+				return -ENODEV;
+			}
+			ACPI_COMPANION_SET(dev, adev);
+		}
 	}
 
 	/*
@@ -661,23 +698,12 @@ static struct platform_device cros_ec_lpc_device = {
 	.name = DRV_NAME
 };
 
-static acpi_status cros_ec_lpc_parse_device(acpi_handle handle, u32 level,
-					    void *context, void **retval)
-{
-	*(bool *)context = true;
-	return AE_CTRL_TERMINATE;
-}
-
 static int __init cros_ec_lpc_init(void)
 {
 	int ret;
-	acpi_status status;
 	const struct dmi_system_id *dmi_match;
 
-	status = acpi_get_devices(ACPI_DRV_NAME, cros_ec_lpc_parse_device,
-				  &cros_ec_lpc_acpi_device_found, NULL);
-	if (ACPI_FAILURE(status))
-		pr_warn(DRV_NAME ": Looking for %s failed\n", ACPI_DRV_NAME);
+	cros_ec_lpc_acpi_device_found = !!cros_ec_lpc_get_device(ACPI_DRV_NAME);
 
 	dmi_match = dmi_first_match(cros_ec_lpc_dmi_table);
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH AUTOSEL 6.10 07/16] rcutorture: Fix rcu_torture_fwd_cb_cr() data race
  2024-07-28  0:47 [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap() Sasha Levin
                   ` (4 preceding siblings ...)
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 06/16] platform/chrome: cros_ec_lpc: Add a new quirk for ACPI id Sasha Levin
@ 2024-07-28  0:47 ` Sasha Levin
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 08/16] md: do not delete safemode_timer in mddev_suspend Sasha Levin
                   ` (8 subsequent siblings)
  14 siblings, 0 replies; 22+ messages in thread
From: Sasha Levin @ 2024-07-28  0:47 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Paul E. McKenney, Marco Elver, Andrey Konovalov, kasan-dev,
	Sasha Levin, dave, josh, frederic, neeraj.upadhyay, joel,
	boqun.feng, urezki, rcu

From: "Paul E. McKenney" <paulmck@kernel.org>

[ Upstream commit 6040072f4774a575fa67b912efe7722874be337b ]

On powerpc systems, spinlock acquisition does not order prior stores
against later loads.  This means that this statement:

	rfcp->rfc_next = NULL;

Can be reordered to follow this statement:

	WRITE_ONCE(*rfcpp, rfcp);

Which is then a data race with rcu_torture_fwd_prog_cr(), specifically,
this statement:

	rfcpn = READ_ONCE(rfcp->rfc_next)

KCSAN located this data race, which represents a real failure on powerpc.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Acked-by: Marco Elver <elver@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: <kasan-dev@googlegroups.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 kernel/rcu/rcutorture.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index 807fbf6123a77..251cead744603 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -2626,7 +2626,7 @@ static void rcu_torture_fwd_cb_cr(struct rcu_head *rhp)
 	spin_lock_irqsave(&rfp->rcu_fwd_lock, flags);
 	rfcpp = rfp->rcu_fwd_cb_tail;
 	rfp->rcu_fwd_cb_tail = &rfcp->rfc_next;
-	WRITE_ONCE(*rfcpp, rfcp);
+	smp_store_release(rfcpp, rfcp);
 	WRITE_ONCE(rfp->n_launders_cb, rfp->n_launders_cb + 1);
 	i = ((jiffies - rfp->rcu_fwd_startat) / (HZ / FWD_CBS_HIST_DIV));
 	if (i >= ARRAY_SIZE(rfp->n_launders_hist))
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH AUTOSEL 6.10 08/16] md: do not delete safemode_timer in mddev_suspend
  2024-07-28  0:47 [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap() Sasha Levin
                   ` (5 preceding siblings ...)
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 07/16] rcutorture: Fix rcu_torture_fwd_cb_cr() data race Sasha Levin
@ 2024-07-28  0:47 ` Sasha Levin
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 09/16] md: change the return value type of md_write_start to void Sasha Levin
                   ` (7 subsequent siblings)
  14 siblings, 0 replies; 22+ messages in thread
From: Sasha Levin @ 2024-07-28  0:47 UTC (permalink / raw)
  To: linux-kernel, stable; +Cc: Li Nan, Yu Kuai, Song Liu, Sasha Levin, linux-raid

From: Li Nan <linan122@huawei.com>

[ Upstream commit a8768a134518e406d41799a3594aeb74e0889cf7 ]

The deletion of safemode_timer in mddev_suspend() is redundant and
potentially harmful now. If timer is about to be woken up but gets
deleted, 'in_sync' will remain 0 until the next write, causing array
to stay in the 'active' state instead of transitioning to 'clean'.

Commit 0d9f4f135eb6 ("MD: Add del_timer_sync to mddev_suspend (fix
nasty panic))" introduced this deletion for dm, because if timer fired
after dm is destroyed, the resource which the timer depends on might
have been freed.

However, commit 0dd84b319352 ("md: call __md_stop_writes in md_stop")
added __md_stop_writes() to md_stop(), which is called before freeing
resource. Timer is deleted in __md_stop_writes(), and the origin issue
is resolved. Therefore, delete safemode_timer can be removed safely now.

Signed-off-by: Li Nan <linan122@huawei.com>
Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240508092053.1447930-1-linan666@huaweicloud.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/md/md.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index aff9118ff6975..09c55d9a2c542 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -479,7 +479,6 @@ int mddev_suspend(struct mddev *mddev, bool interruptible)
 	 */
 	WRITE_ONCE(mddev->suspended, mddev->suspended + 1);

-	del_timer_sync(&mddev->safemode_timer);
 	/* restrict memory reclaim I/O during raid array is suspend */
 	mddev->noio_flag = memalloc_noio_save();

-- 
2.43.0

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH AUTOSEL 6.10 09/16] md: change the return value type of md_write_start to void
  2024-07-28  0:47 [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap() Sasha Levin
                   ` (6 preceding siblings ...)
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 08/16] md: do not delete safemode_timer in mddev_suspend Sasha Levin
@ 2024-07-28  0:47 ` Sasha Levin
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 10/16] md/raid5: avoid BUG_ON() while continue reshape after reassembling Sasha Levin
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 22+ messages in thread
From: Sasha Levin @ 2024-07-28  0:47 UTC (permalink / raw)
  To: linux-kernel, stable; +Cc: Li Nan, Yu Kuai, Song Liu, Sasha Levin, linux-raid

From: Li Nan <linan122@huawei.com>

[ Upstream commit 03e792eaf18ec2e93e2c623f9f1a4bdb97fe4126 ]

Commit cc27b0c78c79 ("md: fix deadlock between mddev_suspend() and
md_write_start()") aborted md_write_start() with false when mddev is
suspended, which fixed a deadlock if calling mddev_suspend() with
holding reconfig_mutex(). Since mddev_suspend() now includes
lockdep_assert_not_held(), it no longer holds the reconfig_mutex. This
makes previous abort unnecessary. Now, remove unnecessary abort and
change function return value to void.

Signed-off-by: Li Nan <linan122@huawei.com>
Reviewed-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240525185257.3896201-2-linan666@huaweicloud.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/md/md.c     | 14 ++++----------
 drivers/md/md.h     |  2 +-
 drivers/md/raid1.c  |  3 +--
 drivers/md/raid10.c |  3 +--
 drivers/md/raid5.c  |  3 +--
 5 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 09c55d9a2c542..6bac20e82ff02 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -8640,12 +8640,12 @@ EXPORT_SYMBOL(md_done_sync);
  * A return value of 'false' means that the write wasn't recorded
  * and cannot proceed as the array is being suspend.
  */
-bool md_write_start(struct mddev *mddev, struct bio *bi)
+void md_write_start(struct mddev *mddev, struct bio *bi)
 {
 	int did_change = 0;
 
 	if (bio_data_dir(bi) != WRITE)
-		return true;
+		return;
 
 	BUG_ON(mddev->ro == MD_RDONLY);
 	if (mddev->ro == MD_AUTO_READ) {
@@ -8678,15 +8678,9 @@ bool md_write_start(struct mddev *mddev, struct bio *bi)
 	if (did_change)
 		sysfs_notify_dirent_safe(mddev->sysfs_state);
 	if (!mddev->has_superblocks)
-		return true;
+		return;
 	wait_event(mddev->sb_wait,
-		   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) ||
-		   is_md_suspended(mddev));
-	if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
-		percpu_ref_put(&mddev->writes_pending);
-		return false;
-	}
-	return true;
+		   !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
 }
 EXPORT_SYMBOL(md_write_start);
 
diff --git a/drivers/md/md.h b/drivers/md/md.h
index ca085ecad5044..487582058f741 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -785,7 +785,7 @@ extern void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **t
 extern void md_wakeup_thread(struct md_thread __rcu *thread);
 extern void md_check_recovery(struct mddev *mddev);
 extern void md_reap_sync_thread(struct mddev *mddev);
-extern bool md_write_start(struct mddev *mddev, struct bio *bi);
+extern void md_write_start(struct mddev *mddev, struct bio *bi);
 extern void md_write_inc(struct mddev *mddev, struct bio *bi);
 extern void md_write_end(struct mddev *mddev);
 extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7b8a71ca66dde..0d80ff471c73d 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1687,8 +1687,7 @@ static bool raid1_make_request(struct mddev *mddev, struct bio *bio)
 	if (bio_data_dir(bio) == READ)
 		raid1_read_request(mddev, bio, sectors, NULL);
 	else {
-		if (!md_write_start(mddev,bio))
-			return false;
+		md_write_start(mddev,bio);
 		raid1_write_request(mddev, bio, sectors);
 	}
 	return true;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index a4556d2e46bf9..f8d7c02c6ed56 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1836,8 +1836,7 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio)
 	    && md_flush_request(mddev, bio))
 		return true;
 
-	if (!md_write_start(mddev, bio))
-		return false;
+	md_write_start(mddev, bio);
 
 	if (unlikely(bio_op(bio) == REQ_OP_DISCARD))
 		if (!raid10_handle_discard(mddev, bio))
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 2bd1ce9b39226..a84389311dd1e 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6078,8 +6078,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
 		ctx.do_flush = bi->bi_opf & REQ_PREFLUSH;
 	}
 
-	if (!md_write_start(mddev, bi))
-		return false;
+	md_write_start(mddev, bi);
 	/*
 	 * If array is degraded, better not do chunk aligned read because
 	 * later we might have to read it again in order to reconstruct
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH AUTOSEL 6.10 10/16] md/raid5: avoid BUG_ON() while continue reshape after reassembling
  2024-07-28  0:47 [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap() Sasha Levin
                   ` (7 preceding siblings ...)
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 09/16] md: change the return value type of md_write_start to void Sasha Levin
@ 2024-07-28  0:47 ` Sasha Levin
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 11/16] debugobjects: Annotate racy debug variables Sasha Levin
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 22+ messages in thread
From: Sasha Levin @ 2024-07-28  0:47 UTC (permalink / raw)
  To: linux-kernel, stable; +Cc: Yu Kuai, Song Liu, Sasha Levin, linux-raid

From: Yu Kuai <yukuai3@huawei.com>

[ Upstream commit 305a5170dc5cf3d395bb4c4e9239bca6d0b54b49 ]

Currently, mdadm support --revert-reshape to abort the reshape while
reassembling, as the test 07revert-grow. However, following BUG_ON()
can be triggerred by the test:

kernel BUG at drivers/md/raid5.c:6278!
invalid opcode: 0000 [#1] PREEMPT SMP PTI
irq event stamp: 158985
CPU: 6 PID: 891 Comm: md0_reshape Not tainted 6.9.0-03335-g7592a0b0049a #94
RIP: 0010:reshape_request+0x3f1/0xe60
Call Trace:
 <TASK>
 raid5_sync_request+0x43d/0x550
 md_do_sync+0xb7a/0x2110
 md_thread+0x294/0x2b0
 kthread+0x147/0x1c0
 ret_from_fork+0x59/0x70
 ret_from_fork_asm+0x1a/0x30
 </TASK>

Root cause is that --revert-reshape update the raid_disks from 5 to 4,
while reshape position is still set, and after reassembling the array,
reshape position will be read from super block, then during reshape the
checking of 'writepos' that is caculated by old reshape position will
fail.

Fix this panic the easy way first, by converting the BUG_ON() to
WARN_ON(), and stop the reshape if checkings fail.

Noted that mdadm must fix --revert-shape as well, and probably md/raid
should enhance metadata validation as well, however this means
reassemble will fail and there must be user tools to fix the wrong
metadata.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20240611132251.1967786-13-yukuai1@huaweicloud.com
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/md/raid5.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index a84389311dd1e..f9d35c8515e2a 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6254,7 +6254,9 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
 	safepos = conf->reshape_safe;
 	sector_div(safepos, data_disks);
 	if (mddev->reshape_backwards) {
-		BUG_ON(writepos < reshape_sectors);
+		if (WARN_ON(writepos < reshape_sectors))
+			return MaxSector;
+
 		writepos -= reshape_sectors;
 		readpos += reshape_sectors;
 		safepos += reshape_sectors;
@@ -6272,14 +6274,18 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk
 	 * to set 'stripe_addr' which is where we will write to.
 	 */
 	if (mddev->reshape_backwards) {
-		BUG_ON(conf->reshape_progress == 0);
+		if (WARN_ON(conf->reshape_progress == 0))
+			return MaxSector;
+
 		stripe_addr = writepos;
-		BUG_ON((mddev->dev_sectors &
-			~((sector_t)reshape_sectors - 1))
-		       - reshape_sectors - stripe_addr
-		       != sector_nr);
+		if (WARN_ON((mddev->dev_sectors &
+		    ~((sector_t)reshape_sectors - 1)) -
+		    reshape_sectors - stripe_addr != sector_nr))
+			return MaxSector;
 	} else {
-		BUG_ON(writepos != sector_nr + reshape_sectors);
+		if (WARN_ON(writepos != sector_nr + reshape_sectors))
+			return MaxSector;
+
 		stripe_addr = sector_nr;
 	}

-- 
2.43.0

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH AUTOSEL 6.10 11/16] debugobjects: Annotate racy debug variables
  2024-07-28  0:47 [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap() Sasha Levin
                   ` (8 preceding siblings ...)
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 10/16] md/raid5: avoid BUG_ON() while continue reshape after reassembling Sasha Levin
@ 2024-07-28  0:47 ` Sasha Levin
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 12/16] nvme: apple: fix device reference counting Sasha Levin
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 22+ messages in thread
From: Sasha Levin @ 2024-07-28  0:47 UTC (permalink / raw)
  To: linux-kernel, stable; +Cc: Breno Leitao, Thomas Gleixner, Sasha Levin

From: Breno Leitao <leitao@debian.org>

[ Upstream commit 5b5baba6222255d29626f63c41f101379ec5400b ]

KCSAN has identified a potential data race in debugobjects, where the
global variable debug_objects_maxchain is accessed for both reading and
writing simultaneously in separate and parallel data paths. This results in
the following splat printed by KCSAN:

  BUG: KCSAN: data-race in debug_check_no_obj_freed / debug_object_activate

  write to 0xffffffff847ccfc8 of 4 bytes by task 734 on cpu 41:
  debug_object_activate (lib/debugobjects.c:199 lib/debugobjects.c:564 lib/debugobjects.c:710)
  call_rcu (kernel/rcu/rcu.h:227 kernel/rcu/tree.c:2719 kernel/rcu/tree.c:2838)
  security_inode_free (security/security.c:1626)
  __destroy_inode (./include/linux/fsnotify.h:222 fs/inode.c:287)
  ...
  read to 0xffffffff847ccfc8 of 4 bytes by task 384 on cpu 31:
  debug_check_no_obj_freed (lib/debugobjects.c:1000 lib/debugobjects.c:1019)
  kfree (mm/slub.c:2081 mm/slub.c:4280 mm/slub.c:4390)
  percpu_ref_exit (lib/percpu-refcount.c:147)
  css_free_rwork_fn (kernel/cgroup/cgroup.c:5357)
  ...
  value changed: 0x00000070 -> 0x00000071

The data race is actually harmless as this is just used for debugfs
statistics, as all other debug variables.

Annotate all debug variables as racy explicitly, since these variables
are known to be racy and harmless.

Signed-off-by: Breno Leitao <leitao@debian.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240611091813.1189860-1-leitao@debian.org
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 lib/debugobjects.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index fb12a9bacd2fa..7cea91e193a8f 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -78,16 +78,17 @@ static bool			obj_freeing;
 /* The number of objs on the global free list */
 static int			obj_nr_tofree;
 
-static int			debug_objects_maxchain __read_mostly;
-static int __maybe_unused	debug_objects_maxchecked __read_mostly;
-static int			debug_objects_fixups __read_mostly;
-static int			debug_objects_warnings __read_mostly;
-static int			debug_objects_enabled __read_mostly
-				= CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT;
-static int			debug_objects_pool_size __read_mostly
-				= ODEBUG_POOL_SIZE;
-static int			debug_objects_pool_min_level __read_mostly
-				= ODEBUG_POOL_MIN_LEVEL;
+static int __data_racy			debug_objects_maxchain __read_mostly;
+static int __data_racy __maybe_unused	debug_objects_maxchecked __read_mostly;
+static int __data_racy			debug_objects_fixups __read_mostly;
+static int __data_racy			debug_objects_warnings __read_mostly;
+static int __data_racy			debug_objects_enabled __read_mostly
+					= CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT;
+static int __data_racy			debug_objects_pool_size __read_mostly
+					= ODEBUG_POOL_SIZE;
+static int __data_racy			debug_objects_pool_min_level __read_mostly
+					= ODEBUG_POOL_MIN_LEVEL;
+
 static const struct debug_obj_descr *descr_test  __read_mostly;
 static struct kmem_cache	*obj_cache __ro_after_init;
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH AUTOSEL 6.10 12/16] nvme: apple: fix device reference counting
  2024-07-28  0:47 [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap() Sasha Levin
                   ` (9 preceding siblings ...)
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 11/16] debugobjects: Annotate racy debug variables Sasha Levin
@ 2024-07-28  0:47 ` Sasha Levin
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 13/16] block: change rq_integrity_vec to respect the iterator Sasha Levin
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 22+ messages in thread
From: Sasha Levin @ 2024-07-28  0:47 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Keith Busch, Christoph Hellwig, Chaitanya Kulkarni, Sasha Levin,
	marcan, sven, sagi, asahi, linux-arm-kernel, linux-nvme

From: Keith Busch <kbusch@kernel.org>

[ Upstream commit b9ecbfa45516182cd062fecd286db7907ba84210 ]

Drivers must call nvme_uninit_ctrl after a successful nvme_init_ctrl.
Split the allocation side out to make the error handling boundary easier
to navigate. The apple driver had been doing this wrong, leaking the
controller device memory on a tagset failure.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/nvme/host/apple.c | 27 ++++++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index 0cfa39361d3b6..25ecc1a005c5a 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -1388,7 +1388,7 @@ static void devm_apple_nvme_mempool_destroy(void *data)
 	mempool_destroy(data);
 }
 
-static int apple_nvme_probe(struct platform_device *pdev)
+static struct apple_nvme *apple_nvme_alloc(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct apple_nvme *anv;
@@ -1396,7 +1396,7 @@ static int apple_nvme_probe(struct platform_device *pdev)
 
 	anv = devm_kzalloc(dev, sizeof(*anv), GFP_KERNEL);
 	if (!anv)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	anv->dev = get_device(dev);
 	anv->adminq.is_adminq = true;
@@ -1516,10 +1516,26 @@ static int apple_nvme_probe(struct platform_device *pdev)
 		goto put_dev;
 	}
 
+	return anv;
+put_dev:
+	put_device(anv->dev);
+	return ERR_PTR(ret);
+}
+
+static int apple_nvme_probe(struct platform_device *pdev)
+{
+	struct apple_nvme *anv;
+	int ret;
+
+	anv = apple_nvme_alloc(pdev);
+	if (IS_ERR(anv))
+		return PTR_ERR(anv);
+
 	anv->ctrl.admin_q = blk_mq_alloc_queue(&anv->admin_tagset, NULL, NULL);
 	if (IS_ERR(anv->ctrl.admin_q)) {
 		ret = -ENOMEM;
-		goto put_dev;
+		anv->ctrl.admin_q = NULL;
+		goto out_uninit_ctrl;
 	}
 
 	nvme_reset_ctrl(&anv->ctrl);
@@ -1527,8 +1543,9 @@ static int apple_nvme_probe(struct platform_device *pdev)
 
 	return 0;
 
-put_dev:
-	put_device(anv->dev);
+out_uninit_ctrl:
+	nvme_uninit_ctrl(&anv->ctrl);
+	nvme_put_ctrl(&anv->ctrl);
 	return ret;
 }
 
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH AUTOSEL 6.10 13/16] block: change rq_integrity_vec to respect the iterator
  2024-07-28  0:47 [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap() Sasha Levin
                   ` (10 preceding siblings ...)
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 12/16] nvme: apple: fix device reference counting Sasha Levin
@ 2024-07-28  0:47 ` Sasha Levin
  2024-08-12 13:51   ` Matthieu Baerts
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 14/16] irqchip/gic-v3: Don't return errors from gic_acpi_match_gicc() Sasha Levin
                   ` (2 subsequent siblings)
  14 siblings, 1 reply; 22+ messages in thread
From: Sasha Levin @ 2024-07-28  0:47 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Mikulas Patocka, Anuj Gupta, Kanchan Joshi, Christoph Hellwig,
	Jens Axboe, Sasha Levin, kbusch, sagi, linux-nvme, linux-block

From: Mikulas Patocka <mpatocka@redhat.com>

[ Upstream commit cf546dd289e0f6d2594c25e2fb4e19ee67c6d988 ]

If we allocate a bio that is larger than NVMe maximum request size,
attach integrity metadata to it and send it to the NVMe subsystem, the
integrity metadata will be corrupted.

Splitting the bio works correctly. The function bio_split will clone the
bio, trim the iterator of the first bio and advance the iterator of the
second bio.

However, the function rq_integrity_vec has a bug - it returns the first
vector of the bio's metadata and completely disregards the metadata
iterator that was advanced when the bio was split. Thus, the second bio
uses the same metadata as the first bio and this leads to metadata
corruption.

This commit changes rq_integrity_vec, so that it calls mp_bvec_iter_bvec
instead of returning the first vector. mp_bvec_iter_bvec reads the
iterator and uses it to build a bvec for the current position in the
iterator.

The "queue_max_integrity_segments(rq->q) > 1" check was removed, because
the updated rq_integrity_vec function works correctly with multiple
segments.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/49d1afaa-f934-6ed2-a678-e0d428c63a65@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/nvme/host/pci.c       |  6 +++---
 include/linux/blk-integrity.h | 14 +++++++-------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 102a9fb0c65ff..5d8035218de9b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -826,9 +826,9 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
 		struct nvme_command *cmnd)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct bio_vec bv = rq_integrity_vec(req);
 
-	iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req),
-			rq_dma_dir(req), 0);
+	iod->meta_dma = dma_map_bvec(dev->dev, &bv, rq_dma_dir(req), 0);
 	if (dma_mapping_error(dev->dev, iod->meta_dma))
 		return BLK_STS_IOERR;
 	cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
@@ -967,7 +967,7 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req)
 	        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 
 		dma_unmap_page(dev->dev, iod->meta_dma,
-			       rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
+			       rq_integrity_vec(req).bv_len, rq_dma_dir(req));
 	}
 
 	if (blk_rq_nr_phys_segments(req))
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index 7428cb43952da..d16dd24719841 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -100,14 +100,13 @@ static inline bool blk_integrity_rq(struct request *rq)
 }
 
 /*
- * Return the first bvec that contains integrity data.  Only drivers that are
- * limited to a single integrity segment should use this helper.
+ * Return the current bvec that contains the integrity data. bip_iter may be
+ * advanced to iterate over the integrity data.
  */
-static inline struct bio_vec *rq_integrity_vec(struct request *rq)
+static inline struct bio_vec rq_integrity_vec(struct request *rq)
 {
-	if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
-		return NULL;
-	return rq->bio->bi_integrity->bip_vec;
+	return mp_bvec_iter_bvec(rq->bio->bi_integrity->bip_vec,
+				 rq->bio->bi_integrity->bip_iter);
 }
 #else /* CONFIG_BLK_DEV_INTEGRITY */
 static inline int blk_rq_count_integrity_sg(struct request_queue *q,
@@ -169,7 +168,8 @@ static inline int blk_integrity_rq(struct request *rq)
 
 static inline struct bio_vec *rq_integrity_vec(struct request *rq)
 {
-	return NULL;
+	/* the optimizer will remove all calls to this function */
+	return (struct bio_vec){ };
 }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
 #endif /* _LINUX_BLK_INTEGRITY_H */
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH AUTOSEL 6.10 13/16] block: change rq_integrity_vec to respect the iterator
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 13/16] block: change rq_integrity_vec to respect the iterator Sasha Levin
@ 2024-08-12 13:51   ` Matthieu Baerts
  2024-08-12 14:03     ` Greg KH
  0 siblings, 1 reply; 22+ messages in thread
From: Matthieu Baerts @ 2024-08-12 13:51 UTC (permalink / raw)
  To: Sasha Levin, stable, Greg KH
  Cc: Mikulas Patocka, Anuj Gupta, Kanchan Joshi, Christoph Hellwig,
	Jens Axboe, kbusch, sagi, linux-nvme, linux-block, linux-kernel

Hi Sasha, Greg,

On 28/07/2024 02:47, Sasha Levin wrote:
> From: Mikulas Patocka <mpatocka@redhat.com>
> 
> [ Upstream commit cf546dd289e0f6d2594c25e2fb4e19ee67c6d988 ]
> 
> If we allocate a bio that is larger than NVMe maximum request size,
> attach integrity metadata to it and send it to the NVMe subsystem, the
> integrity metadata will be corrupted.

(...)

> diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
> index 7428cb43952da..d16dd24719841 100644
> --- a/include/linux/blk-integrity.h
> +++ b/include/linux/blk-integrity.h
> @@ -100,14 +100,13 @@ static inline bool blk_integrity_rq(struct request *rq)
>  }
>  
>  /*
> - * Return the first bvec that contains integrity data.  Only drivers that are
> - * limited to a single integrity segment should use this helper.
> + * Return the current bvec that contains the integrity data. bip_iter may be
> + * advanced to iterate over the integrity data.
>   */
> -static inline struct bio_vec *rq_integrity_vec(struct request *rq)
> +static inline struct bio_vec rq_integrity_vec(struct request *rq)
>  {
> -	if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
> -		return NULL;
> -	return rq->bio->bi_integrity->bip_vec;
> +	return mp_bvec_iter_bvec(rq->bio->bi_integrity->bip_vec,
> +				 rq->bio->bi_integrity->bip_iter);
>  }
>  #else /* CONFIG_BLK_DEV_INTEGRITY */
>  static inline int blk_rq_count_integrity_sg(struct request_queue *q,
> @@ -169,7 +168,8 @@ static inline int blk_integrity_rq(struct request *rq)
>  
>  static inline struct bio_vec *rq_integrity_vec(struct request *rq)
>  {
> -	return NULL;
> +	/* the optimizer will remove all calls to this function */
> +	return (struct bio_vec){ };

If CONFIG_BLK_DEV_INTEGRITY is not defined, there is a compilation error
here in v6.10 with the recently queued patches because the signature has
not been updated:

> In file included from block/bdev.c:15:                                                                                                                                             
> include/linux/blk-integrity.h: In function 'rq_integrity_vec':
> include/linux/blk-integrity.h:172:16: error: incompatible types when returning type 'struct bio_vec' but 'struct bio_vec *' was expected
>   172 |         return (struct bio_vec){ };                 
>       |                ^

Could it be possible to backport the following fix to v6.10 as well please?

  69b6517687a4 ("block: use the right type for stub rq_integrity_vec()")

It is also needed for v6.6 and v6.1.

Cheers,
Matt
-- 
Sponsored by the NGI0 Core fund.


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH AUTOSEL 6.10 13/16] block: change rq_integrity_vec to respect the iterator
  2024-08-12 13:51   ` Matthieu Baerts
@ 2024-08-12 14:03     ` Greg KH
  0 siblings, 0 replies; 22+ messages in thread
From: Greg KH @ 2024-08-12 14:03 UTC (permalink / raw)
  To: Matthieu Baerts
  Cc: Sasha Levin, stable, Mikulas Patocka, Anuj Gupta, Kanchan Joshi,
	Christoph Hellwig, Jens Axboe, kbusch, sagi, linux-nvme,
	linux-block, linux-kernel

On Mon, Aug 12, 2024 at 03:51:12PM +0200, Matthieu Baerts wrote:
> Hi Sasha, Greg,
> 
> On 28/07/2024 02:47, Sasha Levin wrote:
> > From: Mikulas Patocka <mpatocka@redhat.com>
> > 
> > [ Upstream commit cf546dd289e0f6d2594c25e2fb4e19ee67c6d988 ]
> > 
> > If we allocate a bio that is larger than NVMe maximum request size,
> > attach integrity metadata to it and send it to the NVMe subsystem, the
> > integrity metadata will be corrupted.
> 
> (...)
> 
> > diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
> > index 7428cb43952da..d16dd24719841 100644
> > --- a/include/linux/blk-integrity.h
> > +++ b/include/linux/blk-integrity.h
> > @@ -100,14 +100,13 @@ static inline bool blk_integrity_rq(struct request *rq)
> >  }
> >  
> >  /*
> > - * Return the first bvec that contains integrity data.  Only drivers that are
> > - * limited to a single integrity segment should use this helper.
> > + * Return the current bvec that contains the integrity data. bip_iter may be
> > + * advanced to iterate over the integrity data.
> >   */
> > -static inline struct bio_vec *rq_integrity_vec(struct request *rq)
> > +static inline struct bio_vec rq_integrity_vec(struct request *rq)
> >  {
> > -	if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
> > -		return NULL;
> > -	return rq->bio->bi_integrity->bip_vec;
> > +	return mp_bvec_iter_bvec(rq->bio->bi_integrity->bip_vec,
> > +				 rq->bio->bi_integrity->bip_iter);
> >  }
> >  #else /* CONFIG_BLK_DEV_INTEGRITY */
> >  static inline int blk_rq_count_integrity_sg(struct request_queue *q,
> > @@ -169,7 +168,8 @@ static inline int blk_integrity_rq(struct request *rq)
> >  
> >  static inline struct bio_vec *rq_integrity_vec(struct request *rq)
> >  {
> > -	return NULL;
> > +	/* the optimizer will remove all calls to this function */
> > +	return (struct bio_vec){ };
> 
> If CONFIG_BLK_DEV_INTEGRITY is not defined, there is a compilation error
> here in v6.10 with the recently queued patches because the signature has
> not been updated:
> 
> > In file included from block/bdev.c:15:                                                                                                                                             
> > include/linux/blk-integrity.h: In function 'rq_integrity_vec':
> > include/linux/blk-integrity.h:172:16: error: incompatible types when returning type 'struct bio_vec' but 'struct bio_vec *' was expected
> >   172 |         return (struct bio_vec){ };                 
> >       |                ^
> 
> Could it be possible to backport the following fix to v6.10 as well please?
> 
>   69b6517687a4 ("block: use the right type for stub rq_integrity_vec()")
> 
> It is also needed for v6.6 and v6.1.

Now queued up, thanks!

greg k-h

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH AUTOSEL 6.10 14/16] irqchip/gic-v3: Don't return errors from gic_acpi_match_gicc()
  2024-07-28  0:47 [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap() Sasha Levin
                   ` (11 preceding siblings ...)
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 13/16] block: change rq_integrity_vec to respect the iterator Sasha Levin
@ 2024-07-28  0:47 ` Sasha Levin
  2024-07-28  9:22   ` Marc Zyngier
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 15/16] rcu: Fix rcu_barrier() VS post CPUHP_TEARDOWN_CPU invocation Sasha Levin
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 16/16] clocksource/drivers/sh_cmt: Address race condition for clock events Sasha Levin
  14 siblings, 1 reply; 22+ messages in thread
From: Sasha Levin @ 2024-07-28  0:47 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: James Morse, Gavin Shan, Miguel Luis, Russell King,
	Jonathan Cameron, Marc Zyngier, Catalin Marinas, Sasha Levin,
	tglx, linux-arm-kernel

From: James Morse <james.morse@arm.com>

[ Upstream commit fa2dabe57220e6af78ed7a2f7016bf250a618204 ]

gic_acpi_match_gicc() is only called via gic_acpi_count_gicr_regions().
It should only count the number of enabled redistributors, but it
also tries to sanity check the GICC entry, currently returning an
error if the Enabled bit is set, but the gicr_base_address is zero.

Adding support for the online-capable bit to the sanity check will
complicate it, for no benefit. The existing check implicitly depends on
gic_acpi_count_gicr_regions() previous failing to find any GICR regions
(as it is valid to have gicr_base_address of zero if the redistributors
are described via a GICR entry).

Instead of complicating the check, remove it. Failures that happen at
this point cause the irqchip not to register, meaning no irqs can be
requested. The kernel grinds to a panic() pretty quickly.

Without the check, MADT tables that exhibit this problem are still
caught by gic_populate_rdist(), which helpfully also prints what went
wrong:
| CPU4: mpidr 100 has no re-distributor!

Signed-off-by: James Morse <james.morse@arm.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Tested-by: Miguel Luis <miguel.luis@oracle.com>
Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20240529133446.28446-14-Jonathan.Cameron@huawei.com
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/irqchip/irq-gic-v3.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c
index 6fb276504bcc8..10af15f93d4d4 100644
--- a/drivers/irqchip/irq-gic-v3.c
+++ b/drivers/irqchip/irq-gic-v3.c
@@ -2415,19 +2415,10 @@ static int __init gic_acpi_match_gicc(union acpi_subtable_headers *header,
 	 * If GICC is enabled and has valid gicr base address, then it means
 	 * GICR base is presented via GICC
 	 */
-	if (acpi_gicc_is_usable(gicc) && gicc->gicr_base_address) {
+	if (acpi_gicc_is_usable(gicc) && gicc->gicr_base_address)
 		acpi_data.enabled_rdists++;
-		return 0;
-	}
 
-	/*
-	 * It's perfectly valid firmware can pass disabled GICC entry, driver
-	 * should not treat as errors, skip the entry instead of probe fail.
-	 */
-	if (!acpi_gicc_is_usable(gicc))
-		return 0;
-
-	return -ENODEV;
+	return 0;
 }
 
 static int __init gic_acpi_count_gicr_regions(void)
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH AUTOSEL 6.10 14/16] irqchip/gic-v3: Don't return errors from gic_acpi_match_gicc()
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 14/16] irqchip/gic-v3: Don't return errors from gic_acpi_match_gicc() Sasha Levin
@ 2024-07-28  9:22   ` Marc Zyngier
  2024-08-10  9:11     ` Sasha Levin
  0 siblings, 1 reply; 22+ messages in thread
From: Marc Zyngier @ 2024-07-28  9:22 UTC (permalink / raw)
  To: Sasha Levin
  Cc: linux-kernel, stable, James Morse, Gavin Shan, Miguel Luis,
	Russell King, Jonathan Cameron, Catalin Marinas, tglx,
	linux-arm-kernel

On Sun, 28 Jul 2024 01:47:31 +0100,
Sasha Levin <sashal@kernel.org> wrote:
> 
> From: James Morse <james.morse@arm.com>
> 
> [ Upstream commit fa2dabe57220e6af78ed7a2f7016bf250a618204 ]
> 
> gic_acpi_match_gicc() is only called via gic_acpi_count_gicr_regions().
> It should only count the number of enabled redistributors, but it
> also tries to sanity check the GICC entry, currently returning an
> error if the Enabled bit is set, but the gicr_base_address is zero.
> 
> Adding support for the online-capable bit to the sanity check will
> complicate it, for no benefit. The existing check implicitly depends on
> gic_acpi_count_gicr_regions() previous failing to find any GICR regions
> (as it is valid to have gicr_base_address of zero if the redistributors
> are described via a GICR entry).
> 
> Instead of complicating the check, remove it. Failures that happen at
> this point cause the irqchip not to register, meaning no irqs can be
> requested. The kernel grinds to a panic() pretty quickly.
> 
> Without the check, MADT tables that exhibit this problem are still
> caught by gic_populate_rdist(), which helpfully also prints what went
> wrong:
> | CPU4: mpidr 100 has no re-distributor!
> 
> Signed-off-by: James Morse <james.morse@arm.com>
> Reviewed-by: Gavin Shan <gshan@redhat.com>
> Tested-by: Miguel Luis <miguel.luis@oracle.com>
> Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Reviewed-by: Marc Zyngier <maz@kernel.org>
> Link: https://lore.kernel.org/r/20240529133446.28446-14-Jonathan.Cameron@huawei.com
> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
> Signed-off-by: Sasha Levin <sashal@kernel.org>

Please drop this. It has no purpose being backported to stable.

Thanks,

	M.

-- 
Without deviation from the norm, progress is not possible.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH AUTOSEL 6.10 14/16] irqchip/gic-v3: Don't return errors from gic_acpi_match_gicc()
  2024-07-28  9:22   ` Marc Zyngier
@ 2024-08-10  9:11     ` Sasha Levin
  0 siblings, 0 replies; 22+ messages in thread
From: Sasha Levin @ 2024-08-10  9:11 UTC (permalink / raw)
  To: Marc Zyngier
  Cc: linux-kernel, stable, James Morse, Gavin Shan, Miguel Luis,
	Russell King, Jonathan Cameron, Catalin Marinas, tglx,
	linux-arm-kernel

On Sun, Jul 28, 2024 at 10:22:53AM +0100, Marc Zyngier wrote:
>On Sun, 28 Jul 2024 01:47:31 +0100,
>Sasha Levin <sashal@kernel.org> wrote:
>>
>> From: James Morse <james.morse@arm.com>
>>
>> [ Upstream commit fa2dabe57220e6af78ed7a2f7016bf250a618204 ]
>>
>> gic_acpi_match_gicc() is only called via gic_acpi_count_gicr_regions().
>> It should only count the number of enabled redistributors, but it
>> also tries to sanity check the GICC entry, currently returning an
>> error if the Enabled bit is set, but the gicr_base_address is zero.
>>
>> Adding support for the online-capable bit to the sanity check will
>> complicate it, for no benefit. The existing check implicitly depends on
>> gic_acpi_count_gicr_regions() previous failing to find any GICR regions
>> (as it is valid to have gicr_base_address of zero if the redistributors
>> are described via a GICR entry).
>>
>> Instead of complicating the check, remove it. Failures that happen at
>> this point cause the irqchip not to register, meaning no irqs can be
>> requested. The kernel grinds to a panic() pretty quickly.
>>
>> Without the check, MADT tables that exhibit this problem are still
>> caught by gic_populate_rdist(), which helpfully also prints what went
>> wrong:
>> | CPU4: mpidr 100 has no re-distributor!
>>
>> Signed-off-by: James Morse <james.morse@arm.com>
>> Reviewed-by: Gavin Shan <gshan@redhat.com>
>> Tested-by: Miguel Luis <miguel.luis@oracle.com>
>> Signed-off-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
>> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
>> Reviewed-by: Marc Zyngier <maz@kernel.org>
>> Link: https://lore.kernel.org/r/20240529133446.28446-14-Jonathan.Cameron@huawei.com
>> Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
>> Signed-off-by: Sasha Levin <sashal@kernel.org>
>
>Please drop this. It has no purpose being backported to stable.

Ack, thanks!

-- 
Thanks,
Sasha

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH AUTOSEL 6.10 15/16] rcu: Fix rcu_barrier() VS post CPUHP_TEARDOWN_CPU invocation
  2024-07-28  0:47 [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap() Sasha Levin
                   ` (12 preceding siblings ...)
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 14/16] irqchip/gic-v3: Don't return errors from gic_acpi_match_gicc() Sasha Levin
@ 2024-07-28  0:47 ` Sasha Levin
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 16/16] clocksource/drivers/sh_cmt: Address race condition for clock events Sasha Levin
  14 siblings, 0 replies; 22+ messages in thread
From: Sasha Levin @ 2024-07-28  0:47 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Frederic Weisbecker, Paul E . McKenney, Sasha Levin,
	neeraj.upadhyay, joel, josh, boqun.feng, urezki, rcu

From: Frederic Weisbecker <frederic@kernel.org>

[ Upstream commit 55d4669ef1b76823083caecfab12a8bd2ccdcf64 ]

When rcu_barrier() calls rcu_rdp_cpu_online() and observes a CPU off
rnp->qsmaskinitnext, it means that all accesses from the offline CPU
preceding the CPUHP_TEARDOWN_CPU are visible to RCU barrier, including
callbacks expiration and counter updates.

However interrupts can still fire after stop_machine() re-enables
interrupts and before rcutree_report_cpu_dead(). The related accesses
happening between CPUHP_TEARDOWN_CPU and rnp->qsmaskinitnext clearing
are _NOT_ guaranteed to be seen by rcu_barrier() without proper
ordering, especially when callbacks are invoked there to the end, making
rcutree_migrate_callback() bypass barrier_lock.

The following theoretical race example can make rcu_barrier() hang:

CPU 0                                               CPU 1
-----                                               -----
//cpu_down()
smpboot_park_threads()
//ksoftirqd is parked now
<IRQ>
rcu_sched_clock_irq()
   invoke_rcu_core()
do_softirq()
   rcu_core()
      rcu_do_batch()
         // callback storm
         // rcu_do_batch() returns
         // before completing all
         // of them
   // do_softirq also returns early because of
   // timeout. It defers to ksoftirqd but
   // it's parked
</IRQ>
stop_machine()
   take_cpu_down()
                                                    rcu_barrier()
                                                        spin_lock(barrier_lock)
                                                        // observes rcu_segcblist_n_cbs(&rdp->cblist) != 0
<IRQ>
do_softirq()
   rcu_core()
      rcu_do_batch()
         //completes all pending callbacks
         //smp_mb() implied _after_ callback number dec
</IRQ>

rcutree_report_cpu_dead()
   rnp->qsmaskinitnext &= ~rdp->grpmask;

rcutree_migrate_callback()
   // no callback, early return without locking
   // barrier_lock
                                                        //observes !rcu_rdp_cpu_online(rdp)
                                                        rcu_barrier_entrain()
                                                           rcu_segcblist_entrain()
                                                              // Observe rcu_segcblist_n_cbs(rsclp) == 0
                                                              // because no barrier between reading
                                                              // rnp->qsmaskinitnext and rsclp->len
                                                              rcu_segcblist_add_len()
                                                                 smp_mb__before_atomic()
                                                                 // will now observe the 0 count and empty
                                                                 // list, but too late, we enqueue regardless
                                                                 WRITE_ONCE(rsclp->len, rsclp->len + v);
                                                        // ignored barrier callback
                                                        // rcu barrier stall...

This could be solved with a read memory barrier, enforcing the message
passing between rnp->qsmaskinitnext and rsclp->len, matching the full
memory barrier after rsclp->len addition in rcu_segcblist_add_len()
performed at the end of rcu_do_batch().

However the rcu_barrier() is complicated enough and probably doesn't
need too many more subtleties. CPU down is a slowpath and the
barrier_lock seldom contended. Solve the issue with unconditionally
locking the barrier_lock on rcutree_migrate_callbacks(). This makes sure
that either rcu_barrier() sees the empty queue or its entrained
callback will be migrated.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 kernel/rcu/tree.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 28c7031711a3f..63fb007beeaf5 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -5110,11 +5110,15 @@ void rcutree_migrate_callbacks(int cpu)
 	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
 	bool needwake;
 
-	if (rcu_rdp_is_offloaded(rdp) ||
-	    rcu_segcblist_empty(&rdp->cblist))
-		return;  /* No callbacks to migrate. */
+	if (rcu_rdp_is_offloaded(rdp))
+		return;
 
 	raw_spin_lock_irqsave(&rcu_state.barrier_lock, flags);
+	if (rcu_segcblist_empty(&rdp->cblist)) {
+		raw_spin_unlock_irqrestore(&rcu_state.barrier_lock, flags);
+		return;  /* No callbacks to migrate. */
+	}
+
 	WARN_ON_ONCE(rcu_rdp_cpu_online(rdp));
 	rcu_barrier_entrain(rdp);
 	my_rdp = this_cpu_ptr(&rcu_data);
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH AUTOSEL 6.10 16/16] clocksource/drivers/sh_cmt: Address race condition for clock events
  2024-07-28  0:47 [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap() Sasha Levin
                   ` (13 preceding siblings ...)
  2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 15/16] rcu: Fix rcu_barrier() VS post CPUHP_TEARDOWN_CPU invocation Sasha Levin
@ 2024-07-28  0:47 ` Sasha Levin
  14 siblings, 0 replies; 22+ messages in thread
From: Sasha Levin @ 2024-07-28  0:47 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Niklas Söderlund, Daniel Lezcano, Sasha Levin, tglx

From: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>

[ Upstream commit db19d3aa77612983a02bd223b3f273f896b243cf ]

There is a race condition in the CMT interrupt handler. In the interrupt
handler the driver sets a driver private flag, FLAG_IRQCONTEXT. This
flag is used to indicate any call to set_next_event() should not be
directly propagated to the device, but instead cached. This is done as
the interrupt handler itself reprograms the device when needed before it
completes and this avoids this operation to take place twice.

It is unclear why this design was chosen, my suspicion is to allow the
struct clock_event_device.event_handler callback, which is called while
the FLAG_IRQCONTEXT is set, can update the next event without having to
write to the device twice.

Unfortunately there is a race between when the FLAG_IRQCONTEXT flag is
set and later cleared where the interrupt handler have already started to
write the next event to the device. If set_next_event() is called in
this window the value is only cached in the driver but not written. This
leads to the board to misbehave, or worse lockup and produce a splat.

   rcu: INFO: rcu_preempt detected stalls on CPUs/tasks:
   rcu:     0-...!: (0 ticks this GP) idle=f5e0/0/0x0 softirq=519/519 fqs=0 (false positive?)
   rcu:     (detected by 1, t=6502 jiffies, g=-595, q=77 ncpus=2)
   Sending NMI from CPU 1 to CPUs 0:
   NMI backtrace for cpu 0
   CPU: 0 PID: 0 Comm: swapper/0 Not tainted 6.10.0-rc5-arm64-renesas-00019-g74a6f86eaf1c-dirty #20
   Hardware name: Renesas Salvator-X 2nd version board based on r8a77965 (DT)
   pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
   pc : tick_check_broadcast_expired+0xc/0x40
   lr : cpu_idle_poll.isra.0+0x8c/0x168
   sp : ffff800081c63d70
   x29: ffff800081c63d70 x28: 00000000580000c8 x27: 00000000bfee5610
   x26: 0000000000000027 x25: 0000000000000000 x24: 0000000000000000
   x23: ffff00007fbb9100 x22: ffff8000818f1008 x21: ffff8000800ef07c
   x20: ffff800081c79ec0 x19: ffff800081c70c28 x18: 0000000000000000
   x17: 0000000000000000 x16: 0000000000000000 x15: 0000ffffc2c717d8
   x14: 0000000000000000 x13: ffff000009c18080 x12: ffff8000825f7fc0
   x11: 0000000000000000 x10: ffff8000818f3cd4 x9 : 0000000000000028
   x8 : ffff800081c79ec0 x7 : ffff800081c73000 x6 : 0000000000000000
   x5 : 0000000000000000 x4 : ffff7ffffe286000 x3 : 0000000000000000
   x2 : ffff7ffffe286000 x1 : ffff800082972900 x0 : ffff8000818f1008
   Call trace:
    tick_check_broadcast_expired+0xc/0x40
    do_idle+0x9c/0x280
    cpu_startup_entry+0x34/0x40
    kernel_init+0x0/0x11c
    do_one_initcall+0x0/0x260
    __primary_switched+0x80/0x88
   rcu: rcu_preempt kthread timer wakeup didn't happen for 6501 jiffies! g-595 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402
   rcu:     Possible timer handling issue on cpu=0 timer-softirq=262
   rcu: rcu_preempt kthread starved for 6502 jiffies! g-595 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402 ->cpu=0
   rcu:     Unless rcu_preempt kthread gets sufficient CPU time, OOM is now expected behavior.
   rcu: RCU grace-period kthread stack dump:
   task:rcu_preempt     state:I stack:0     pid:15    tgid:15    ppid:2      flags:0x00000008
   Call trace:
    __switch_to+0xbc/0x100
    __schedule+0x358/0xbe0
    schedule+0x48/0x148
    schedule_timeout+0xc4/0x138
    rcu_gp_fqs_loop+0x12c/0x764
    rcu_gp_kthread+0x208/0x298
    kthread+0x10c/0x110
    ret_from_fork+0x10/0x20

The design have been part of the driver since it was first merged in
early 2009. It becomes increasingly harder to trigger the issue the
older kernel version one tries. It only takes a few boots on v6.10-rc5,
while hundreds of boots are needed to trigger it on v5.10.

Close the race condition by using the CMT channel lock for the two
competing sections. The channel lock was added to the driver after its
initial design.

Signed-off-by: Niklas Söderlund <niklas.soderlund+renesas@ragnatech.se>
Link: https://lore.kernel.org/r/20240702190230.3825292-1-niklas.soderlund+renesas@ragnatech.se
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 drivers/clocksource/sh_cmt.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/clocksource/sh_cmt.c b/drivers/clocksource/sh_cmt.c
index 26919556ef5f0..b72b36e0abed8 100644
--- a/drivers/clocksource/sh_cmt.c
+++ b/drivers/clocksource/sh_cmt.c
@@ -528,6 +528,7 @@ static void sh_cmt_set_next(struct sh_cmt_channel *ch, unsigned long delta)
 static irqreturn_t sh_cmt_interrupt(int irq, void *dev_id)
 {
 	struct sh_cmt_channel *ch = dev_id;
+	unsigned long flags;
 
 	/* clear flags */
 	sh_cmt_write_cmcsr(ch, sh_cmt_read_cmcsr(ch) &
@@ -558,6 +559,8 @@ static irqreturn_t sh_cmt_interrupt(int irq, void *dev_id)
 
 	ch->flags &= ~FLAG_SKIPEVENT;
 
+	raw_spin_lock_irqsave(&ch->lock, flags);
+
 	if (ch->flags & FLAG_REPROGRAM) {
 		ch->flags &= ~FLAG_REPROGRAM;
 		sh_cmt_clock_event_program_verify(ch, 1);
@@ -570,6 +573,8 @@ static irqreturn_t sh_cmt_interrupt(int irq, void *dev_id)
 
 	ch->flags &= ~FLAG_IRQCONTEXT;
 
+	raw_spin_unlock_irqrestore(&ch->lock, flags);
+
 	return IRQ_HANDLED;
 }
 
@@ -780,12 +785,18 @@ static int sh_cmt_clock_event_next(unsigned long delta,
 				   struct clock_event_device *ced)
 {
 	struct sh_cmt_channel *ch = ced_to_sh_cmt(ced);
+	unsigned long flags;
 
 	BUG_ON(!clockevent_state_oneshot(ced));
+
+	raw_spin_lock_irqsave(&ch->lock, flags);
+
 	if (likely(ch->flags & FLAG_IRQCONTEXT))
 		ch->next_match_value = delta - 1;
 	else
-		sh_cmt_set_next(ch, delta - 1);
+		__sh_cmt_set_next(ch, delta - 1);
+
+	raw_spin_unlock_irqrestore(&ch->lock, flags);
 
 	return 0;
 }
-- 
2.43.0


^ permalink raw reply related	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2024-08-12 14:03 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-07-28  0:47 [PATCH AUTOSEL 6.10 01/16] regmap: kunit: Fix memory leaks in gen_regmap() and gen_raw_regmap() Sasha Levin
2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 02/16] fs: remove accidental overflow during wraparound check Sasha Levin
2024-07-29 11:54   ` Jan Kara
2024-08-10  9:10     ` Sasha Levin
2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 03/16] gpio: prevent potential speculation leaks in gpio_device_get_desc() Sasha Levin
2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 04/16] hwmon: corsair-psu: add USB id of HX1200i Series 2023 psu Sasha Levin
2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 05/16] Revert "rcu-tasks: Fix synchronize_rcu_tasks() VS zap_pid_ns_processes()" Sasha Levin
2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 06/16] platform/chrome: cros_ec_lpc: Add a new quirk for ACPI id Sasha Levin
2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 07/16] rcutorture: Fix rcu_torture_fwd_cb_cr() data race Sasha Levin
2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 08/16] md: do not delete safemode_timer in mddev_suspend Sasha Levin
2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 09/16] md: change the return value type of md_write_start to void Sasha Levin
2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 10/16] md/raid5: avoid BUG_ON() while continue reshape after reassembling Sasha Levin
2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 11/16] debugobjects: Annotate racy debug variables Sasha Levin
2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 12/16] nvme: apple: fix device reference counting Sasha Levin
2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 13/16] block: change rq_integrity_vec to respect the iterator Sasha Levin
2024-08-12 13:51   ` Matthieu Baerts
2024-08-12 14:03     ` Greg KH
2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 14/16] irqchip/gic-v3: Don't return errors from gic_acpi_match_gicc() Sasha Levin
2024-07-28  9:22   ` Marc Zyngier
2024-08-10  9:11     ` Sasha Levin
2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 15/16] rcu: Fix rcu_barrier() VS post CPUHP_TEARDOWN_CPU invocation Sasha Levin
2024-07-28  0:47 ` [PATCH AUTOSEL 6.10 16/16] clocksource/drivers/sh_cmt: Address race condition for clock events Sasha Levin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox