All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] gpiolib: fix race condition for gdev->srcu
@ 2025-12-23  8:42 Paweł Narewski
  2025-12-23 20:14 ` kernel test robot
                   ` (4 more replies)
  0 siblings, 5 replies; 6+ messages in thread
From: Paweł Narewski @ 2025-12-23  8:42 UTC (permalink / raw)
  To: linux-gpio; +Cc: linusw, brgl, Paweł Narewski, Jakub Lewalski

If two drivers call gpiochip_add_data_with_key() concurrently, one may
be traversing the SRCU-protected list in gpio_name_to_desc() while the
other has just added its gdev in gpiodev_add_to_list_unlocked().
This creates an unprotected window (no mutex is held) in which one
instance dereferences and uses &gdev->srcu before the other has
initialized it, resulting in a crash:

[    4.935481] Unable to handle kernel paging request at virtual address ffff800272bcc000
[    4.943396] Mem abort info:
[    4.943400]   ESR = 0x0000000096000005
[    4.943403]   EC = 0x25: DABT (current EL), IL = 32 bits
[    4.943407]   SET = 0, FnV = 0
[    4.943410]   EA = 0, S1PTW = 0
[    4.943413]   FSC = 0x05: level 1 translation fault
[    4.943416] Data abort info:
[    4.943418]   ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
[    4.946220]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
[    4.955261]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
[    4.955268] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000038e6c000
[    4.961449] [ffff800272bcc000] pgd=0000000000000000
[    4.969203] , p4d=1000000039739003
[    4.979730] , pud=0000000000000000
[    4.980210] phandle (CPU): 0x0000005e, phandle (BE): 0x5e000000 for node "reset"
[    4.991736] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP
...
[    5.121359] pc : __srcu_read_lock+0x44/0x98
[    5.131091] lr : gpio_name_to_desc+0x60/0x1a0
[    5.153671] sp : ffff8000833bb430
[    5.298440]
[    5.298443] Call trace:
[    5.298445]  __srcu_read_lock+0x44/0x98
[    5.309484]  gpio_name_to_desc+0x60/0x1a0
[    5.320692]  gpiochip_add_data_with_key+0x488/0xf00
[    5.946419] ---[ end trace 0000000000000000 ]---

Move the initialization of the gdev fields, together with the adjacent
initialization code, to before gdev is added to gpio_devices.
Adjust the goto statements to reflect the modified order of operations.

Fixes: 47d8b4c1d868 ("gpio: add SRCU infrastructure to struct gpio_device")
Reviewed-by: Jakub Lewalski <jakub.lewalski@nokia.com>
Signed-off-by: Paweł Narewski <pawel.narewski@nokia.com>
---
 drivers/gpio/gpiolib.c | 41 +++++++++++++++++++++--------------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 409c2f415251..f25a7584b45e 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -1105,6 +1105,19 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 	gdev->ngpio = gc->ngpio;
 	gdev->can_sleep = gc->can_sleep;
 
+	rwlock_init(&gdev->line_state_lock);
+	BLOCKING_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier);
+	BLOCKING_INIT_NOTIFIER_HEAD(&gdev->device_notifier);
+
+	ret = init_srcu_struct(&gdev->srcu);
+	if (ret)
+		goto err_free_label;
+
+	ret = init_srcu_struct(&gdev->desc_srcu);
+	if (ret)
+		goto err_cleanup_gdev_srcu;
+
+
 	scoped_guard(mutex, &gpio_devices_lock) {
 		/*
 		 * TODO: this allocates a Linux GPIO number base in the global
@@ -1119,7 +1132,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 			if (base < 0) {
 				ret = base;
 				base = 0;
-				goto err_free_label;
+				goto err_cleanup_desc_srcu;
 			}
 
 			/*
@@ -1139,22 +1152,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 		ret = gpiodev_add_to_list_unlocked(gdev);
 		if (ret) {
 			gpiochip_err(gc, "GPIO integer space overlap, cannot add chip\n");
-			goto err_free_label;
+			goto err_cleanup_desc_srcu;
 		}
 	}
 
-	rwlock_init(&gdev->line_state_lock);
-	RAW_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier);
-	BLOCKING_INIT_NOTIFIER_HEAD(&gdev->device_notifier);
-
-	ret = init_srcu_struct(&gdev->srcu);
-	if (ret)
-		goto err_remove_from_list;
-
-	ret = init_srcu_struct(&gdev->desc_srcu);
-	if (ret)
-		goto err_cleanup_gdev_srcu;
-
 #ifdef CONFIG_PINCTRL
 	INIT_LIST_HEAD(&gdev->pin_ranges);
 #endif
@@ -1164,11 +1165,11 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 
 	ret = gpiochip_set_names(gc);
 	if (ret)
-		goto err_cleanup_desc_srcu;
+		goto err_remove_from_list;
 
 	ret = gpiochip_init_valid_mask(gc);
 	if (ret)
-		goto err_cleanup_desc_srcu;
+		goto err_remove_from_list;
 
 	for (desc_index = 0; desc_index < gc->ngpio; desc_index++) {
 		struct gpio_desc *desc = &gdev->descs[desc_index];
@@ -1248,10 +1249,6 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 	of_gpiochip_remove(gc);
 err_free_valid_mask:
 	gpiochip_free_valid_mask(gc);
-err_cleanup_desc_srcu:
-	cleanup_srcu_struct(&gdev->desc_srcu);
-err_cleanup_gdev_srcu:
-	cleanup_srcu_struct(&gdev->srcu);
 err_remove_from_list:
 	scoped_guard(mutex, &gpio_devices_lock)
 		list_del_rcu(&gdev->list);
@@ -1261,6 +1258,10 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 		gpio_device_put(gdev);
 		goto err_print_message;
 	}
+err_cleanup_desc_srcu:
+	cleanup_srcu_struct(&gdev->desc_srcu);
+err_cleanup_gdev_srcu:
+	cleanup_srcu_struct(&gdev->srcu);
 err_free_label:
 	kfree_const(gdev->label);
 err_free_descs:
-- 
2.42.0


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] gpiolib: fix race condition for gdev->srcu
  2025-12-23  8:42 [PATCH] gpiolib: fix race condition for gdev->srcu Paweł Narewski
@ 2025-12-23 20:14 ` kernel test robot
  2025-12-24  3:47 ` kernel test robot
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: kernel test robot @ 2025-12-23 20:14 UTC (permalink / raw)
  To: Paweł Narewski, linux-gpio
  Cc: oe-kbuild-all, linusw, brgl, Paweł Narewski, Jakub Lewalski

Hi Paweł,

kernel test robot noticed the following build errors:

[auto build test ERROR on brgl/gpio/for-next]
[also build test ERROR on linus/master v6.19-rc2 next-20251219]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Pawe-Narewski/gpiolib-fix-race-condition-for-gdev-srcu/20251223-165142
base:   https://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git gpio/for-next
patch link:    https://lore.kernel.org/r/20251223084952.1847489-1-pawel.narewski%40nokia.com
patch subject: [PATCH] gpiolib: fix race condition for gdev->srcu
config: openrisc-allnoconfig (https://download.01.org/0day-ci/archive/20251224/202512240338.Efsr69oP-lkp@intel.com/config)
compiler: or1k-linux-gcc (GCC) 15.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251224/202512240338.Efsr69oP-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202512240338.Efsr69oP-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from include/linux/mm_types.h:13,
                    from include/linux/mmzone.h:22,
                    from include/linux/gfp.h:7,
                    from include/linux/slab.h:16,
                    from include/linux/resource_ext.h:11,
                    from include/linux/acpi.h:14,
                    from drivers/gpio/gpiolib.c:3:
   drivers/gpio/gpiolib.c: In function 'gpiochip_add_data_with_key':
>> include/linux/notifier.h:86:35: error: 'struct raw_notifier_head' has no member named 'rwsem'
      86 |                 init_rwsem(&(name)->rwsem);     \
         |                                   ^~
   include/linux/rwsem.h:121:23: note: in definition of macro 'init_rwsem'
     121 |         __init_rwsem((sem), #sem, &__key);                      \
         |                       ^~~
   drivers/gpio/gpiolib.c:1109:9: note: in expansion of macro 'BLOCKING_INIT_NOTIFIER_HEAD'
    1109 |         BLOCKING_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier);
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~


vim +86 include/linux/notifier.h

eabc069401bcf4 Alan Stern 2006-10-04  80  
e041c683412d5b Alan Stern 2006-03-27  81  #define ATOMIC_INIT_NOTIFIER_HEAD(name) do {	\
e041c683412d5b Alan Stern 2006-03-27  82  		spin_lock_init(&(name)->lock);	\
e041c683412d5b Alan Stern 2006-03-27  83  		(name)->head = NULL;		\
e041c683412d5b Alan Stern 2006-03-27  84  	} while (0)
e041c683412d5b Alan Stern 2006-03-27  85  #define BLOCKING_INIT_NOTIFIER_HEAD(name) do {	\
e041c683412d5b Alan Stern 2006-03-27 @86  		init_rwsem(&(name)->rwsem);	\
e041c683412d5b Alan Stern 2006-03-27  87  		(name)->head = NULL;		\
e041c683412d5b Alan Stern 2006-03-27  88  	} while (0)
e041c683412d5b Alan Stern 2006-03-27  89  #define RAW_INIT_NOTIFIER_HEAD(name) do {	\
e041c683412d5b Alan Stern 2006-03-27  90  		(name)->head = NULL;		\
e041c683412d5b Alan Stern 2006-03-27  91  	} while (0)
e041c683412d5b Alan Stern 2006-03-27  92  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] gpiolib: fix race condition for gdev->srcu
  2025-12-23  8:42 [PATCH] gpiolib: fix race condition for gdev->srcu Paweł Narewski
  2025-12-23 20:14 ` kernel test robot
@ 2025-12-24  3:47 ` kernel test robot
  2025-12-24  6:21 ` kernel test robot
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 6+ messages in thread
From: kernel test robot @ 2025-12-24  3:47 UTC (permalink / raw)
  To: Paweł Narewski, linux-gpio
  Cc: llvm, oe-kbuild-all, linusw, brgl, Paweł Narewski,
	Jakub Lewalski

Hi Paweł,

kernel test robot noticed the following build errors:

[auto build test ERROR on brgl/gpio/for-next]
[also build test ERROR on linus/master v6.19-rc2 next-20251219]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Pawe-Narewski/gpiolib-fix-race-condition-for-gdev-srcu/20251223-165142
base:   https://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git gpio/for-next
patch link:    https://lore.kernel.org/r/20251223084952.1847489-1-pawel.narewski%40nokia.com
patch subject: [PATCH] gpiolib: fix race condition for gdev->srcu
config: x86_64-kexec (https://download.01.org/0day-ci/archive/20251224/202512240408.wrQcvO0t-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251224/202512240408.wrQcvO0t-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202512240408.wrQcvO0t-lkp@intel.com/

All errors (new ones prefixed by >>):

>> drivers/gpio/gpiolib.c:1109:2: error: no member named 'rwsem' in 'struct raw_notifier_head'
    1109 |         BLOCKING_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier);
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/notifier.h:86:23: note: expanded from macro 'BLOCKING_INIT_NOTIFIER_HEAD'
      86 |                 init_rwsem(&(name)->rwsem);     \
         |                 ~~~~~~~~~~~~~~~~~~~~^~~~~~
   include/linux/rwsem.h:121:16: note: expanded from macro 'init_rwsem'
     121 |         __init_rwsem((sem), #sem, &__key);                      \
         |                       ^~~
   1 error generated.


vim +1109 drivers/gpio/gpiolib.c

  1044	
  1045	int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
  1046				       struct lock_class_key *lock_key,
  1047				       struct lock_class_key *request_key)
  1048	{
  1049		struct gpio_device *gdev;
  1050		unsigned int desc_index;
  1051		int base = 0;
  1052		int ret;
  1053	
  1054		/*
  1055		 * First: allocate and populate the internal stat container, and
  1056		 * set up the struct device.
  1057		 */
  1058		gdev = kzalloc(sizeof(*gdev), GFP_KERNEL);
  1059		if (!gdev)
  1060			return -ENOMEM;
  1061	
  1062		gdev->dev.type = &gpio_dev_type;
  1063		gdev->dev.bus = &gpio_bus_type;
  1064		gdev->dev.parent = gc->parent;
  1065		rcu_assign_pointer(gdev->chip, gc);
  1066	
  1067		gc->gpiodev = gdev;
  1068		gpiochip_set_data(gc, data);
  1069	
  1070		device_set_node(&gdev->dev, gpiochip_choose_fwnode(gc));
  1071	
  1072		ret = ida_alloc(&gpio_ida, GFP_KERNEL);
  1073		if (ret < 0)
  1074			goto err_free_gdev;
  1075		gdev->id = ret;
  1076	
  1077		ret = dev_set_name(&gdev->dev, GPIOCHIP_NAME "%d", gdev->id);
  1078		if (ret)
  1079			goto err_free_ida;
  1080	
  1081		if (gc->parent && gc->parent->driver)
  1082			gdev->owner = gc->parent->driver->owner;
  1083		else if (gc->owner)
  1084			/* TODO: remove chip->owner */
  1085			gdev->owner = gc->owner;
  1086		else
  1087			gdev->owner = THIS_MODULE;
  1088	
  1089		ret = gpiochip_get_ngpios(gc, &gdev->dev);
  1090		if (ret)
  1091			goto err_free_dev_name;
  1092	
  1093		gdev->descs = kcalloc(gc->ngpio, sizeof(*gdev->descs), GFP_KERNEL);
  1094		if (!gdev->descs) {
  1095			ret = -ENOMEM;
  1096			goto err_free_dev_name;
  1097		}
  1098	
  1099		gdev->label = kstrdup_const(gc->label ?: "unknown", GFP_KERNEL);
  1100		if (!gdev->label) {
  1101			ret = -ENOMEM;
  1102			goto err_free_descs;
  1103		}
  1104	
  1105		gdev->ngpio = gc->ngpio;
  1106		gdev->can_sleep = gc->can_sleep;
  1107	
  1108		rwlock_init(&gdev->line_state_lock);
> 1109		BLOCKING_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier);
  1110		BLOCKING_INIT_NOTIFIER_HEAD(&gdev->device_notifier);
  1111	
  1112		ret = init_srcu_struct(&gdev->srcu);
  1113		if (ret)
  1114			goto err_free_label;
  1115	
  1116		ret = init_srcu_struct(&gdev->desc_srcu);
  1117		if (ret)
  1118			goto err_cleanup_gdev_srcu;
  1119	
  1120	
  1121		scoped_guard(mutex, &gpio_devices_lock) {
  1122			/*
  1123			 * TODO: this allocates a Linux GPIO number base in the global
  1124			 * GPIO numberspace for this chip. In the long run we want to
  1125			 * get *rid* of this numberspace and use only descriptors, but
  1126			 * it may be a pipe dream. It will not happen before we get rid
  1127			 * of the sysfs interface anyways.
  1128			 */
  1129			base = gc->base;
  1130			if (base < 0) {
  1131				base = gpiochip_find_base_unlocked(gc->ngpio);
  1132				if (base < 0) {
  1133					ret = base;
  1134					base = 0;
  1135					goto err_cleanup_desc_srcu;
  1136				}
  1137	
  1138				/*
  1139				 * TODO: it should not be necessary to reflect the
  1140				 * assigned base outside of the GPIO subsystem. Go over
  1141				 * drivers and see if anyone makes use of this, else
  1142				 * drop this and assign a poison instead.
  1143				 */
  1144				gc->base = base;
  1145			} else {
  1146				dev_warn(&gdev->dev,
  1147					 "Static allocation of GPIO base is deprecated, use dynamic allocation.\n");
  1148			}
  1149	
  1150			gdev->base = base;
  1151	
  1152			ret = gpiodev_add_to_list_unlocked(gdev);
  1153			if (ret) {
  1154				gpiochip_err(gc, "GPIO integer space overlap, cannot add chip\n");
  1155				goto err_cleanup_desc_srcu;
  1156			}
  1157		}
  1158	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] gpiolib: fix race condition for gdev->srcu
  2025-12-23  8:42 [PATCH] gpiolib: fix race condition for gdev->srcu Paweł Narewski
  2025-12-23 20:14 ` kernel test robot
  2025-12-24  3:47 ` kernel test robot
@ 2025-12-24  6:21 ` kernel test robot
  2025-12-24  8:21 ` Bartosz Golaszewski
  2025-12-24 12:20 ` kernel test robot
  4 siblings, 0 replies; 6+ messages in thread
From: kernel test robot @ 2025-12-24  6:21 UTC (permalink / raw)
  To: Paweł Narewski, linux-gpio
  Cc: oe-kbuild-all, linusw, brgl, Paweł Narewski, Jakub Lewalski

Hi Paweł,

kernel test robot noticed the following build errors:

[auto build test ERROR on brgl/gpio/for-next]
[also build test ERROR on next-20251219]
[cannot apply to linus/master v6.16-rc1]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Pawe-Narewski/gpiolib-fix-race-condition-for-gdev-srcu/20251223-165142
base:   https://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git gpio/for-next
patch link:    https://lore.kernel.org/r/20251223084952.1847489-1-pawel.narewski%40nokia.com
patch subject: [PATCH] gpiolib: fix race condition for gdev->srcu
config: x86_64-rhel-9.4-ltp (https://download.01.org/0day-ci/archive/20251224/202512240750.LQnDrYIF-lkp@intel.com/config)
compiler: gcc-14 (Debian 14.2.0-19) 14.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251224/202512240750.LQnDrYIF-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202512240750.LQnDrYIF-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from include/linux/mm_types.h:13,
                    from include/linux/mmzone.h:22,
                    from include/linux/gfp.h:7,
                    from include/linux/slab.h:16,
                    from include/linux/resource_ext.h:11,
                    from include/linux/acpi.h:14,
                    from drivers/gpio/gpiolib.c:3:
   drivers/gpio/gpiolib.c: In function 'gpiochip_add_data_with_key':
>> include/linux/notifier.h:86:35: error: 'struct raw_notifier_head' has no member named 'rwsem'
      86 |                 init_rwsem(&(name)->rwsem);     \
         |                                   ^~
   include/linux/rwsem.h:121:23: note: in definition of macro 'init_rwsem'
     121 |         __init_rwsem((sem), #sem, &__key);                      \
         |                       ^~~
   drivers/gpio/gpiolib.c:1109:9: note: in expansion of macro 'BLOCKING_INIT_NOTIFIER_HEAD'
    1109 |         BLOCKING_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier);
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~


vim +86 include/linux/notifier.h

eabc069401bcf4 Alan Stern 2006-10-04  80  
e041c683412d5b Alan Stern 2006-03-27  81  #define ATOMIC_INIT_NOTIFIER_HEAD(name) do {	\
e041c683412d5b Alan Stern 2006-03-27  82  		spin_lock_init(&(name)->lock);	\
e041c683412d5b Alan Stern 2006-03-27  83  		(name)->head = NULL;		\
e041c683412d5b Alan Stern 2006-03-27  84  	} while (0)
e041c683412d5b Alan Stern 2006-03-27  85  #define BLOCKING_INIT_NOTIFIER_HEAD(name) do {	\
e041c683412d5b Alan Stern 2006-03-27 @86  		init_rwsem(&(name)->rwsem);	\
e041c683412d5b Alan Stern 2006-03-27  87  		(name)->head = NULL;		\
e041c683412d5b Alan Stern 2006-03-27  88  	} while (0)
e041c683412d5b Alan Stern 2006-03-27  89  #define RAW_INIT_NOTIFIER_HEAD(name) do {	\
e041c683412d5b Alan Stern 2006-03-27  90  		(name)->head = NULL;		\
e041c683412d5b Alan Stern 2006-03-27  91  	} while (0)
e041c683412d5b Alan Stern 2006-03-27  92  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] gpiolib: fix race condition for gdev->srcu
  2025-12-23  8:42 [PATCH] gpiolib: fix race condition for gdev->srcu Paweł Narewski
                   ` (2 preceding siblings ...)
  2025-12-24  6:21 ` kernel test robot
@ 2025-12-24  8:21 ` Bartosz Golaszewski
  2025-12-24 12:20 ` kernel test robot
  4 siblings, 0 replies; 6+ messages in thread
From: Bartosz Golaszewski @ 2025-12-24  8:21 UTC (permalink / raw)
  To: Paweł Narewski; +Cc: linux-gpio, linusw, Jakub Lewalski

On Tue, Dec 23, 2025 at 9:50 AM Paweł Narewski <pawel.narewski@nokia.com> wrote:
>
> If two drivers were calling gpiochip_add_data_with_key(), one may be
> traversing the srcu-protected list in gpio_name_to_desc(), meanwhile
> other has just added its gdev in gpiodev_add_to_list_unlocked().
> This creates a non-mutexed and non-protected timeframe, when one
> instance is dereferencing and using &gdev->srcu, before the other
> has initialized it, resulting in crash:
>
> [    4.935481] Unable to handle kernel paging request at virtual address ffff800272bcc000
> [    4.943396] Mem abort info:
> [    4.943400]   ESR = 0x0000000096000005
> [    4.943403]   EC = 0x25: DABT (current EL), IL = 32 bits
> [    4.943407]   SET = 0, FnV = 0
> [    4.943410]   EA = 0, S1PTW = 0
> [    4.943413]   FSC = 0x05: level 1 translation fault
> [    4.943416] Data abort info:
> [    4.943418]   ISV = 0, ISS = 0x00000005, ISS2 = 0x00000000
> [    4.946220]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
> [    4.955261]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
> [    4.955268] swapper pgtable: 4k pages, 48-bit VAs, pgdp=0000000038e6c000
> [    4.961449] [ffff800272bcc000] pgd=0000000000000000
> [    4.969203] , p4d=1000000039739003
> [    4.979730] , pud=0000000000000000
> [    4.980210] phandle (CPU): 0x0000005e, phandle (BE): 0x5e000000 for node "reset"
> [    4.991736] Internal error: Oops: 0000000096000005 [#1] PREEMPT SMP
> ...
> [    5.121359] pc : __srcu_read_lock+0x44/0x98
> [    5.131091] lr : gpio_name_to_desc+0x60/0x1a0
> [    5.153671] sp : ffff8000833bb430
> [    5.298440]
> [    5.298443] Call trace:
> [    5.298445]  __srcu_read_lock+0x44/0x98
> [    5.309484]  gpio_name_to_desc+0x60/0x1a0
> [    5.320692]  gpiochip_add_data_with_key+0x488/0xf00
> [    5.946419] ---[ end trace 0000000000000000 ]---
>
> Move initialization code for gdev fields before it is added to
> gpio_devices, with adjacent initialization code.
> Adjust goto statements  to reflect modified order of operations
>
> Fixes: 47d8b4c1d868 ("gpio: add SRCU infrastructure to struct gpio_device")
> Reviewed-by: Jakub Lewalski <jakub.lewalski@nokia.com>
> Signed-off-by: Paweł Narewski <pawel.narewski@nokia.com>
> ---

Good catch, there's indeed an optional call to
gpiochip_set_desc_names() that can race. Though I have no idea how you
tested the fix as it can't possibly build - line_state_notifier is a
raw notifier, not blocking.

Let me send a fixed version.

Bart

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] gpiolib: fix race condition for gdev->srcu
  2025-12-23  8:42 [PATCH] gpiolib: fix race condition for gdev->srcu Paweł Narewski
                   ` (3 preceding siblings ...)
  2025-12-24  8:21 ` Bartosz Golaszewski
@ 2025-12-24 12:20 ` kernel test robot
  4 siblings, 0 replies; 6+ messages in thread
From: kernel test robot @ 2025-12-24 12:20 UTC (permalink / raw)
  To: Paweł Narewski, linux-gpio
  Cc: llvm, oe-kbuild-all, linusw, brgl, Paweł Narewski,
	Jakub Lewalski

Hi Paweł,

kernel test robot noticed the following build errors:

[auto build test ERROR on brgl/gpio/for-next]
[also build test ERROR on linus/master v6.19-rc2 next-20251219]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Pawe-Narewski/gpiolib-fix-race-condition-for-gdev-srcu/20251223-165142
base:   https://git.kernel.org/pub/scm/linux/kernel/git/brgl/linux.git gpio/for-next
patch link:    https://lore.kernel.org/r/20251223084952.1847489-1-pawel.narewski%40nokia.com
patch subject: [PATCH] gpiolib: fix race condition for gdev->srcu
config: x86_64-kexec (https://download.01.org/0day-ci/archive/20251225/202512250122.ibtipV8L-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251225/202512250122.ibtipV8L-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202512250122.ibtipV8L-lkp@intel.com/

All errors (new ones prefixed by >>):

>> drivers/gpio/gpiolib.c:1109:2: error: no member named 'rwsem' in 'struct raw_notifier_head'
    1109 |         BLOCKING_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier);
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/notifier.h:86:23: note: expanded from macro 'BLOCKING_INIT_NOTIFIER_HEAD'
      86 |                 init_rwsem(&(name)->rwsem);     \
         |                 ~~~~~~~~~~~~~~~~~~~~^~~~~~
   include/linux/rwsem.h:121:16: note: expanded from macro 'init_rwsem'
     121 |         __init_rwsem((sem), #sem, &__key);                      \
         |                       ^~~
   1 error generated.


vim +1109 drivers/gpio/gpiolib.c

  1044	
  1045	int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
  1046				       struct lock_class_key *lock_key,
  1047				       struct lock_class_key *request_key)
  1048	{
  1049		struct gpio_device *gdev;
  1050		unsigned int desc_index;
  1051		int base = 0;
  1052		int ret;
  1053	
  1054		/*
  1055		 * First: allocate and populate the internal stat container, and
  1056		 * set up the struct device.
  1057		 */
  1058		gdev = kzalloc(sizeof(*gdev), GFP_KERNEL);
  1059		if (!gdev)
  1060			return -ENOMEM;
  1061	
  1062		gdev->dev.type = &gpio_dev_type;
  1063		gdev->dev.bus = &gpio_bus_type;
  1064		gdev->dev.parent = gc->parent;
  1065		rcu_assign_pointer(gdev->chip, gc);
  1066	
  1067		gc->gpiodev = gdev;
  1068		gpiochip_set_data(gc, data);
  1069	
  1070		device_set_node(&gdev->dev, gpiochip_choose_fwnode(gc));
  1071	
  1072		ret = ida_alloc(&gpio_ida, GFP_KERNEL);
  1073		if (ret < 0)
  1074			goto err_free_gdev;
  1075		gdev->id = ret;
  1076	
  1077		ret = dev_set_name(&gdev->dev, GPIOCHIP_NAME "%d", gdev->id);
  1078		if (ret)
  1079			goto err_free_ida;
  1080	
  1081		if (gc->parent && gc->parent->driver)
  1082			gdev->owner = gc->parent->driver->owner;
  1083		else if (gc->owner)
  1084			/* TODO: remove chip->owner */
  1085			gdev->owner = gc->owner;
  1086		else
  1087			gdev->owner = THIS_MODULE;
  1088	
  1089		ret = gpiochip_get_ngpios(gc, &gdev->dev);
  1090		if (ret)
  1091			goto err_free_dev_name;
  1092	
  1093		gdev->descs = kcalloc(gc->ngpio, sizeof(*gdev->descs), GFP_KERNEL);
  1094		if (!gdev->descs) {
  1095			ret = -ENOMEM;
  1096			goto err_free_dev_name;
  1097		}
  1098	
  1099		gdev->label = kstrdup_const(gc->label ?: "unknown", GFP_KERNEL);
  1100		if (!gdev->label) {
  1101			ret = -ENOMEM;
  1102			goto err_free_descs;
  1103		}
  1104	
  1105		gdev->ngpio = gc->ngpio;
  1106		gdev->can_sleep = gc->can_sleep;
  1107	
  1108		rwlock_init(&gdev->line_state_lock);
> 1109		BLOCKING_INIT_NOTIFIER_HEAD(&gdev->line_state_notifier);
  1110		BLOCKING_INIT_NOTIFIER_HEAD(&gdev->device_notifier);
  1111	
  1112		ret = init_srcu_struct(&gdev->srcu);
  1113		if (ret)
  1114			goto err_free_label;
  1115	
  1116		ret = init_srcu_struct(&gdev->desc_srcu);
  1117		if (ret)
  1118			goto err_cleanup_gdev_srcu;
  1119	
  1120	
  1121		scoped_guard(mutex, &gpio_devices_lock) {
  1122			/*
  1123			 * TODO: this allocates a Linux GPIO number base in the global
  1124			 * GPIO numberspace for this chip. In the long run we want to
  1125			 * get *rid* of this numberspace and use only descriptors, but
  1126			 * it may be a pipe dream. It will not happen before we get rid
  1127			 * of the sysfs interface anyways.
  1128			 */
  1129			base = gc->base;
  1130			if (base < 0) {
  1131				base = gpiochip_find_base_unlocked(gc->ngpio);
  1132				if (base < 0) {
  1133					ret = base;
  1134					base = 0;
  1135					goto err_cleanup_desc_srcu;
  1136				}
  1137	
  1138				/*
  1139				 * TODO: it should not be necessary to reflect the
  1140				 * assigned base outside of the GPIO subsystem. Go over
  1141				 * drivers and see if anyone makes use of this, else
  1142				 * drop this and assign a poison instead.
  1143				 */
  1144				gc->base = base;
  1145			} else {
  1146				dev_warn(&gdev->dev,
  1147					 "Static allocation of GPIO base is deprecated, use dynamic allocation.\n");
  1148			}
  1149	
  1150			gdev->base = base;
  1151	
  1152			ret = gpiodev_add_to_list_unlocked(gdev);
  1153			if (ret) {
  1154				gpiochip_err(gc, "GPIO integer space overlap, cannot add chip\n");
  1155				goto err_cleanup_desc_srcu;
  1156			}
  1157		}
  1158	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2025-12-24 12:20 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz, follow: Atom feed)
-- links below jump to the message on this page --
2025-12-23  8:42 [PATCH] gpiolib: fix race condition for gdev->srcu Paweł Narewski
2025-12-23 20:14 ` kernel test robot
2025-12-24  3:47 ` kernel test robot
2025-12-24  6:21 ` kernel test robot
2025-12-24  8:21 ` Bartosz Golaszewski
2025-12-24 12:20 ` kernel test robot

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.