* [PATCH v8 1/2] devcoredump: add new APIs to support migration of users from old device coredump related APIs
2022-08-23 11:21 [PATCH v8 0/2] Add new APIs of devcoredump and fix bugs Duoming Zhou
@ 2022-08-23 11:21 ` Duoming Zhou
2022-08-23 11:21 ` [PATCH v8 2/2 RESEND] mwifiex: fix sleep in atomic context bugs caused by dev_coredumpv Duoming Zhou
2022-08-24 20:42 ` [PATCH v8 0/2] Add new APIs of devcoredump and fix bugs Brian Norris
2 siblings, 0 replies; 8+ messages in thread
From: Duoming Zhou @ 2022-08-23 11:21 UTC (permalink / raw)
To: linux-kernel, gregkh, briannorris
Cc: johannes, rafael, amitkarwar, ganapathi017, sharvari.harisangam,
huxinming820, kvalo, davem, edumazet, kuba, pabeni,
linux-wireless, netdev, Duoming Zhou
The dev_coredumpv(), dev_coredumpm() and dev_coredumpsg() could not
be used in atomic context, because they call kvasprintf_const() and
kstrdup() with GFP_KERNEL parameter. The process is shown below:
dev_coredumpv(.., gfp_t gfp)
dev_coredumpm(.., gfp_t gfp)
dev_set_name
kobject_set_name_vargs
kvasprintf_const(GFP_KERNEL, ...); //may sleep
kstrdup(s, GFP_KERNEL); //may sleep
This patch adds new APIs that remove gfp_t parameter of dev_coredumpv(),
dev_coredumpm() and dev_coredumpsg() in order to show they could not be
used in atomic context.
These new APIs will ultimately replace the dev_coredumpv(), dev_coredumpm()
and dev_coredumpsg() when there are no users of the old APIs.
Fixes: 833c95456a70 ("device coredump: add new device coredump class")
Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
---
Changes in v8:
- add new APIs to prepare migration of users from old device coredump related APIs.
drivers/base/devcoredump.c | 116 ++++++++++++++++++++++++++++++++++++
include/linux/devcoredump.h | 34 +++++++++++
2 files changed, 150 insertions(+)
diff --git a/drivers/base/devcoredump.c b/drivers/base/devcoredump.c
index f4d794d6bb8..728457b12ce 100644
--- a/drivers/base/devcoredump.c
+++ b/drivers/base/devcoredump.c
@@ -185,6 +185,21 @@ void dev_coredumpv(struct device *dev, void *data, size_t datalen,
}
EXPORT_SYMBOL_GPL(dev_coredumpv);
+/**
+ * dev_core_dumpv - create device coredump with vmalloc data
+ * @dev: the struct device for the crashed device
+ * @data: vmalloc data containing the device coredump
+ * @datalen: length of the data
+ *
+ * This function takes ownership of the vmalloc'ed data and will free
+ * it when it is no longer used. See dev_core_dumpm() for more information.
+ */
+void dev_core_dumpv(struct device *dev, void *data, size_t datalen)
+{
+ dev_core_dumpm(dev, NULL, data, datalen, devcd_readv, devcd_freev);
+}
+EXPORT_SYMBOL_GPL(dev_core_dumpv);
+
static int devcd_match_failing(struct device *dev, const void *failing)
{
struct devcd_entry *devcd = dev_to_devcd(dev);
@@ -312,6 +327,87 @@ void dev_coredumpm(struct device *dev, struct module *owner,
}
EXPORT_SYMBOL_GPL(dev_coredumpm);
+/**
+ * dev_core_dumpm - create device coredump with read/free methods
+ * @dev: the struct device for the crashed device
+ * @owner: the module that contains the read/free functions, use %THIS_MODULE
+ * @data: data cookie for the @read/@free functions
+ * @datalen: length of the data
+ * @read: function to read from the given buffer
+ * @free: function to free the given buffer
+ *
+ * Creates a new device coredump for the given device. If a previous one hasn't
+ * been read yet, the new coredump is discarded. The data lifetime is determined
+ * by the device coredump framework and when it is no longer needed the @free
+ * function will be called to free the data.
+ */
+void dev_core_dumpm(struct device *dev, struct module *owner,
+ void *data, size_t datalen,
+ ssize_t (*read)(char *buffer, loff_t offset, size_t count,
+ void *data, size_t datalen),
+ void (*free)(void *data))
+{
+ static atomic_t devcd_count = ATOMIC_INIT(0);
+ struct devcd_entry *devcd;
+ struct device *existing;
+
+ if (devcd_disabled)
+ goto free;
+
+ existing = class_find_device(&devcd_class, NULL, dev,
+ devcd_match_failing);
+ if (existing) {
+ put_device(existing);
+ goto free;
+ }
+
+ if (!try_module_get(owner))
+ goto free;
+
+ devcd = kzalloc(sizeof(*devcd), GFP_KERNEL);
+ if (!devcd)
+ goto put_module;
+
+ devcd->owner = owner;
+ devcd->data = data;
+ devcd->datalen = datalen;
+ devcd->read = read;
+ devcd->free = free;
+ devcd->failing_dev = get_device(dev);
+
+ device_initialize(&devcd->devcd_dev);
+
+ dev_set_name(&devcd->devcd_dev, "devcd%d",
+ atomic_inc_return(&devcd_count));
+ devcd->devcd_dev.class = &devcd_class;
+
+ if (device_add(&devcd->devcd_dev))
+ goto put_device;
+
+ /*
+ * These should normally not fail, but there is no problem
+ * continuing without the links, so just warn instead of
+ * failing.
+ */
+ if (sysfs_create_link(&devcd->devcd_dev.kobj, &dev->kobj,
+ "failing_device") ||
+ sysfs_create_link(&dev->kobj, &devcd->devcd_dev.kobj,
+ "devcoredump"))
+ dev_warn(dev, "devcoredump create_link failed\n");
+
+ INIT_DELAYED_WORK(&devcd->del_wk, devcd_del);
+ schedule_delayed_work(&devcd->del_wk, DEVCD_TIMEOUT);
+
+ return;
+ put_device:
+ put_device(&devcd->devcd_dev);
+ put_module:
+ module_put(owner);
+ free:
+ free(data);
+}
+EXPORT_SYMBOL_GPL(dev_core_dumpm);
+
/**
* dev_coredumpsg - create device coredump that uses scatterlist as data
* parameter
@@ -333,6 +429,26 @@ void dev_coredumpsg(struct device *dev, struct scatterlist *table,
}
EXPORT_SYMBOL_GPL(dev_coredumpsg);
+/**
+ * dev_core_dumpsg - create device coredump that uses scatterlist as data
+ * parameter
+ * @dev: the struct device for the crashed device
+ * @table: the dump data
+ * @datalen: length of the data
+ *
+ * Creates a new device coredump for the given device. If a previous one hasn't
+ * been read yet, the new coredump is discarded. The data lifetime is determined
+ * by the device coredump framework and when it is no longer needed
+ * it will free the data.
+ */
+void dev_core_dumpsg(struct device *dev, struct scatterlist *table,
+ size_t datalen)
+{
+ dev_core_dumpm(dev, NULL, table, datalen, devcd_read_from_sgtable,
+ devcd_free_sgtable);
+}
+EXPORT_SYMBOL_GPL(dev_core_dumpsg);
+
static int __init devcoredump_init(void)
{
return class_register(&devcd_class);
diff --git a/include/linux/devcoredump.h b/include/linux/devcoredump.h
index c008169ed2c..113fe63800a 100644
--- a/include/linux/devcoredump.h
+++ b/include/linux/devcoredump.h
@@ -55,14 +55,26 @@ static inline void _devcd_free_sgtable(struct scatterlist *table)
void dev_coredumpv(struct device *dev, void *data, size_t datalen,
gfp_t gfp);
+void dev_core_dumpv(struct device *dev, void *data, size_t datalen);
+
void dev_coredumpm(struct device *dev, struct module *owner,
void *data, size_t datalen, gfp_t gfp,
ssize_t (*read)(char *buffer, loff_t offset, size_t count,
void *data, size_t datalen),
void (*free)(void *data));
+void dev_core_dumpm(struct device *dev, struct module *owner,
+ void *data, size_t datalen,
+ ssize_t (*read)(char *buffer, loff_t offset, size_t count,
+ void *data, size_t datalen),
+ void (*free)(void *data));
+
void dev_coredumpsg(struct device *dev, struct scatterlist *table,
size_t datalen, gfp_t gfp);
+
+void dev_core_dumpsg(struct device *dev, struct scatterlist *table,
+ size_t datalen);
+
#else
static inline void dev_coredumpv(struct device *dev, void *data,
size_t datalen, gfp_t gfp)
@@ -70,6 +82,12 @@ static inline void dev_coredumpv(struct device *dev, void *data,
vfree(data);
}
+static inline void dev_core_dumpv(struct device *dev, void *data,
+ size_t datalen)
+{
+ vfree(data);
+}
+
static inline void
dev_coredumpm(struct device *dev, struct module *owner,
void *data, size_t datalen, gfp_t gfp,
@@ -80,11 +98,27 @@ dev_coredumpm(struct device *dev, struct module *owner,
free(data);
}
+static inline void
+dev_core_dumpm(struct device *dev, struct module *owner,
+ void *data, size_t datalen,
+ ssize_t (*read)(char *buffer, loff_t offset, size_t count,
+ void *data, size_t datalen),
+ void (*free)(void *data))
+{
+ free(data);
+}
+
static inline void dev_coredumpsg(struct device *dev, struct scatterlist *table,
size_t datalen, gfp_t gfp)
{
_devcd_free_sgtable(table);
}
+
+static inline void dev_core_dumpsg(struct device *dev, struct scatterlist *table,
+ size_t datalen)
+{
+ _devcd_free_sgtable(table);
+}
#endif /* CONFIG_DEV_COREDUMP */
#endif /* __DEVCOREDUMP_H */
--
2.17.1
^ permalink raw reply related [flat|nested] 8+ messages in thread* [PATCH v8 2/2 RESEND] mwifiex: fix sleep in atomic context bugs caused by dev_coredumpv
2022-08-23 11:21 [PATCH v8 0/2] Add new APIs of devcoredump and fix bugs Duoming Zhou
2022-08-23 11:21 ` [PATCH v8 1/2] devcoredump: add new APIs to support migration of users from old device coredump related APIs Duoming Zhou
@ 2022-08-23 11:21 ` Duoming Zhou
2022-09-22 6:08 ` Kalle Valo
2022-08-24 20:42 ` [PATCH v8 0/2] Add new APIs of devcoredump and fix bugs Brian Norris
2 siblings, 1 reply; 8+ messages in thread
From: Duoming Zhou @ 2022-08-23 11:21 UTC (permalink / raw)
To: linux-kernel, gregkh, briannorris
Cc: johannes, rafael, amitkarwar, ganapathi017, sharvari.harisangam,
huxinming820, kvalo, davem, edumazet, kuba, pabeni,
linux-wireless, netdev, Duoming Zhou
There are sleep in atomic context bugs when uploading device dump
data in mwifiex. The root cause is that dev_coredumpv could not
be used in atomic contexts, because it calls dev_set_name which
include operations that may sleep. The call tree shows execution
paths that could lead to bugs:
(Interrupt context)
fw_dump_timer_fn
mwifiex_upload_device_dump
dev_coredumpv(..., GFP_KERNEL)
dev_coredumpm()
kzalloc(sizeof(*devcd), gfp); //may sleep
dev_set_name
kobject_set_name_vargs
kvasprintf_const(GFP_KERNEL, ...); //may sleep
kstrdup(s, GFP_KERNEL); //may sleep
The corresponding fail log is shown below:
[ 135.275938] usb 1-1: == mwifiex dump information to /sys/class/devcoredump start
[ 135.281029] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:265
...
[ 135.293613] Call Trace:
[ 135.293613] <IRQ>
[ 135.293613] dump_stack_lvl+0x57/0x7d
[ 135.293613] __might_resched.cold+0x138/0x173
[ 135.293613] ? dev_coredumpm+0xca/0x2e0
[ 135.293613] kmem_cache_alloc_trace+0x189/0x1f0
[ 135.293613] ? devcd_match_failing+0x30/0x30
[ 135.293613] dev_coredumpm+0xca/0x2e0
[ 135.293613] ? devcd_freev+0x10/0x10
[ 135.293613] dev_coredumpv+0x1c/0x20
[ 135.293613] ? devcd_match_failing+0x30/0x30
[ 135.293613] mwifiex_upload_device_dump+0x65/0xb0
[ 135.293613] ? mwifiex_dnld_fw+0x1b0/0x1b0
[ 135.293613] call_timer_fn+0x122/0x3d0
[ 135.293613] ? msleep_interruptible+0xb0/0xb0
[ 135.293613] ? lock_downgrade+0x3c0/0x3c0
[ 135.293613] ? __next_timer_interrupt+0x13c/0x160
[ 135.293613] ? lockdep_hardirqs_on_prepare+0xe/0x220
[ 135.293613] ? mwifiex_dnld_fw+0x1b0/0x1b0
[ 135.293613] __run_timers.part.0+0x3f8/0x540
[ 135.293613] ? call_timer_fn+0x3d0/0x3d0
[ 135.293613] ? arch_restore_msi_irqs+0x10/0x10
[ 135.293613] ? lapic_next_event+0x31/0x40
[ 135.293613] run_timer_softirq+0x4f/0xb0
[ 135.293613] __do_softirq+0x1c2/0x651
...
[ 135.293613] RIP: 0010:default_idle+0xb/0x10
[ 135.293613] RSP: 0018:ffff888006317e68 EFLAGS: 00000246
[ 135.293613] RAX: ffffffff82ad8d10 RBX: ffff888006301cc0 RCX: ffffffff82ac90e1
[ 135.293613] RDX: ffffed100d9ff1b4 RSI: ffffffff831ad140 RDI: ffffffff82ad8f20
[ 135.293613] RBP: 0000000000000003 R08: 0000000000000000 R09: ffff88806cff8d9b
[ 135.293613] R10: ffffed100d9ff1b3 R11: 0000000000000001 R12: ffffffff84593410
[ 135.293613] R13: 0000000000000000 R14: 0000000000000000 R15: 1ffff11000c62fd2
...
[ 135.389205] usb 1-1: == mwifiex dump information to /sys/class/devcoredump end
This patch uses delayed work to replace timer and moves the operations
that may sleep into a delayed work in order to mitigate bugs, it was
tested on Marvell 88W8801 chip whose port is usb and the firmware is
usb8801_uapsta.bin. The following is the result after using delayed
work to replace timer.
[ 134.936453] usb 1-1: == mwifiex dump information to /sys/class/devcoredump start
[ 135.043344] usb 1-1: == mwifiex dump information to /sys/class/devcoredump end
As we can see, there is no bug now.
Fixes: f5ecd02a8b20 ("mwifiex: device dump support for usb interface")
Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
Changes since v6:
- Use clang-format to adjust the format of code.
drivers/net/wireless/marvell/mwifiex/init.c | 9 +++++----
drivers/net/wireless/marvell/mwifiex/main.h | 3 ++-
drivers/net/wireless/marvell/mwifiex/sta_event.c | 6 +++---
3 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/drivers/net/wireless/marvell/mwifiex/init.c b/drivers/net/wireless/marvell/mwifiex/init.c
index fc77489cc51..7dddb4b5dea 100644
--- a/drivers/net/wireless/marvell/mwifiex/init.c
+++ b/drivers/net/wireless/marvell/mwifiex/init.c
@@ -51,9 +51,10 @@ static void wakeup_timer_fn(struct timer_list *t)
adapter->if_ops.card_reset(adapter);
}
-static void fw_dump_timer_fn(struct timer_list *t)
+static void fw_dump_work(struct work_struct *work)
{
- struct mwifiex_adapter *adapter = from_timer(adapter, t, devdump_timer);
+ struct mwifiex_adapter *adapter =
+ container_of(work, struct mwifiex_adapter, devdump_work.work);
mwifiex_upload_device_dump(adapter);
}
@@ -309,7 +310,7 @@ static void mwifiex_init_adapter(struct mwifiex_adapter *adapter)
adapter->active_scan_triggered = false;
timer_setup(&adapter->wakeup_timer, wakeup_timer_fn, 0);
adapter->devdump_len = 0;
- timer_setup(&adapter->devdump_timer, fw_dump_timer_fn, 0);
+ INIT_DELAYED_WORK(&adapter->devdump_work, fw_dump_work);
}
/*
@@ -388,7 +389,7 @@ static void
mwifiex_adapter_cleanup(struct mwifiex_adapter *adapter)
{
del_timer(&adapter->wakeup_timer);
- del_timer_sync(&adapter->devdump_timer);
+ cancel_delayed_work_sync(&adapter->devdump_work);
mwifiex_cancel_all_pending_cmd(adapter);
wake_up_interruptible(&adapter->cmd_wait_q.wait);
wake_up_interruptible(&adapter->hs_activate_wait_q);
diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h
index 87729d251fe..63f861e6b28 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.h
+++ b/drivers/net/wireless/marvell/mwifiex/main.h
@@ -37,6 +37,7 @@
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/of_irq.h>
+#include <linux/workqueue.h>
#include "decl.h"
#include "ioctl.h"
@@ -1043,7 +1044,7 @@ struct mwifiex_adapter {
/* Device dump data/length */
void *devdump_data;
int devdump_len;
- struct timer_list devdump_timer;
+ struct delayed_work devdump_work;
bool ignore_btcoex_events;
};
diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c
index b95e90a7d12..e80e372cce8 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_event.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c
@@ -611,8 +611,8 @@ mwifiex_fw_dump_info_event(struct mwifiex_private *priv,
* transmission event get lost, in this cornel case,
* user would still get partial of the dump.
*/
- mod_timer(&adapter->devdump_timer,
- jiffies + msecs_to_jiffies(MWIFIEX_TIMER_10S));
+ schedule_delayed_work(&adapter->devdump_work,
+ msecs_to_jiffies(MWIFIEX_TIMER_10S));
}
/* Overflow check */
@@ -631,7 +631,7 @@ mwifiex_fw_dump_info_event(struct mwifiex_private *priv,
return;
upload_dump:
- del_timer_sync(&adapter->devdump_timer);
+ cancel_delayed_work_sync(&adapter->devdump_work);
mwifiex_upload_device_dump(adapter);
}
--
2.17.1
^ permalink raw reply related [flat|nested] 8+ messages in thread