* [5.2-stable PATCH 1/2] libnvdimm/bus: Prepare the nd_ioctl() path to be re-entrant
@ 2019-08-06 1:29 Dan Williams
2019-08-06 1:29 ` [5.2-stable PATCH 2/2] libnvdimm/bus: Fix wait_nvdimm_bus_probe_idle() ABBA deadlock Dan Williams
2019-08-06 22:18 ` [5.2-stable PATCH 1/2] libnvdimm/bus: Prepare the nd_ioctl() path to be re-entrant Sasha Levin
0 siblings, 2 replies; 3+ messages in thread
From: Dan Williams @ 2019-08-06 1:29 UTC (permalink / raw)
To: stable; +Cc: Vishal Verma, Vishal Verma, Vishal Verma
commit 6de5d06e657acdbcf9637dac37916a4a5309e0f4 upstream.
In preparation for not holding a lock over the execution of nd_ioctl(),
update the implementation to allow multiple threads to be attempting
ioctls at the same time. The bus lock still prevents multiple in-flight
->ndctl() invocations from corrupting each other's state, but static
global staging buffers are moved to the heap.
Reported-by: Vishal Verma <vishal.l.verma@intel.com>
Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>
Tested-by: Vishal Verma <vishal.l.verma@intel.com>
Link: https://lore.kernel.org/r/156341208947.292348.10560140326807607481.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
drivers/nvdimm/bus.c | 59 +++++++++++++++++++++++++++++++-------------------
1 file changed, 37 insertions(+), 22 deletions(-)
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index dfb93228d6a7..a38572bf486b 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -973,20 +973,19 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
int read_only, unsigned int ioctl_cmd, unsigned long arg)
{
struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
- static char out_env[ND_CMD_MAX_ENVELOPE];
- static char in_env[ND_CMD_MAX_ENVELOPE];
const struct nd_cmd_desc *desc = NULL;
unsigned int cmd = _IOC_NR(ioctl_cmd);
struct device *dev = &nvdimm_bus->dev;
void __user *p = (void __user *) arg;
+ char *out_env = NULL, *in_env = NULL;
const char *cmd_name, *dimm_name;
u32 in_len = 0, out_len = 0;
unsigned int func = cmd;
unsigned long cmd_mask;
struct nd_cmd_pkg pkg;
int rc, i, cmd_rc;
+ void *buf = NULL;
u64 buf_len = 0;
- void *buf;
if (nvdimm) {
desc = nd_cmd_dimm_desc(cmd);
@@ -1026,6 +1025,9 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
}
/* process an input envelope */
+ in_env = kzalloc(ND_CMD_MAX_ENVELOPE, GFP_KERNEL);
+ if (!in_env)
+ return -ENOMEM;
for (i = 0; i < desc->in_num; i++) {
u32 in_size, copy;
@@ -1033,14 +1035,17 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
if (in_size == UINT_MAX) {
dev_err(dev, "%s:%s unknown input size cmd: %s field: %d\n",
__func__, dimm_name, cmd_name, i);
- return -ENXIO;
+ rc = -ENXIO;
+ goto out;
}
- if (in_len < sizeof(in_env))
- copy = min_t(u32, sizeof(in_env) - in_len, in_size);
+ if (in_len < ND_CMD_MAX_ENVELOPE)
+ copy = min_t(u32, ND_CMD_MAX_ENVELOPE - in_len, in_size);
else
copy = 0;
- if (copy && copy_from_user(&in_env[in_len], p + in_len, copy))
- return -EFAULT;
+ if (copy && copy_from_user(&in_env[in_len], p + in_len, copy)) {
+ rc = -EFAULT;
+ goto out;
+ }
in_len += in_size;
}
@@ -1052,6 +1057,12 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
}
/* process an output envelope */
+ out_env = kzalloc(ND_CMD_MAX_ENVELOPE, GFP_KERNEL);
+ if (!out_env) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
for (i = 0; i < desc->out_num; i++) {
u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i,
(u32 *) in_env, (u32 *) out_env, 0);
@@ -1060,15 +1071,18 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
if (out_size == UINT_MAX) {
dev_dbg(dev, "%s unknown output size cmd: %s field: %d\n",
dimm_name, cmd_name, i);
- return -EFAULT;
+ rc = -EFAULT;
+ goto out;
}
- if (out_len < sizeof(out_env))
- copy = min_t(u32, sizeof(out_env) - out_len, out_size);
+ if (out_len < ND_CMD_MAX_ENVELOPE)
+ copy = min_t(u32, ND_CMD_MAX_ENVELOPE - out_len, out_size);
else
copy = 0;
if (copy && copy_from_user(&out_env[out_len],
- p + in_len + out_len, copy))
- return -EFAULT;
+ p + in_len + out_len, copy)) {
+ rc = -EFAULT;
+ goto out;
+ }
out_len += out_size;
}
@@ -1076,12 +1090,15 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
if (buf_len > ND_IOCTL_MAX_BUFLEN) {
dev_dbg(dev, "%s cmd: %s buf_len: %llu > %d\n", dimm_name,
cmd_name, buf_len, ND_IOCTL_MAX_BUFLEN);
- return -EINVAL;
+ rc = -EINVAL;
+ goto out;
}
buf = vmalloc(buf_len);
- if (!buf)
- return -ENOMEM;
+ if (!buf) {
+ rc = -ENOMEM;
+ goto out;
+ }
if (copy_from_user(buf, p, buf_len)) {
rc = -EFAULT;
@@ -1103,17 +1120,15 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
nvdimm_account_cleared_poison(nvdimm_bus, clear_err->address,
clear_err->cleared);
}
- nvdimm_bus_unlock(&nvdimm_bus->dev);
if (copy_to_user(p, buf, buf_len))
rc = -EFAULT;
- vfree(buf);
- return rc;
-
- out_unlock:
+out_unlock:
nvdimm_bus_unlock(&nvdimm_bus->dev);
- out:
+out:
+ kfree(in_env);
+ kfree(out_env);
vfree(buf);
return rc;
}
^ permalink raw reply related [flat|nested] 3+ messages in thread
* [5.2-stable PATCH 2/2] libnvdimm/bus: Fix wait_nvdimm_bus_probe_idle() ABBA deadlock
2019-08-06 1:29 [5.2-stable PATCH 1/2] libnvdimm/bus: Prepare the nd_ioctl() path to be re-entrant Dan Williams
@ 2019-08-06 1:29 ` Dan Williams
2019-08-06 22:18 ` [5.2-stable PATCH 1/2] libnvdimm/bus: Prepare the nd_ioctl() path to be re-entrant Sasha Levin
1 sibling, 0 replies; 3+ messages in thread
From: Dan Williams @ 2019-08-06 1:29 UTC (permalink / raw)
To: stable; +Cc: Vishal Verma, Jane Chu
commit ca6bf264f6d856f959c4239cda1047b587745c67 upstream.
A multithreaded namespace creation/destruction stress test currently
deadlocks with the following lockup signature:
INFO: task ndctl:2924 blocked for more than 122 seconds.
Tainted: G OE 5.2.0-rc4+ #3382
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
ndctl D 0 2924 1176 0x00000000
Call Trace:
? __schedule+0x27e/0x780
schedule+0x30/0xb0
wait_nvdimm_bus_probe_idle+0x8a/0xd0 [libnvdimm]
? finish_wait+0x80/0x80
uuid_store+0xe6/0x2e0 [libnvdimm]
kernfs_fop_write+0xf0/0x1a0
vfs_write+0xb7/0x1b0
ksys_write+0x5c/0xd0
do_syscall_64+0x60/0x240
INFO: task ndctl:2923 blocked for more than 122 seconds.
Tainted: G OE 5.2.0-rc4+ #3382
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
ndctl D 0 2923 1175 0x00000000
Call Trace:
? __schedule+0x27e/0x780
? __mutex_lock+0x489/0x910
schedule+0x30/0xb0
schedule_preempt_disabled+0x11/0x20
__mutex_lock+0x48e/0x910
? nvdimm_namespace_common_probe+0x95/0x4d0 [libnvdimm]
? __lock_acquire+0x23f/0x1710
? nvdimm_namespace_common_probe+0x95/0x4d0 [libnvdimm]
nvdimm_namespace_common_probe+0x95/0x4d0 [libnvdimm]
__dax_pmem_probe+0x5e/0x210 [dax_pmem_core]
? nvdimm_bus_probe+0x1d0/0x2c0 [libnvdimm]
dax_pmem_probe+0xc/0x20 [dax_pmem]
nvdimm_bus_probe+0x90/0x2c0 [libnvdimm]
really_probe+0xef/0x390
driver_probe_device+0xb4/0x100
In this sequence an 'nd_dax' device is being probed and trying to take
the lock on its backing namespace to validate that the 'nd_dax' device
indeed has exclusive access to the backing namespace. Meanwhile, another
thread is trying to update the uuid property of that same backing
namespace. So one thread is in the probe path trying to acquire the
lock, and the other thread has acquired the lock and tries to flush the
probe path.
Fix this deadlock by not holding the namespace device_lock over the
wait_nvdimm_bus_probe_idle() synchronization step. In turn this requires
the device_lock to be held on entry to wait_nvdimm_bus_probe_idle() and
subsequently dropped internally to wait_nvdimm_bus_probe_idle().
Cc: <stable@vger.kernel.org>
Fixes: bf9bccc14c05 ("libnvdimm: pmem label sets and namespace instantiation")
Cc: Vishal Verma <vishal.l.verma@intel.com>
Tested-by: Jane Chu <jane.chu@oracle.com>
Link: https://lore.kernel.org/r/156341210094.292348.2384694131126767789.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
drivers/nvdimm/bus.c | 14 +++++++++-----
drivers/nvdimm/region_devs.c | 4 ++++
2 files changed, 13 insertions(+), 5 deletions(-)
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index a38572bf486b..df41f3571dc9 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -887,10 +887,12 @@ void wait_nvdimm_bus_probe_idle(struct device *dev)
do {
if (nvdimm_bus->probe_active == 0)
break;
- nvdimm_bus_unlock(&nvdimm_bus->dev);
+ nvdimm_bus_unlock(dev);
+ device_unlock(dev);
wait_event(nvdimm_bus->wait,
nvdimm_bus->probe_active == 0);
- nvdimm_bus_lock(&nvdimm_bus->dev);
+ device_lock(dev);
+ nvdimm_bus_lock(dev);
} while (true);
}
@@ -1016,7 +1018,7 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
case ND_CMD_ARS_START:
case ND_CMD_CLEAR_ERROR:
case ND_CMD_CALL:
- dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n",
+ dev_dbg(dev, "'%s' command while read-only.\n",
nvdimm ? nvdimm_cmd_name(cmd)
: nvdimm_bus_cmd_name(cmd));
return -EPERM;
@@ -1105,7 +1107,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
goto out;
}
- nvdimm_bus_lock(&nvdimm_bus->dev);
+ device_lock(dev);
+ nvdimm_bus_lock(dev);
rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf);
if (rc)
goto out_unlock;
@@ -1125,7 +1128,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
rc = -EFAULT;
out_unlock:
- nvdimm_bus_unlock(&nvdimm_bus->dev);
+ nvdimm_bus_unlock(dev);
+ device_unlock(dev);
out:
kfree(in_env);
kfree(out_env);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 4fed9ce9c2fe..a15276cdec7d 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -422,10 +422,12 @@ static ssize_t available_size_show(struct device *dev,
* memory nvdimm_bus_lock() is dropped, but that's userspace's
* problem to not race itself.
*/
+ device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
available = nd_region_available_dpa(nd_region);
nvdimm_bus_unlock(dev);
+ device_unlock(dev);
return sprintf(buf, "%llu\n", available);
}
@@ -437,10 +439,12 @@ static ssize_t max_available_extent_show(struct device *dev,
struct nd_region *nd_region = to_nd_region(dev);
unsigned long long available = 0;
+ device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
available = nd_region_allocatable_dpa(nd_region);
nvdimm_bus_unlock(dev);
+ device_unlock(dev);
return sprintf(buf, "%llu\n", available);
}
^ permalink raw reply related [flat|nested] 3+ messages in thread
* Re: [5.2-stable PATCH 1/2] libnvdimm/bus: Prepare the nd_ioctl() path to be re-entrant
2019-08-06 1:29 [5.2-stable PATCH 1/2] libnvdimm/bus: Prepare the nd_ioctl() path to be re-entrant Dan Williams
2019-08-06 1:29 ` [5.2-stable PATCH 2/2] libnvdimm/bus: Fix wait_nvdimm_bus_probe_idle() ABBA deadlock Dan Williams
@ 2019-08-06 22:18 ` Sasha Levin
1 sibling, 0 replies; 3+ messages in thread
From: Sasha Levin @ 2019-08-06 22:18 UTC (permalink / raw)
To: Dan Williams; +Cc: stable, Vishal Verma
On Mon, Aug 05, 2019 at 06:29:04PM -0700, Dan Williams wrote:
>commit 6de5d06e657acdbcf9637dac37916a4a5309e0f4 upstream.
>
>In preparation for not holding a lock over the execution of nd_ioctl(),
>update the implementation to allow multiple threads to be attempting
>ioctls at the same time. The bus lock still prevents multiple in-flight
>->ndctl() invocations from corrupting each other's state, but static
>global staging buffers are moved to the heap.
>
>Reported-by: Vishal Verma <vishal.l.verma@intel.com>
>Reviewed-by: Vishal Verma <vishal.l.verma@intel.com>
>Tested-by: Vishal Verma <vishal.l.verma@intel.com>
>Link: https://lore.kernel.org/r/156341208947.292348.10560140326807607481.stgit@dwillia2-desk3.amr.corp.intel.com
>Signed-off-by: Dan Williams <dan.j.williams@intel.com>
I've queued up both the 5.2 and 4.19 patches, thanks!
--
Thanks,
Sasha
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2019-08-06 22:18 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-08-06 1:29 [5.2-stable PATCH 1/2] libnvdimm/bus: Prepare the nd_ioctl() path to be re-entrant Dan Williams
2019-08-06 1:29 ` [5.2-stable PATCH 2/2] libnvdimm/bus: Fix wait_nvdimm_bus_probe_idle() ABBA deadlock Dan Williams
2019-08-06 22:18 ` [5.2-stable PATCH 1/2] libnvdimm/bus: Prepare the nd_ioctl() path to be re-entrant Sasha Levin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).