From: lizf@kernel.org
To: stable@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Yishai Hadas <yishaih@mellanox.com>,
Shachar Raindel <raindel@mellanox.com>,
Doug Ledford <dledford@redhat.com>, Zefan Li <lizefan@huawei.com>
Subject: [PATCH 3.4 21/92] IB/uverbs: Fix race between ib_uverbs_open and remove_one
Date: Mon, 18 Apr 2016 18:45:26 +0800 [thread overview]
Message-ID: <1460976397-5688-21-git-send-email-lizf@kernel.org> (raw)
In-Reply-To: <1460976338-5631-1-git-send-email-lizf@kernel.org>
From: Yishai Hadas <yishaih@mellanox.com>
3.4.112-rc1 review patch. If anyone has any objections, please let me know.
------------------
commit 35d4a0b63dc0c6d1177d4f532a9deae958f0662c upstream.
Fixes: 2a72f212263701b927559f6850446421d5906c41 ("IB/uverbs: Remove dev_table")
Before this commit there was a device look-up table that was protected
by a spin_lock used by ib_uverbs_open and by ib_uverbs_remove_one. When
it was dropped and container_of was used instead, it enabled the race
with remove_one as dev might be freed just after:
dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev) but
before the kref_get.
In addition, this buggy patch added some dead code as
container_of(x,y,z) can never be NULL and so dev can never be NULL.
As a result the comment above ib_uverbs_open saying "the open method
will either immediately run -ENXIO" is wrong as it can never happen.
The solution follows Jason Gunthorpe suggestion from below URL:
https://www.mail-archive.com/linux-rdma@vger.kernel.org/msg25692.html
cdev will hold a kref on the parent (the containing structure,
ib_uverbs_device) and only when that kref is released it is
guaranteed that open will never be called again.
In addition, fixes the active count scheme to use an atomic
not a kref to prevent WARN_ON as pointed by above comment
from Jason.
Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Reviewed-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Zefan Li <lizefan@huawei.com>
---
drivers/infiniband/core/uverbs.h | 3 ++-
drivers/infiniband/core/uverbs_main.c | 43 ++++++++++++++++++++++++-----------
2 files changed, 32 insertions(+), 14 deletions(-)
diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 5bcb2af..228af18 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -69,7 +69,7 @@
*/
struct ib_uverbs_device {
- struct kref ref;
+ atomic_t refcount;
int num_comp_vectors;
struct completion comp;
struct device *dev;
@@ -78,6 +78,7 @@ struct ib_uverbs_device {
struct cdev cdev;
struct rb_root xrcd_tree;
struct mutex xrcd_tree_mutex;
+ struct kobject kobj;
};
struct ib_uverbs_event_file {
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 5b51e4e..c8e7669 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -117,14 +117,18 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
static void ib_uverbs_add_one(struct ib_device *device);
static void ib_uverbs_remove_one(struct ib_device *device);
-static void ib_uverbs_release_dev(struct kref *ref)
+static void ib_uverbs_release_dev(struct kobject *kobj)
{
struct ib_uverbs_device *dev =
- container_of(ref, struct ib_uverbs_device, ref);
+ container_of(kobj, struct ib_uverbs_device, kobj);
- complete(&dev->comp);
+ kfree(dev);
}
+static struct kobj_type ib_uverbs_dev_ktype = {
+ .release = ib_uverbs_release_dev,
+};
+
static void ib_uverbs_release_event_file(struct kref *ref)
{
struct ib_uverbs_event_file *file =
@@ -273,13 +277,19 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
return context->device->dealloc_ucontext(context);
}
+static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
+{
+ complete(&dev->comp);
+}
+
static void ib_uverbs_release_file(struct kref *ref)
{
struct ib_uverbs_file *file =
container_of(ref, struct ib_uverbs_file, ref);
module_put(file->device->ib_dev->owner);
- kref_put(&file->device->ref, ib_uverbs_release_dev);
+ if (atomic_dec_and_test(&file->device->refcount))
+ ib_uverbs_comp_dev(file->device);
kfree(file);
}
@@ -621,9 +631,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
int ret;
dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
- if (dev)
- kref_get(&dev->ref);
- else
+ if (!atomic_inc_not_zero(&dev->refcount))
return -ENXIO;
if (!try_module_get(dev->ib_dev->owner)) {
@@ -644,6 +652,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
mutex_init(&file->mutex);
filp->private_data = file;
+ kobject_get(&dev->kobj);
return nonseekable_open(inode, filp);
@@ -651,13 +660,16 @@ err_module:
module_put(dev->ib_dev->owner);
err:
- kref_put(&dev->ref, ib_uverbs_release_dev);
+ if (atomic_dec_and_test(&dev->refcount))
+ ib_uverbs_comp_dev(dev);
+
return ret;
}
static int ib_uverbs_close(struct inode *inode, struct file *filp)
{
struct ib_uverbs_file *file = filp->private_data;
+ struct ib_uverbs_device *dev = file->device;
ib_uverbs_cleanup_ucontext(file, file->ucontext);
@@ -665,6 +677,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
kref_put(&file->ref, ib_uverbs_release_file);
+ kobject_put(&dev->kobj);
return 0;
}
@@ -760,10 +773,11 @@ static void ib_uverbs_add_one(struct ib_device *device)
if (!uverbs_dev)
return;
- kref_init(&uverbs_dev->ref);
+ atomic_set(&uverbs_dev->refcount, 1);
init_completion(&uverbs_dev->comp);
uverbs_dev->xrcd_tree = RB_ROOT;
mutex_init(&uverbs_dev->xrcd_tree_mutex);
+ kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
spin_lock(&map_lock);
devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -790,6 +804,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
cdev_init(&uverbs_dev->cdev, NULL);
uverbs_dev->cdev.owner = THIS_MODULE;
uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
+ uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj;
kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
if (cdev_add(&uverbs_dev->cdev, base, 1))
goto err_cdev;
@@ -820,9 +835,10 @@ err_cdev:
clear_bit(devnum, overflow_map);
err:
- kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
+ if (atomic_dec_and_test(&uverbs_dev->refcount))
+ ib_uverbs_comp_dev(uverbs_dev);
wait_for_completion(&uverbs_dev->comp);
- kfree(uverbs_dev);
+ kobject_put(&uverbs_dev->kobj);
return;
}
@@ -842,9 +858,10 @@ static void ib_uverbs_remove_one(struct ib_device *device)
else
clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
- kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
+ if (atomic_dec_and_test(&uverbs_dev->refcount))
+ ib_uverbs_comp_dev(uverbs_dev);
wait_for_completion(&uverbs_dev->comp);
- kfree(uverbs_dev);
+ kobject_put(&uverbs_dev->kobj);
}
static char *uverbs_devnode(struct device *dev, umode_t *mode)
--
1.9.1
next prev parent reply other threads:[~2016-04-18 10:48 UTC|newest]
Thread overview: 98+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-04-18 10:45 [PATCH 3.4 00/92] 3.4.112-rc1 review lizf
2016-04-18 10:45 ` [PATCH 3.4 01/92] rc-core: fix remove uevent generation lizf
2016-04-18 10:45 ` [PATCH 3.4 02/92] PCI: Fix TI816X class code quirk lizf
2016-04-18 10:45 ` [PATCH 3.4 03/92] mac80211: enable assoc check for mesh interfaces lizf
2016-04-18 10:45 ` [PATCH 3.4 04/92] PCI: Add dev_flags bit to access VPD through function 0 lizf
2016-04-18 10:45 ` [PATCH 3.4 05/92] PCI: Add VPD function 0 quirk for Intel Ethernet devices lizf
2016-04-18 10:45 ` [PATCH 3.4 06/92] powerpc/rtas: Introduce rtas_get_sensor_fast() for IRQ handlers lizf
2016-04-18 10:45 ` [PATCH 3.4 07/92] svcrdma: Fix send_reply() scatter/gather set-up lizf
2016-04-18 10:45 ` [PATCH 3.4 08/92] md/raid0: update queue parameter in a safer location lizf
2016-04-18 10:45 ` [PATCH 3.4 09/92] auxdisplay: ks0108: fix refcount lizf
2016-04-18 10:45 ` [PATCH 3.4 10/92] devres: fix devres_get() lizf
2016-04-18 10:45 ` [PATCH 3.4 11/92] windfarm: decrement client count when unregistering lizf
2016-04-18 10:45 ` [PATCH 3.4 12/92] NFSv4: don't set SETATTR for O_RDONLY|O_EXCL lizf
2016-04-18 10:45 ` [PATCH 3.4 13/92] usb: host: ehci-sys: delete useless bus_to_hcd conversion lizf
2016-04-18 10:45 ` [PATCH 3.4 14/92] USB: ftdi_sio: Added custom PID for CustomWare products lizf
2016-04-18 10:45 ` [PATCH 3.4 15/92] eCryptfs: Invalidate dcache entries when lower i_nlink is zero lizf
2016-04-18 10:45 ` [PATCH 3.4 16/92] DRM - radeon: Don't link train DisplayPort on HPD until we get the dpcd lizf
2016-04-18 10:45 ` [PATCH 3.4 17/92] of/address: Don't loop forever in of_find_matching_node_by_address() lizf
2016-04-18 10:45 ` [PATCH 3.4 18/92] drivercore: Fix unregistration path of platform devices lizf
2016-04-18 10:45 ` [PATCH 3.4 19/92] SUNRPC: xs_reset_transport must mark the connection as disconnected lizf
2016-04-18 10:45 ` [PATCH 3.4 20/92] IB/mlx4: Use correct SL on AH query under RoCE lizf
2016-04-18 10:45 ` lizf [this message]
2016-04-18 10:45 ` [PATCH 3.4 22/92] Add radeon suspend/resume quirk for HP Compaq dc5750 lizf
2016-04-18 10:45 ` [PATCH 3.4 23/92] IB/uverbs: reject invalid or unknown opcodes lizf
2016-04-18 10:45 ` [PATCH 3.4 24/92] hpfs: update ctime and mtime on directory modification lizf
2016-04-18 10:45 ` [PATCH 3.4 25/92] crypto: ghash-clmulni: specify context size for ghash async algorithm lizf
2016-04-18 10:45 ` [PATCH 3.4 26/92] fs: create and use seq_show_option for escaping lizf
2016-04-18 10:45 ` [PATCH 3.4 27/92] hfs,hfsplus: cache pages correctly between bnode_create and bnode_free lizf
2016-04-18 10:45 ` [PATCH 3.4 28/92] hfs: fix B-tree corruption after insertion at position 0 lizf
2016-04-18 10:45 ` [PATCH 3.4 29/92] scsi_dh: fix randconfig build error lizf
2016-04-18 10:45 ` [PATCH 3.4 30/92] ARM: 8429/1: disable GCC SRA optimization lizf
2016-04-18 10:45 ` [PATCH 3.4 31/92] powerpc/MSI: Fix race condition in tearing down MSI interrupts lizf
2016-04-18 10:45 ` [PATCH 3.4 32/92] perf header: Fixup reading of HEADER_NRCPUS feature lizf
2016-04-18 10:45 ` [PATCH 3.4 33/92] ARM: 7880/1: Clear the IT state independent of the Thumb-2 mode lizf
2016-04-18 10:45 ` [PATCH 3.4 34/92] ARM: fix Thumb2 signal handling when ARMv6 is enabled lizf
2016-04-18 10:45 ` [PATCH 3.4 35/92] x86/platform: Fix Geode LX timekeeping in the generic x86 build lizf
2016-04-18 10:45 ` [PATCH 3.4 36/92] module: Fix locking in symbol_put_addr() lizf
2016-04-18 10:45 ` [PATCH 3.4 37/92] ipv6: Fix IPsec pre-encap fragmentation check lizf
2016-04-18 10:45 ` [PATCH 3.4 38/92] ASoC: fix broken pxa SoC support lizf
2016-04-18 10:45 ` [PATCH 3.4 39/92] MIPS: dma-default: Fix 32-bit fall back to GFP_DMA lizf
2016-04-18 10:45 ` [PATCH 3.4 40/92] md/raid0: apply base queue limits *before* disk_stack_limits lizf
2016-04-18 10:45 ` [PATCH 3.4 41/92] iwlwifi: dvm: fix D3 firmware PN programming lizf
2016-04-18 10:45 ` [PATCH 3.4 42/92] sched/core: Fix TASK_DEAD race in finish_task_switch() lizf
2016-04-18 10:45 ` [PATCH 3.4 43/92] IB/cm: Fix rb-tree duplicate free and use-after-free lizf
2016-04-18 10:45 ` [PATCH 3.4 44/92] powerpc/rtas: Validate rtas.entry before calling enter_rtas() lizf
2016-04-18 10:45 ` [PATCH 3.4 45/92] md/raid10: ensure device failure recorded before write request returns lizf
2016-04-18 10:45 ` [PATCH 3.4 46/92] md/raid10: don't clear bitmap bit when bad-block-list write fails lizf
2016-04-18 10:45 ` [PATCH 3.4 47/92] md/raid1: ensure device failure recorded before write request returns lizf
2016-04-18 10:45 ` [PATCH 3.4 48/92] md/raid1: don't clear bitmap bit when bad-block-list write fails lizf
2016-04-18 10:45 ` [PATCH 3.4 49/92] drm: crtc: integer overflow in drm_property_create_blob() lizf
2016-04-18 10:45 ` [PATCH 3.4 50/92] spi: spi-pxa2xx: Check status register to determine if SSSR_TINT is disabled lizf
2016-04-18 10:45 ` [PATCH 3.4 51/92] spi: Fix documentation of spi_alloc_master() lizf
2016-04-18 10:45 ` [PATCH 3.4 52/92] btrfs: skip waiting on ordered range for special files lizf
2016-04-18 10:45 ` [PATCH 3.4 53/92] regmap: debugfs: Ensure we don't underflow when printing access masks lizf
2016-04-18 10:45 ` [PATCH 3.4 54/92] regmap: debugfs: Don't bother actually printing when calculating max length lizf
2016-04-18 10:46 ` [PATCH 3.4 55/92] KVM: x86: trap AMD MSRs for the TSeg base and mask lizf
2016-04-18 10:46 ` [PATCH 3.4 56/92] usb: Use the USB_SS_MULT() macro to get the burst multiplier lizf
2016-04-18 10:46 ` [PATCH 3.4 57/92] xhci: give command abortion one more chance before killing xhci lizf
2016-04-18 10:46 ` [PATCH 3.4 58/92] usb: xhci: Clear XHCI_STATE_DYING on start lizf
2016-04-18 10:46 ` [PATCH 3.4 59/92] xhci: change xhci 1.0 only restrictions to support xhci 1.1 lizf
2016-04-18 10:46 ` [PATCH 3.4 60/92] cifs: use server timestamp for ntlmv2 authentication lizf
2016-04-18 10:46 ` [PATCH 3.4 61/92] ocfs2/dlm: fix deadlock when dispatch assert master lizf
2016-04-18 11:29 ` Joseph Qi
2016-04-19 0:18 ` Zefan Li
2016-04-18 10:46 ` [PATCH 3.4 62/92] ath9k: declare required extra tx headroom lizf
2016-04-18 10:46 ` [PATCH 3.4 63/92] m68k: Define asmlinkage_protect lizf
2016-04-18 10:46 ` [PATCH 3.4 64/92] x86/xen: Do not clip xen_e820_map to xen_e820_map_entries when sanitizing map lizf
2016-04-18 10:46 ` [PATCH 3.4 65/92] UBI: Validate data_size lizf
2016-04-18 10:46 ` [PATCH 3.4 66/92] UBI: return ENOSPC if no enough space available lizf
2016-04-18 10:46 ` [PATCH 3.4 67/92] x86/process: Add proper bound checks in 64bit get_wchan() lizf
2016-04-18 10:46 ` [PATCH 3.4 68/92] genirq: Fix race in register_irq_proc() lizf
2016-04-18 10:46 ` [PATCH 3.4 69/92] mm: hugetlbfs: skip shared VMAs when unmapping private pages to satisfy a fault lizf
2016-04-18 10:46 ` [PATCH 3.4 70/92] clocksource: Fix abs() usage w/ 64bit values lizf
2016-04-18 10:46 ` [PATCH 3.4 71/92] USB: Add reset-resume quirk for two Plantronics usb headphones lizf
2016-04-18 10:46 ` [PATCH 3.4 72/92] usb: Add device quirk for Logitech PTZ cameras lizf
2016-04-18 10:46 ` [PATCH 3.4 73/92] tty: fix stall caused by missing memory barrier in drivers/tty/n_tty.c lizf
2016-04-18 10:46 ` [PATCH 3.4 74/92] drivers/tty: require read access for controlling terminal lizf
2016-04-18 10:46 ` [PATCH 3.4 75/92] ALSA: synth: Fix conflicting OSS device registration on AWE32 lizf
2016-04-18 10:46 ` [PATCH 3.4 76/92] xen-blkfront: check for null drvdata in blkback_changed (XenbusStateClosing) lizf
2016-04-18 10:46 ` [PATCH 3.4 77/92] crypto: ahash - ensure statesize is non-zero lizf
2016-04-18 10:46 ` [PATCH 3.4 78/92] iommu/vt-d: fix range computation when making room for large pages lizf
2016-04-18 10:46 ` [PATCH 3.4 79/92] xhci: handle no ping response error properly lizf
2016-04-18 10:46 ` [PATCH 3.4 80/92] xhci: Add spurious wakeup quirk for LynxPoint-LP controllers lizf
2016-04-18 10:46 ` [PATCH 3.4 81/92] crypto: api - Only abort operations on fatal signal lizf
2016-04-18 10:46 ` [PATCH 3.4 82/92] ASoC: wm8904: Correct number of EQ registers lizf
2016-04-18 10:46 ` [PATCH 3.4 83/92] iommu/amd: Don't clear DTE flags when modifying it lizf
2016-04-18 10:46 ` [PATCH 3.4 84/92] drm/nouveau/gem: return only valid domain when there's only one lizf
2016-04-18 10:46 ` [PATCH 3.4 85/92] mm: make sendfile(2) killable lizf
2016-04-18 10:46 ` [PATCH 3.4 86/92] dm btree: fix leak of bufio-backed block in btree_split_beneath error path lizf
2016-04-18 10:46 ` [PATCH 3.4 87/92] mvsas: Fix NULL pointer dereference in mvs_slot_task_free lizf
2016-04-18 10:46 ` [PATCH 3.4 88/92] raid1: include bio_end_io_list in nr_queued to prevent freeze_array hang lizf
2016-04-18 10:46 ` [PATCH 3.4 89/92] usb: Use the USB_SS_MULT() macro to decode burst multiplier for log message lizf
2016-04-18 10:46 ` [PATCH 3.4 90/92] pipe: Fix buffer offset after partially failed read lizf
2016-04-18 10:46 ` [PATCH 3.4 91/92] splice: sendfile() at once fails for big files lizf
2016-04-18 10:57 ` [PATCH 3.4 92/92] x86/iopl/64: Properly context-switch IOPL on Xen PV lizf
2016-04-18 16:37 ` [PATCH 3.4 00/92] 3.4.112-rc1 review Guenter Roeck
2016-04-19 0:18 ` Zefan Li
2016-04-22 16:48 ` Christoph Biedl
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1460976397-5688-21-git-send-email-lizf@kernel.org \
--to=lizf@kernel.org \
--cc=dledford@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=lizefan@huawei.com \
--cc=raindel@mellanox.com \
--cc=stable@vger.kernel.org \
--cc=yishaih@mellanox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).