stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: lizf@kernel.org
To: stable@vger.kernel.org
Cc: linux-kernel@vger.kernel.org, Yishai Hadas <yishaih@mellanox.com>,
	Shachar Raindel <raindel@mellanox.com>,
	Doug Ledford <dledford@redhat.com>, Zefan Li <lizefan@huawei.com>
Subject: [PATCH 3.4 21/92] IB/uverbs: Fix race between ib_uverbs_open and remove_one
Date: Mon, 18 Apr 2016 18:45:26 +0800	[thread overview]
Message-ID: <1460976397-5688-21-git-send-email-lizf@kernel.org> (raw)
In-Reply-To: <1460976338-5631-1-git-send-email-lizf@kernel.org>

From: Yishai Hadas <yishaih@mellanox.com>

3.4.112-rc1 review patch.  If anyone has any objections, please let me know.

------------------


commit 35d4a0b63dc0c6d1177d4f532a9deae958f0662c upstream.

Fixes: 2a72f212263701b927559f6850446421d5906c41 ("IB/uverbs: Remove dev_table")

Before this commit there was a device look-up table that was protected
by a spin_lock used by ib_uverbs_open and by ib_uverbs_remove_one. When
it was dropped and container_of was used instead, it enabled the race
with remove_one as dev might be freed just after:
dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev) but
before the kref_get.

In addition, this buggy patch added some dead code as
container_of(x,y,z) can never be NULL and so dev can never be NULL.
As a result the comment above ib_uverbs_open saying "the open method
will either immediately run -ENXIO" is wrong as it can never happen.

The solution follows Jason Gunthorpe suggestion from below URL:
https://www.mail-archive.com/linux-rdma@vger.kernel.org/msg25692.html

cdev will hold a kref on the parent (the containing structure,
ib_uverbs_device) and only when that kref is released it is
guaranteed that open will never be called again.

In addition, fixes the active count scheme to use an atomic
not a kref to prevent WARN_ON as pointed by above comment
from Jason.

Signed-off-by: Yishai Hadas <yishaih@mellanox.com>
Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Reviewed-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Signed-off-by: Zefan Li <lizefan@huawei.com>
---
 drivers/infiniband/core/uverbs.h      |  3 ++-
 drivers/infiniband/core/uverbs_main.c | 43 ++++++++++++++++++++++++-----------
 2 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 5bcb2af..228af18 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -69,7 +69,7 @@
  */
 
 struct ib_uverbs_device {
-	struct kref				ref;
+	atomic_t				refcount;
 	int					num_comp_vectors;
 	struct completion			comp;
 	struct device			       *dev;
@@ -78,6 +78,7 @@ struct ib_uverbs_device {
 	struct cdev			        cdev;
 	struct rb_root				xrcd_tree;
 	struct mutex				xrcd_tree_mutex;
+	struct kobject				kobj;
 };
 
 struct ib_uverbs_event_file {
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 5b51e4e..c8e7669 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -117,14 +117,18 @@ static ssize_t (*uverbs_cmd_table[])(struct ib_uverbs_file *file,
 static void ib_uverbs_add_one(struct ib_device *device);
 static void ib_uverbs_remove_one(struct ib_device *device);
 
-static void ib_uverbs_release_dev(struct kref *ref)
+static void ib_uverbs_release_dev(struct kobject *kobj)
 {
 	struct ib_uverbs_device *dev =
-		container_of(ref, struct ib_uverbs_device, ref);
+		container_of(kobj, struct ib_uverbs_device, kobj);
 
-	complete(&dev->comp);
+	kfree(dev);
 }
 
+static struct kobj_type ib_uverbs_dev_ktype = {
+	.release = ib_uverbs_release_dev,
+};
+
 static void ib_uverbs_release_event_file(struct kref *ref)
 {
 	struct ib_uverbs_event_file *file =
@@ -273,13 +277,19 @@ static int ib_uverbs_cleanup_ucontext(struct ib_uverbs_file *file,
 	return context->device->dealloc_ucontext(context);
 }
 
+static void ib_uverbs_comp_dev(struct ib_uverbs_device *dev)
+{
+	complete(&dev->comp);
+}
+
 static void ib_uverbs_release_file(struct kref *ref)
 {
 	struct ib_uverbs_file *file =
 		container_of(ref, struct ib_uverbs_file, ref);
 
 	module_put(file->device->ib_dev->owner);
-	kref_put(&file->device->ref, ib_uverbs_release_dev);
+	if (atomic_dec_and_test(&file->device->refcount))
+		ib_uverbs_comp_dev(file->device);
 
 	kfree(file);
 }
@@ -621,9 +631,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
 	int ret;
 
 	dev = container_of(inode->i_cdev, struct ib_uverbs_device, cdev);
-	if (dev)
-		kref_get(&dev->ref);
-	else
+	if (!atomic_inc_not_zero(&dev->refcount))
 		return -ENXIO;
 
 	if (!try_module_get(dev->ib_dev->owner)) {
@@ -644,6 +652,7 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
 	mutex_init(&file->mutex);
 
 	filp->private_data = file;
+	kobject_get(&dev->kobj);
 
 	return nonseekable_open(inode, filp);
 
@@ -651,13 +660,16 @@ err_module:
 	module_put(dev->ib_dev->owner);
 
 err:
-	kref_put(&dev->ref, ib_uverbs_release_dev);
+	if (atomic_dec_and_test(&dev->refcount))
+		ib_uverbs_comp_dev(dev);
+
 	return ret;
 }
 
 static int ib_uverbs_close(struct inode *inode, struct file *filp)
 {
 	struct ib_uverbs_file *file = filp->private_data;
+	struct ib_uverbs_device *dev = file->device;
 
 	ib_uverbs_cleanup_ucontext(file, file->ucontext);
 
@@ -665,6 +677,7 @@ static int ib_uverbs_close(struct inode *inode, struct file *filp)
 		kref_put(&file->async_file->ref, ib_uverbs_release_event_file);
 
 	kref_put(&file->ref, ib_uverbs_release_file);
+	kobject_put(&dev->kobj);
 
 	return 0;
 }
@@ -760,10 +773,11 @@ static void ib_uverbs_add_one(struct ib_device *device)
 	if (!uverbs_dev)
 		return;
 
-	kref_init(&uverbs_dev->ref);
+	atomic_set(&uverbs_dev->refcount, 1);
 	init_completion(&uverbs_dev->comp);
 	uverbs_dev->xrcd_tree = RB_ROOT;
 	mutex_init(&uverbs_dev->xrcd_tree_mutex);
+	kobject_init(&uverbs_dev->kobj, &ib_uverbs_dev_ktype);
 
 	spin_lock(&map_lock);
 	devnum = find_first_zero_bit(dev_map, IB_UVERBS_MAX_DEVICES);
@@ -790,6 +804,7 @@ static void ib_uverbs_add_one(struct ib_device *device)
 	cdev_init(&uverbs_dev->cdev, NULL);
 	uverbs_dev->cdev.owner = THIS_MODULE;
 	uverbs_dev->cdev.ops = device->mmap ? &uverbs_mmap_fops : &uverbs_fops;
+	uverbs_dev->cdev.kobj.parent = &uverbs_dev->kobj;
 	kobject_set_name(&uverbs_dev->cdev.kobj, "uverbs%d", uverbs_dev->devnum);
 	if (cdev_add(&uverbs_dev->cdev, base, 1))
 		goto err_cdev;
@@ -820,9 +835,10 @@ err_cdev:
 		clear_bit(devnum, overflow_map);
 
 err:
-	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
+	if (atomic_dec_and_test(&uverbs_dev->refcount))
+		ib_uverbs_comp_dev(uverbs_dev);
 	wait_for_completion(&uverbs_dev->comp);
-	kfree(uverbs_dev);
+	kobject_put(&uverbs_dev->kobj);
 	return;
 }
 
@@ -842,9 +858,10 @@ static void ib_uverbs_remove_one(struct ib_device *device)
 	else
 		clear_bit(uverbs_dev->devnum - IB_UVERBS_MAX_DEVICES, overflow_map);
 
-	kref_put(&uverbs_dev->ref, ib_uverbs_release_dev);
+	if (atomic_dec_and_test(&uverbs_dev->refcount))
+		ib_uverbs_comp_dev(uverbs_dev);
 	wait_for_completion(&uverbs_dev->comp);
-	kfree(uverbs_dev);
+	kobject_put(&uverbs_dev->kobj);
 }
 
 static char *uverbs_devnode(struct device *dev, umode_t *mode)
-- 
1.9.1


  parent reply	other threads:[~2016-04-18 10:48 UTC|newest]

Thread overview: 98+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-18 10:45 [PATCH 3.4 00/92] 3.4.112-rc1 review lizf
2016-04-18 10:45 ` [PATCH 3.4 01/92] rc-core: fix remove uevent generation lizf
2016-04-18 10:45 ` [PATCH 3.4 02/92] PCI: Fix TI816X class code quirk lizf
2016-04-18 10:45 ` [PATCH 3.4 03/92] mac80211: enable assoc check for mesh interfaces lizf
2016-04-18 10:45 ` [PATCH 3.4 04/92] PCI: Add dev_flags bit to access VPD through function 0 lizf
2016-04-18 10:45 ` [PATCH 3.4 05/92] PCI: Add VPD function 0 quirk for Intel Ethernet devices lizf
2016-04-18 10:45 ` [PATCH 3.4 06/92] powerpc/rtas: Introduce rtas_get_sensor_fast() for IRQ handlers lizf
2016-04-18 10:45 ` [PATCH 3.4 07/92] svcrdma: Fix send_reply() scatter/gather set-up lizf
2016-04-18 10:45 ` [PATCH 3.4 08/92] md/raid0: update queue parameter in a safer location lizf
2016-04-18 10:45 ` [PATCH 3.4 09/92] auxdisplay: ks0108: fix refcount lizf
2016-04-18 10:45 ` [PATCH 3.4 10/92] devres: fix devres_get() lizf
2016-04-18 10:45 ` [PATCH 3.4 11/92] windfarm: decrement client count when unregistering lizf
2016-04-18 10:45 ` [PATCH 3.4 12/92] NFSv4: don't set SETATTR for O_RDONLY|O_EXCL lizf
2016-04-18 10:45 ` [PATCH 3.4 13/92] usb: host: ehci-sys: delete useless bus_to_hcd conversion lizf
2016-04-18 10:45 ` [PATCH 3.4 14/92] USB: ftdi_sio: Added custom PID for CustomWare products lizf
2016-04-18 10:45 ` [PATCH 3.4 15/92] eCryptfs: Invalidate dcache entries when lower i_nlink is zero lizf
2016-04-18 10:45 ` [PATCH 3.4 16/92] DRM - radeon: Don't link train DisplayPort on HPD until we get the dpcd lizf
2016-04-18 10:45 ` [PATCH 3.4 17/92] of/address: Don't loop forever in of_find_matching_node_by_address() lizf
2016-04-18 10:45 ` [PATCH 3.4 18/92] drivercore: Fix unregistration path of platform devices lizf
2016-04-18 10:45 ` [PATCH 3.4 19/92] SUNRPC: xs_reset_transport must mark the connection as disconnected lizf
2016-04-18 10:45 ` [PATCH 3.4 20/92] IB/mlx4: Use correct SL on AH query under RoCE lizf
2016-04-18 10:45 ` lizf [this message]
2016-04-18 10:45 ` [PATCH 3.4 22/92] Add radeon suspend/resume quirk for HP Compaq dc5750 lizf
2016-04-18 10:45 ` [PATCH 3.4 23/92] IB/uverbs: reject invalid or unknown opcodes lizf
2016-04-18 10:45 ` [PATCH 3.4 24/92] hpfs: update ctime and mtime on directory modification lizf
2016-04-18 10:45 ` [PATCH 3.4 25/92] crypto: ghash-clmulni: specify context size for ghash async algorithm lizf
2016-04-18 10:45 ` [PATCH 3.4 26/92] fs: create and use seq_show_option for escaping lizf
2016-04-18 10:45 ` [PATCH 3.4 27/92] hfs,hfsplus: cache pages correctly between bnode_create and bnode_free lizf
2016-04-18 10:45 ` [PATCH 3.4 28/92] hfs: fix B-tree corruption after insertion at position 0 lizf
2016-04-18 10:45 ` [PATCH 3.4 29/92] scsi_dh: fix randconfig build error lizf
2016-04-18 10:45 ` [PATCH 3.4 30/92] ARM: 8429/1: disable GCC SRA optimization lizf
2016-04-18 10:45 ` [PATCH 3.4 31/92] powerpc/MSI: Fix race condition in tearing down MSI interrupts lizf
2016-04-18 10:45 ` [PATCH 3.4 32/92] perf header: Fixup reading of HEADER_NRCPUS feature lizf
2016-04-18 10:45 ` [PATCH 3.4 33/92] ARM: 7880/1: Clear the IT state independent of the Thumb-2 mode lizf
2016-04-18 10:45 ` [PATCH 3.4 34/92] ARM: fix Thumb2 signal handling when ARMv6 is enabled lizf
2016-04-18 10:45 ` [PATCH 3.4 35/92] x86/platform: Fix Geode LX timekeeping in the generic x86 build lizf
2016-04-18 10:45 ` [PATCH 3.4 36/92] module: Fix locking in symbol_put_addr() lizf
2016-04-18 10:45 ` [PATCH 3.4 37/92] ipv6: Fix IPsec pre-encap fragmentation check lizf
2016-04-18 10:45 ` [PATCH 3.4 38/92] ASoC: fix broken pxa SoC support lizf
2016-04-18 10:45 ` [PATCH 3.4 39/92] MIPS: dma-default: Fix 32-bit fall back to GFP_DMA lizf
2016-04-18 10:45 ` [PATCH 3.4 40/92] md/raid0: apply base queue limits *before* disk_stack_limits lizf
2016-04-18 10:45 ` [PATCH 3.4 41/92] iwlwifi: dvm: fix D3 firmware PN programming lizf
2016-04-18 10:45 ` [PATCH 3.4 42/92] sched/core: Fix TASK_DEAD race in finish_task_switch() lizf
2016-04-18 10:45 ` [PATCH 3.4 43/92] IB/cm: Fix rb-tree duplicate free and use-after-free lizf
2016-04-18 10:45 ` [PATCH 3.4 44/92] powerpc/rtas: Validate rtas.entry before calling enter_rtas() lizf
2016-04-18 10:45 ` [PATCH 3.4 45/92] md/raid10: ensure device failure recorded before write request returns lizf
2016-04-18 10:45 ` [PATCH 3.4 46/92] md/raid10: don't clear bitmap bit when bad-block-list write fails lizf
2016-04-18 10:45 ` [PATCH 3.4 47/92] md/raid1: ensure device failure recorded before write request returns lizf
2016-04-18 10:45 ` [PATCH 3.4 48/92] md/raid1: don't clear bitmap bit when bad-block-list write fails lizf
2016-04-18 10:45 ` [PATCH 3.4 49/92] drm: crtc: integer overflow in drm_property_create_blob() lizf
2016-04-18 10:45 ` [PATCH 3.4 50/92] spi: spi-pxa2xx: Check status register to determine if SSSR_TINT is disabled lizf
2016-04-18 10:45 ` [PATCH 3.4 51/92] spi: Fix documentation of spi_alloc_master() lizf
2016-04-18 10:45 ` [PATCH 3.4 52/92] btrfs: skip waiting on ordered range for special files lizf
2016-04-18 10:45 ` [PATCH 3.4 53/92] regmap: debugfs: Ensure we don't underflow when printing access masks lizf
2016-04-18 10:45 ` [PATCH 3.4 54/92] regmap: debugfs: Don't bother actually printing when calculating max length lizf
2016-04-18 10:46 ` [PATCH 3.4 55/92] KVM: x86: trap AMD MSRs for the TSeg base and mask lizf
2016-04-18 10:46 ` [PATCH 3.4 56/92] usb: Use the USB_SS_MULT() macro to get the burst multiplier lizf
2016-04-18 10:46 ` [PATCH 3.4 57/92] xhci: give command abortion one more chance before killing xhci lizf
2016-04-18 10:46 ` [PATCH 3.4 58/92] usb: xhci: Clear XHCI_STATE_DYING on start lizf
2016-04-18 10:46 ` [PATCH 3.4 59/92] xhci: change xhci 1.0 only restrictions to support xhci 1.1 lizf
2016-04-18 10:46 ` [PATCH 3.4 60/92] cifs: use server timestamp for ntlmv2 authentication lizf
2016-04-18 10:46 ` [PATCH 3.4 61/92] ocfs2/dlm: fix deadlock when dispatch assert master lizf
2016-04-18 11:29   ` Joseph Qi
2016-04-19  0:18     ` Zefan Li
2016-04-18 10:46 ` [PATCH 3.4 62/92] ath9k: declare required extra tx headroom lizf
2016-04-18 10:46 ` [PATCH 3.4 63/92] m68k: Define asmlinkage_protect lizf
2016-04-18 10:46 ` [PATCH 3.4 64/92] x86/xen: Do not clip xen_e820_map to xen_e820_map_entries when sanitizing map lizf
2016-04-18 10:46 ` [PATCH 3.4 65/92] UBI: Validate data_size lizf
2016-04-18 10:46 ` [PATCH 3.4 66/92] UBI: return ENOSPC if no enough space available lizf
2016-04-18 10:46 ` [PATCH 3.4 67/92] x86/process: Add proper bound checks in 64bit get_wchan() lizf
2016-04-18 10:46 ` [PATCH 3.4 68/92] genirq: Fix race in register_irq_proc() lizf
2016-04-18 10:46 ` [PATCH 3.4 69/92] mm: hugetlbfs: skip shared VMAs when unmapping private pages to satisfy a fault lizf
2016-04-18 10:46 ` [PATCH 3.4 70/92] clocksource: Fix abs() usage w/ 64bit values lizf
2016-04-18 10:46 ` [PATCH 3.4 71/92] USB: Add reset-resume quirk for two Plantronics usb headphones lizf
2016-04-18 10:46 ` [PATCH 3.4 72/92] usb: Add device quirk for Logitech PTZ cameras lizf
2016-04-18 10:46 ` [PATCH 3.4 73/92] tty: fix stall caused by missing memory barrier in drivers/tty/n_tty.c lizf
2016-04-18 10:46 ` [PATCH 3.4 74/92] drivers/tty: require read access for controlling terminal lizf
2016-04-18 10:46 ` [PATCH 3.4 75/92] ALSA: synth: Fix conflicting OSS device registration on AWE32 lizf
2016-04-18 10:46 ` [PATCH 3.4 76/92] xen-blkfront: check for null drvdata in blkback_changed (XenbusStateClosing) lizf
2016-04-18 10:46 ` [PATCH 3.4 77/92] crypto: ahash - ensure statesize is non-zero lizf
2016-04-18 10:46 ` [PATCH 3.4 78/92] iommu/vt-d: fix range computation when making room for large pages lizf
2016-04-18 10:46 ` [PATCH 3.4 79/92] xhci: handle no ping response error properly lizf
2016-04-18 10:46 ` [PATCH 3.4 80/92] xhci: Add spurious wakeup quirk for LynxPoint-LP controllers lizf
2016-04-18 10:46 ` [PATCH 3.4 81/92] crypto: api - Only abort operations on fatal signal lizf
2016-04-18 10:46 ` [PATCH 3.4 82/92] ASoC: wm8904: Correct number of EQ registers lizf
2016-04-18 10:46 ` [PATCH 3.4 83/92] iommu/amd: Don't clear DTE flags when modifying it lizf
2016-04-18 10:46 ` [PATCH 3.4 84/92] drm/nouveau/gem: return only valid domain when there's only one lizf
2016-04-18 10:46 ` [PATCH 3.4 85/92] mm: make sendfile(2) killable lizf
2016-04-18 10:46 ` [PATCH 3.4 86/92] dm btree: fix leak of bufio-backed block in btree_split_beneath error path lizf
2016-04-18 10:46 ` [PATCH 3.4 87/92] mvsas: Fix NULL pointer dereference in mvs_slot_task_free lizf
2016-04-18 10:46 ` [PATCH 3.4 88/92] raid1: include bio_end_io_list in nr_queued to prevent freeze_array hang lizf
2016-04-18 10:46 ` [PATCH 3.4 89/92] usb: Use the USB_SS_MULT() macro to decode burst multiplier for log message lizf
2016-04-18 10:46 ` [PATCH 3.4 90/92] pipe: Fix buffer offset after partially failed read lizf
2016-04-18 10:46 ` [PATCH 3.4 91/92] splice: sendfile() at once fails for big files lizf
2016-04-18 10:57 ` [PATCH 3.4 92/92] x86/iopl/64: Properly context-switch IOPL on Xen PV lizf
2016-04-18 16:37 ` [PATCH 3.4 00/92] 3.4.112-rc1 review Guenter Roeck
2016-04-19  0:18   ` Zefan Li
2016-04-22 16:48 ` Christoph Biedl

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1460976397-5688-21-git-send-email-lizf@kernel.org \
    --to=lizf@kernel.org \
    --cc=dledford@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizefan@huawei.com \
    --cc=raindel@mellanox.com \
    --cc=stable@vger.kernel.org \
    --cc=yishaih@mellanox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).