stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Sasha Levin <sashal@kernel.org>
To: linux-kernel@vger.kernel.org, stable@vger.kernel.org
Cc: Peter Zijlstra <peterz@infradead.org>,
	Jann Horn <jannh@google.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Sasha Levin <sashal@kernel.org>,
	linux-fsdevel@vger.kernel.org
Subject: [PATCH AUTOSEL 5.4 30/73] futex: Fix inode life-time issue
Date: Wed, 18 Mar 2020 16:52:54 -0400	[thread overview]
Message-ID: <20200318205337.16279-30-sashal@kernel.org> (raw)
In-Reply-To: <20200318205337.16279-1-sashal@kernel.org>

From: Peter Zijlstra <peterz@infradead.org>

[ Upstream commit 8019ad13ef7f64be44d4f892af9c840179009254 ]

As reported by Jann, ihold() does not in fact guarantee inode
persistence. And instead of making it so, replace the usage of inode
pointers with a per boot, machine wide, unique inode identifier.

This sequence number is global, but shared (file backed) futexes are
rare enough that this should not become a performance issue.

Reported-by: Jann Horn <jannh@google.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/inode.c            |  1 +
 include/linux/fs.h    |  1 +
 include/linux/futex.h | 17 +++++----
 kernel/futex.c        | 89 ++++++++++++++++++++++++++-----------------
 4 files changed, 65 insertions(+), 43 deletions(-)

diff --git a/fs/inode.c b/fs/inode.c
index 96d62d97694ef..c5267a4db0f5e 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -137,6 +137,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	inode->i_sb = sb;
 	inode->i_blkbits = sb->s_blocksize_bits;
 	inode->i_flags = 0;
+	atomic64_set(&inode->i_sequence, 0);
 	atomic_set(&inode->i_count, 1);
 	inode->i_op = &empty_iops;
 	inode->i_fop = &no_open_fops;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 0b4d8fc79e0f3..06668379109e3 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -698,6 +698,7 @@ struct inode {
 		struct rcu_head		i_rcu;
 	};
 	atomic64_t		i_version;
+	atomic64_t		i_sequence; /* see futex */
 	atomic_t		i_count;
 	atomic_t		i_dio_count;
 	atomic_t		i_writecount;
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 5cc3fed27d4c2..b70df27d7e85c 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -31,23 +31,26 @@ struct task_struct;
 
 union futex_key {
 	struct {
+		u64 i_seq;
 		unsigned long pgoff;
-		struct inode *inode;
-		int offset;
+		unsigned int offset;
 	} shared;
 	struct {
+		union {
+			struct mm_struct *mm;
+			u64 __tmp;
+		};
 		unsigned long address;
-		struct mm_struct *mm;
-		int offset;
+		unsigned int offset;
 	} private;
 	struct {
+		u64 ptr;
 		unsigned long word;
-		void *ptr;
-		int offset;
+		unsigned int offset;
 	} both;
 };
 
-#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
+#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = 0ULL } }
 
 #ifdef CONFIG_FUTEX
 enum {
diff --git a/kernel/futex.c b/kernel/futex.c
index afbf928d6a6b0..07ab324885ac0 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -429,7 +429,7 @@ static void get_futex_key_refs(union futex_key *key)
 
 	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
 	case FUT_OFF_INODE:
-		ihold(key->shared.inode); /* implies smp_mb(); (B) */
+		smp_mb();		/* explicit smp_mb(); (B) */
 		break;
 	case FUT_OFF_MMSHARED:
 		futex_get_mm(key); /* implies smp_mb(); (B) */
@@ -463,7 +463,6 @@ static void drop_futex_key_refs(union futex_key *key)
 
 	switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
 	case FUT_OFF_INODE:
-		iput(key->shared.inode);
 		break;
 	case FUT_OFF_MMSHARED:
 		mmdrop(key->private.mm);
@@ -505,6 +504,46 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
 	return timeout;
 }
 
+/*
+ * Generate a machine wide unique identifier for this inode.
+ *
+ * This relies on u64 not wrapping in the life-time of the machine; which with
+ * 1ns resolution means almost 585 years.
+ *
+ * This further relies on the fact that a well formed program will not unmap
+ * the file while it has a (shared) futex waiting on it. This mapping will have
+ * a file reference which pins the mount and inode.
+ *
+ * If for some reason an inode gets evicted and read back in again, it will get
+ * a new sequence number and will _NOT_ match, even though it is the exact same
+ * file.
+ *
+ * It is important that match_futex() will never have a false-positive, esp.
+ * for PI futexes that can mess up the state. The above argues that false-negatives
+ * are only possible for malformed programs.
+ */
+static u64 get_inode_sequence_number(struct inode *inode)
+{
+	static atomic64_t i_seq;
+	u64 old;
+
+	/* Does the inode already have a sequence number? */
+	old = atomic64_read(&inode->i_sequence);
+	if (likely(old))
+		return old;
+
+	for (;;) {
+		u64 new = atomic64_add_return(1, &i_seq);
+		if (WARN_ON_ONCE(!new))
+			continue;
+
+		old = atomic64_cmpxchg_relaxed(&inode->i_sequence, 0, new);
+		if (old)
+			return old;
+		return new;
+	}
+}
+
 /**
  * get_futex_key() - Get parameters which are the keys for a futex
  * @uaddr:	virtual address of the futex
@@ -517,9 +556,15 @@ futex_setup_timer(ktime_t *time, struct hrtimer_sleeper *timeout,
  *
  * The key words are stored in @key on success.
  *
- * For shared mappings, it's (page->index, file_inode(vma->vm_file),
- * offset_within_page).  For private mappings, it's (uaddr, current->mm).
- * We can usually work out the index without swapping in the page.
+ * For shared mappings (when @fshared), the key is:
+ *   ( inode->i_sequence, page->index, offset_within_page )
+ * [ also see get_inode_sequence_number() ]
+ *
+ * For private mappings (or when !@fshared), the key is:
+ *   ( current->mm, address, 0 )
+ *
+ * This allows (cross process, where applicable) identification of the futex
+ * without keeping the page pinned for the duration of the FUTEX_WAIT.
  *
  * lock_page() might sleep, the caller should not hold a spinlock.
  */
@@ -659,8 +704,6 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
 		key->private.mm = mm;
 		key->private.address = address;
 
-		get_futex_key_refs(key); /* implies smp_mb(); (B) */
-
 	} else {
 		struct inode *inode;
 
@@ -692,40 +735,14 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, enum futex_a
 			goto again;
 		}
 
-		/*
-		 * Take a reference unless it is about to be freed. Previously
-		 * this reference was taken by ihold under the page lock
-		 * pinning the inode in place so i_lock was unnecessary. The
-		 * only way for this check to fail is if the inode was
-		 * truncated in parallel which is almost certainly an
-		 * application bug. In such a case, just retry.
-		 *
-		 * We are not calling into get_futex_key_refs() in file-backed
-		 * cases, therefore a successful atomic_inc return below will
-		 * guarantee that get_futex_key() will still imply smp_mb(); (B).
-		 */
-		if (!atomic_inc_not_zero(&inode->i_count)) {
-			rcu_read_unlock();
-			put_page(page);
-
-			goto again;
-		}
-
-		/* Should be impossible but lets be paranoid for now */
-		if (WARN_ON_ONCE(inode->i_mapping != mapping)) {
-			err = -EFAULT;
-			rcu_read_unlock();
-			iput(inode);
-
-			goto out;
-		}
-
 		key->both.offset |= FUT_OFF_INODE; /* inode-based key */
-		key->shared.inode = inode;
+		key->shared.i_seq = get_inode_sequence_number(inode);
 		key->shared.pgoff = basepage_index(tail);
 		rcu_read_unlock();
 	}
 
+	get_futex_key_refs(key); /* implies smp_mb(); (B) */
+
 out:
 	put_page(page);
 	return err;
-- 
2.20.1


  parent reply	other threads:[~2020-03-18 20:54 UTC|newest]

Thread overview: 80+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-03-18 20:52 [PATCH AUTOSEL 5.4 01/73] cgroup-v1: cgroup_pidlist_next should update position index Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 02/73] cgroup: Iterate tasks that did not finish do_exit() Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 03/73] clk: imx8mn: Fix incorrect clock defines Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 04/73] batman-adv: Don't schedule OGM for disabled interface Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 05/73] pinctrl: meson-gxl: fix GPIOX sdio pins Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 06/73] pinctrl: imx: scu: Align imx sc msg structs to 4 Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 07/73] nfs: add minor version to nfs_server_key for fscache Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 08/73] pinctrl: core: Remove extra kref_get which blocks hogs being freed Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 09/73] r8152: check disconnect status after long sleep Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 10/73] net: dsa: mv88e6xxx: fix lockup on warm boot Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 11/73] net: phy: avoid clearing PHY interrupts twice in irq handler Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 12/73] bnxt_en: reinitialize IRQs when MTU is modified Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 13/73] bnxt_en: fix error handling when flashing from file Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 14/73] cpupower: avoid multiple definition with gcc -fno-common Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 15/73] fib: add missing attribute validation for tun_id Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 16/73] can: add missing attribute validation for termination Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 17/73] macsec: add missing attribute validation for port Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 18/73] team: add missing attribute validation for port ifindex Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 19/73] team: add missing attribute validation for array index Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 20/73] netfilter: cthelper: add missing attribute validation for cthelper Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 21/73] netfilter: nft_payload: add missing attribute validation for payload csum flags Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 22/73] netfilter: nft_tunnel: add missing attribute validation for tunnels Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 23/73] net: phy: bcm63xx: fix OOPS due to missing driver name Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 24/73] drivers/of/of_mdio.c:fix of_mdiobus_register() Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 25/73] cgroup1: don't call release_agent when it is "" Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 26/73] netfilter: nf_tables: dump NFTA_CHAIN_FLAGS attribute Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 27/73] netfilter: nf_tables: fix infinite loop when expr is not available Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 28/73] slip: make slhc_compress() more robust against malicious packets Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 29/73] net: hns3: fix a not link up issue when fibre port supports autoneg Sasha Levin
2020-03-18 20:52 ` Sasha Levin [this message]
2020-03-23 19:18   ` [PATCH AUTOSEL 5.4 30/73] futex: Fix inode life-time issue Jann Horn
2020-03-24  8:06     ` Greg Kroah-Hartman
2020-04-08  9:48     ` backport request for 3.16 [was: Re: [PATCH AUTOSEL 5.4 30/73] futex: Fix inode life-time issue] Jann Horn
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 31/73] netfilter: nft_chain_nat: inet family is missing module ownership Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 32/73] dt-bindings: net: FMan erratum A050385 Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 33/73] arm64: dts: ls1043a: " Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 34/73] fsl/fman: detect " Sasha Levin
2020-03-18 20:52 ` [PATCH AUTOSEL 5.4 35/73] bonding/alb: make sure arp header is pulled before accessing it Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 36/73] virtio_ring: Fix mem leak with vring_new_virtqueue() Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 37/73] virtio-blk: fix hw_queue stopped on arbitrary error Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 38/73] virtio_balloon: Adjust label in virtballoon_probe Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 39/73] ipvlan: do not add hardware address of master to its unicast filter list Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 40/73] net: stmmac: dwmac1000: Disable ACS if enhanced descs are not used Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 41/73] drm/amd/display: update soc bb for nv14 Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 42/73] drm/amdgpu: correct ROM_INDEX/DATA offset for VEGA20 Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 43/73] futex: Unbreak futex hashing Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 44/73] ipvlan: don't deref eth hdr before checking it's set Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 45/73] ipvlan: add cond_resched_rcu() while processing muticast backlog Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 46/73] macvlan: add cond_resched() during multicast processing Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 47/73] ipvlan: do not use cond_resched_rcu() in ipvlan_process_multicast() Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 48/73] drm/exynos: Fix cleanup of IOMMU related objects Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 49/73] i2c: i801: Do not add ICH_RES_IO_SMI for the iTCO_wdt device Sasha Levin
2020-03-19  7:30   ` Wolfram Sang
2020-03-29 20:07     ` Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 50/73] iommu/vt-d: Fix RCU-list bugs in intel_iommu_init() Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 51/73] iommu/vt-d: Silence RCU-list debugging warnings Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 52/73] i2c: gpio: suppress error on probe defer Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 53/73] s390/qeth: don't reset default_out_queue Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 54/73] s390/qeth: handle error when backing RX buffer Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 55/73] scsi: ipr: Fix softlockup when rescanning devices in petitboot Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 56/73] nl80211: add missing attribute validation for critical protocol indication Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 57/73] nl80211: add missing attribute validation for beacon report scanning Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 58/73] nl80211: add missing attribute validation for channel switch Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 59/73] mac80211: Do not send mesh HWMP PREQ if HWMP is disabled Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 60/73] driver code: clarify and fix platform device DMA mask allocation Sasha Levin
2020-03-19  6:49   ` Greg Kroah-Hartman
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 61/73] dpaa_eth: Remove unnecessary boolean expression in dpaa_get_headroom Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 62/73] net: fec: validate the new settings in fec_enet_set_coalesce() Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 63/73] sxgbe: Fix off by one in samsung driver strncpy size arg Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 64/73] net: mvmdio: avoid error message for optional IRQ Sasha Levin
2020-03-18 20:57   ` Chris Packham
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 65/73] net: hns3: fix "tc qdisc del" failed issue Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 66/73] net: systemport: fix index check to avoid an array out of bounds access Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 67/73] iommu/vt-d: quirk_ioat_snb_local_iommu: replace WARN_TAINT with pr_warn + add_taint Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 68/73] iommu/vt-d: Fix debugfs register reads Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 69/73] i2c: acpi: put device when verifying client fails Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 70/73] iommu/vt-d: Fix the wrong printing in RHSA parsing Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 71/73] iommu/vt-d: Ignore devices with out-of-spec domain number Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 72/73] iommu/amd: Fix IOMMU AVIC not properly update the is_run bit in IRTE Sasha Levin
2020-03-18 20:53 ` [PATCH AUTOSEL 5.4 73/73] iommu/vt-d: Populate debugfs if IOMMUs are detected Sasha Levin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200318205337.16279-30-sashal@kernel.org \
    --to=sashal@kernel.org \
    --cc=jannh@google.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=stable@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).