From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from sub5.mail.dreamhost.com ([208.113.200.129]:33094 "EHLO homiemail-a124.g.dreamhost.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1754470AbcLaEKD (ORCPT ); Fri, 30 Dec 2016 23:10:03 -0500 Received: from homiemail-a124.g.dreamhost.com (localhost [127.0.0.1]) by homiemail-a124.g.dreamhost.com (Postfix) with ESMTP id 08F7160000D03 for ; Fri, 30 Dec 2016 20:10:03 -0800 (PST) Received: from kmjvbox (c-73-70-90-212.hsd1.ca.comcast.net [73.70.90.212]) (using TLSv1 with cipher ECDHE-RSA-AES256-SHA (256/256 bits)) (No client certificate requested) (Authenticated sender: kjlx@templeofstupid.com) by homiemail-a124.g.dreamhost.com (Postfix) with ESMTPSA id E2EDC60000D00 for ; Fri, 30 Dec 2016 20:10:02 -0800 (PST) Date: Fri, 30 Dec 2016 20:10:01 -0800 From: Krister Johansen To: Alexander Viro Cc: linux-fsdevel@vger.kernel.org, "Eric W. Biederman" Subject: [PATCH] Fix a race in put_mountpoint. Message-ID: <20161231041001.GA2448@templeofstupid.com> MIME-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline Sender: linux-fsdevel-owner@vger.kernel.org List-ID: This can cause a panic when simultaneous callers of put_mountpoint attempt to free the same mountpoint. This occurs because some callers hold the mount_hash_lock, while others hold the namespace lock. Some even hold both. In this submitter's case, the panic manifested itself as a GP fault in put_mountpoint() when it called hlist_del() and attempted to dereference a m_hash.pprev that had been poisioned by another thread. Instead of trying to force all mountpoint hash users back under the namespace lock, add locks that cover just the mountpoint hash. This uses hlist_bl to protect against simlultaneous additions and removals, and RCU for lookups. Signed-off-by: Krister Johansen --- fs/mount.h | 6 ++++-- fs/namespace.c | 62 ++++++++++++++++++++++++++++++++++++++++------------------ 2 files changed, 47 insertions(+), 21 deletions(-) diff --git a/fs/mount.h b/fs/mount.h index 2c856fc..1a2f41a 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -3,6 +3,7 @@ #include #include #include +#include struct mnt_namespace { atomic_t count; @@ -24,10 +25,11 @@ struct mnt_pcp { }; struct mountpoint { - struct hlist_node m_hash; + struct hlist_bl_node m_hash; struct dentry *m_dentry; struct hlist_head m_list; - int m_count; + atomic_t m_count; + struct rcu_head m_rcu; }; struct mount { diff --git a/fs/namespace.c b/fs/namespace.c index b5b1259..7c29420 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -63,7 +63,7 @@ static int mnt_id_start = 0; static int mnt_group_start = 1; static struct hlist_head *mount_hashtable __read_mostly; -static struct hlist_head *mountpoint_hashtable __read_mostly; +static struct hlist_bl_head *mountpoint_hashtable __read_mostly; static struct kmem_cache *mnt_cache __read_mostly; static DECLARE_RWSEM(namespace_sem); @@ -89,7 +89,7 @@ static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *den return &mount_hashtable[tmp & m_hash_mask]; } -static inline struct hlist_head *mp_hash(struct dentry *dentry) +static inline struct hlist_bl_head *mp_hash(struct dentry *dentry) { unsigned long tmp = ((unsigned long)dentry / L1_CACHE_BYTES); tmp = tmp + (tmp >> mp_hash_shift); @@ -727,27 +727,36 @@ bool __is_local_mountpoint(struct dentry *dentry) static struct mountpoint *lookup_mountpoint(struct dentry *dentry) { - struct hlist_head *chain = mp_hash(dentry); + struct hlist_bl_head *chain = mp_hash(dentry); + struct hlist_bl_node *node; struct mountpoint *mp; - hlist_for_each_entry(mp, chain, m_hash) { + rcu_read_lock(); + hlist_bl_for_each_entry_rcu(mp, node, chain, m_hash) { if (mp->m_dentry == dentry) { /* might be worth a WARN_ON() */ - if (d_unlinked(dentry)) - return ERR_PTR(-ENOENT); - mp->m_count++; - return mp; + if (d_unlinked(dentry)) { + mp = ERR_PTR(-ENOENT); + goto out; + } + if (atomic_inc_not_zero(&mp->m_count)) + goto out; } } - return NULL; + mp = NULL; +out: + rcu_read_unlock(); + return mp; } static struct mountpoint *new_mountpoint(struct dentry *dentry) { - struct hlist_head *chain = mp_hash(dentry); + struct hlist_bl_head *chain = mp_hash(dentry); struct mountpoint *mp; int ret; + WARN_ON(!rwsem_is_locked(&namespace_sem)); + mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL); if (!mp) return ERR_PTR(-ENOMEM); @@ -759,22 +768,37 @@ static struct mountpoint *new_mountpoint(struct dentry *dentry) } mp->m_dentry = dentry; - mp->m_count = 1; - hlist_add_head(&mp->m_hash, chain); + init_rcu_head(&mp->m_rcu); + atomic_set(&mp->m_count, 1); + hlist_bl_lock(chain); + hlist_bl_add_head_rcu(&mp->m_hash, chain); + hlist_bl_unlock(chain); INIT_HLIST_HEAD(&mp->m_list); return mp; } +static void free_mountpoint(struct rcu_head *head) +{ + struct mountpoint *mp; + + mp = container_of(head, struct mountpoint, m_rcu); + kfree(mp); +} + static void put_mountpoint(struct mountpoint *mp) { - if (!--mp->m_count) { + if (atomic_dec_and_test(&mp->m_count)) { struct dentry *dentry = mp->m_dentry; + struct hlist_bl_head *chain = mp_hash(dentry); + BUG_ON(!hlist_empty(&mp->m_list)); spin_lock(&dentry->d_lock); dentry->d_flags &= ~DCACHE_MOUNTED; spin_unlock(&dentry->d_lock); - hlist_del(&mp->m_hash); - kfree(mp); + hlist_bl_lock(chain); + hlist_bl_del_rcu(&mp->m_hash); + hlist_bl_unlock(chain); + call_rcu(&mp->m_rcu, free_mountpoint); } } @@ -846,7 +870,7 @@ void mnt_set_mountpoint(struct mount *mnt, struct mountpoint *mp, struct mount *child_mnt) { - mp->m_count++; + atomic_inc(&mp->m_count); mnt_add_count(mnt, 1); /* essentially, that's mntget */ child_mnt->mnt_mountpoint = dget(mp->m_dentry); child_mnt->mnt_parent = mnt; @@ -3120,7 +3144,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, /* make certain new is below the root */ if (!is_path_reachable(new_mnt, new.dentry, &root)) goto out4; - root_mp->m_count++; /* pin it so it won't go away */ + atomic_inc(&root_mp->m_count); /* pin it so it won't go away */ lock_mount_hash(); detach_mnt(new_mnt, &parent_path); detach_mnt(root_mnt, &root_parent); @@ -3199,7 +3223,7 @@ void __init mnt_init(void) 0, &m_hash_shift, &m_hash_mask, 0, 0); mountpoint_hashtable = alloc_large_system_hash("Mountpoint-cache", - sizeof(struct hlist_head), + sizeof(struct hlist_bl_head), mphash_entries, 19, 0, &mp_hash_shift, &mp_hash_mask, 0, 0); @@ -3210,7 +3234,7 @@ void __init mnt_init(void) for (u = 0; u <= m_hash_mask; u++) INIT_HLIST_HEAD(&mount_hashtable[u]); for (u = 0; u <= mp_hash_mask; u++) - INIT_HLIST_HEAD(&mountpoint_hashtable[u]); + INIT_HLIST_BL_HEAD(&mountpoint_hashtable[u]); kernfs_init(); -- 2.7.4