stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org,
	"Kirill A. Shutemov" <kirill@shutemov.name>,
	Thomas Graf <tgraf@suug.ch>, Florian Westphal <fw@strlen.de>,
	"David S. Miller" <davem@davemloft.net>
Subject: [PATCH 3.14 61/84] netlink: dont hold mutex in rcu callback when releasing mmapd ring
Date: Tue, 29 Sep 2015 17:18:53 +0200	[thread overview]
Message-ID: <20150929145333.636008842@linuxfoundation.org> (raw)
In-Reply-To: <20150929145330.924730721@linuxfoundation.org>

3.14-stable review patch.  If anyone has any objections, please let me know.

------------------

From: Florian Westphal <fw@strlen.de>

[ Upstream commit 0470eb99b4721586ccac954faac3fa4472da0845 ]

Kirill A. Shutemov says:

This simple test case triggers a few locking asserts in the kernel:

/*
 * Reproducer: configure both an RX and a TX mmap ring on a NETLINK_GENERIC
 * socket, map them, and return without unmapping or closing anything.
 * According to the report, the kernel warning shown below is printed after
 * the process has exited.
 */
int main(int argc, char **argv)
{
        unsigned int block_size = 16 * 4096;
        /* The same ring geometry is used for the RX and the TX ring. */
        struct nl_mmap_req req = {
                .nm_block_size          = block_size,
                .nm_block_nr            = 64,
                .nm_frame_size          = 16384,
                .nm_frame_nr            = 64 * block_size / 16384,
        };
        unsigned int ring_size;
	int fd;

	/* The socket() result is not checked; a failure surfaces in setsockopt(). */
	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
        if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
                exit(1);
        if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
                exit(1);

	/* One mapping of 2 * ring_size covers both rings; its result is ignored. */
	ring_size = req.nm_block_nr * req.nm_block_size;
	mmap(NULL, 2 * ring_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	/* No munmap()/close(): the rings are still set up when the process exits. */
	return 0;
}

+++ exited with 0 +++
BUG: sleeping function called from invalid context at /home/kas/git/public/linux-mm/kernel/locking/mutex.c:616
in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: init
3 locks held by init/1:
 #0:  (reboot_mutex){+.+...}, at: [<ffffffff81080959>] SyS_reboot+0xa9/0x220
 #1:  ((reboot_notifier_list).rwsem){.+.+..}, at: [<ffffffff8107f379>] __blocking_notifier_call_chain+0x39/0x70
 #2:  (rcu_callback){......}, at: [<ffffffff810d32e0>] rcu_do_batch.isra.49+0x160/0x10c0
Preemption disabled at:[<ffffffff8145365f>] __delay+0xf/0x20

CPU: 1 PID: 1 Comm: init Not tainted 4.1.0-00009-gbddf4c4818e0 #253
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Debian-1.8.2-1 04/01/2014
 ffff88017b3d8000 ffff88027bc03c38 ffffffff81929ceb 0000000000000102
 0000000000000000 ffff88027bc03c68 ffffffff81085a9d 0000000000000002
 ffffffff81ca2a20 0000000000000268 0000000000000000 ffff88027bc03c98
Call Trace:
 <IRQ>  [<ffffffff81929ceb>] dump_stack+0x4f/0x7b
 [<ffffffff81085a9d>] ___might_sleep+0x16d/0x270
 [<ffffffff81085bed>] __might_sleep+0x4d/0x90
 [<ffffffff8192e96f>] mutex_lock_nested+0x2f/0x430
 [<ffffffff81932fed>] ? _raw_spin_unlock_irqrestore+0x5d/0x80
 [<ffffffff81464143>] ? __this_cpu_preempt_check+0x13/0x20
 [<ffffffff8182fc3d>] netlink_set_ring+0x1ed/0x350
 [<ffffffff8182e000>] ? netlink_undo_bind+0x70/0x70
 [<ffffffff8182fe20>] netlink_sock_destruct+0x80/0x150
 [<ffffffff817e484d>] __sk_free+0x1d/0x160
 [<ffffffff817e49a9>] sk_free+0x19/0x20
[..]

Cong Wang says:

We can't hold a mutex lock in an RCU callback, [..]

Thomas Graf says:

The socket should be dead at this point. It might be simpler to
add a netlink_release_ring() function which doesn't require
locking at all.

Reported-by: "Kirill A. Shutemov" <kirill@shutemov.name>
Diagnosed-by: Cong Wang <cwang@twopensource.com>
Suggested-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/netlink/af_netlink.c |   79 +++++++++++++++++++++++++++--------------------
 1 file changed, 47 insertions(+), 32 deletions(-)

--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -350,25 +350,52 @@ err1:
 	return NULL;
 }
 
+
+static void
+__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
+		   unsigned int order)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct sk_buff_head *queue;
+	struct netlink_ring *ring;
+
+	queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
+	ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+
+	spin_lock_bh(&queue->lock);
+
+	ring->frame_max		= req->nm_frame_nr - 1;
+	ring->head		= 0;
+	ring->frame_size	= req->nm_frame_size;
+	ring->pg_vec_pages	= req->nm_block_size / PAGE_SIZE;
+
+	swap(ring->pg_vec_len, req->nm_block_nr);
+	swap(ring->pg_vec_order, order);
+	swap(ring->pg_vec, pg_vec);
+
+	__skb_queue_purge(queue);
+	spin_unlock_bh(&queue->lock);
+
+	WARN_ON(atomic_read(&nlk->mapped));
+
+	if (pg_vec)
+		free_pg_vec(pg_vec, order, req->nm_block_nr);
+}
+
 static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
-			    bool closing, bool tx_ring)
+			    bool tx_ring)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
 	struct netlink_ring *ring;
-	struct sk_buff_head *queue;
 	void **pg_vec = NULL;
 	unsigned int order = 0;
-	int err;
 
 	ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
-	queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
 
-	if (!closing) {
-		if (atomic_read(&nlk->mapped))
-			return -EBUSY;
-		if (atomic_read(&ring->pending))
-			return -EBUSY;
-	}
+	if (atomic_read(&nlk->mapped))
+		return -EBUSY;
+	if (atomic_read(&ring->pending))
+		return -EBUSY;
 
 	if (req->nm_block_nr) {
 		if (ring->pg_vec != NULL)
@@ -400,31 +427,19 @@ static int netlink_set_ring(struct sock
 			return -EINVAL;
 	}
 
-	err = -EBUSY;
 	mutex_lock(&nlk->pg_vec_lock);
-	if (closing || atomic_read(&nlk->mapped) == 0) {
-		err = 0;
-		spin_lock_bh(&queue->lock);
-
-		ring->frame_max		= req->nm_frame_nr - 1;
-		ring->head		= 0;
-		ring->frame_size	= req->nm_frame_size;
-		ring->pg_vec_pages	= req->nm_block_size / PAGE_SIZE;
-
-		swap(ring->pg_vec_len, req->nm_block_nr);
-		swap(ring->pg_vec_order, order);
-		swap(ring->pg_vec, pg_vec);
-
-		__skb_queue_purge(queue);
-		spin_unlock_bh(&queue->lock);
-
-		WARN_ON(atomic_read(&nlk->mapped));
+	if (atomic_read(&nlk->mapped) == 0) {
+		__netlink_set_ring(sk, req, tx_ring, pg_vec, order);
+		mutex_unlock(&nlk->pg_vec_lock);
+		return 0;
 	}
+
 	mutex_unlock(&nlk->pg_vec_lock);
 
 	if (pg_vec)
 		free_pg_vec(pg_vec, order, req->nm_block_nr);
-	return err;
+
+	return -EBUSY;
 }
 
 static void netlink_mm_open(struct vm_area_struct *vma)
@@ -893,10 +908,10 @@ static void netlink_sock_destruct(struct
 
 		memset(&req, 0, sizeof(req));
 		if (nlk->rx_ring.pg_vec)
-			netlink_set_ring(sk, &req, true, false);
+			__netlink_set_ring(sk, &req, false, NULL, 0);
 		memset(&req, 0, sizeof(req));
 		if (nlk->tx_ring.pg_vec)
-			netlink_set_ring(sk, &req, true, true);
+			__netlink_set_ring(sk, &req, true, NULL, 0);
 	}
 #endif /* CONFIG_NETLINK_MMAP */
 
@@ -2190,7 +2205,7 @@ static int netlink_setsockopt(struct soc
 			return -EINVAL;
 		if (copy_from_user(&req, optval, sizeof(req)))
 			return -EFAULT;
-		err = netlink_set_ring(sk, &req, false,
+		err = netlink_set_ring(sk, &req,
 				       optname == NETLINK_TX_RING);
 		break;
 	}



  parent reply	other threads:[~2015-09-29 15:23 UTC|newest]

Thread overview: 93+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-09-29 15:17 [PATCH 3.14 00/84] 3.14.54-stable review Greg Kroah-Hartman
2015-09-29 15:17 ` [PATCH 3.14 01/84] blk-mq: fix buffer overflow when reading sysfs file of pending Greg Kroah-Hartman
2015-09-29 15:17 ` [PATCH 3.14 02/84] unshare: Unsharing a thread does not require unsharing a vm Greg Kroah-Hartman
2015-09-29 15:17 ` [PATCH 3.14 03/84] rtlwifi: rtl8192cu: Add new device ID Greg Kroah-Hartman
2015-09-29 15:17 ` [PATCH 3.14 04/84] tg3: Fix temperature reporting Greg Kroah-Hartman
2015-09-29 15:17 ` [PATCH 3.14 05/84] mac80211: enable assoc check for mesh interfaces Greg Kroah-Hartman
2015-09-29 15:17 ` [PATCH 3.14 06/84] arm64: kconfig: Move LIST_POISON to a safe value Greg Kroah-Hartman
2015-09-29 15:17 ` [PATCH 3.14 07/84] arm64: compat: fix vfp save/restore across signal handlers in big-endian Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 08/84] arm64: head.S: initialise mdcr_el2 in el2_setup Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 09/84] arm64: errata: add module build workaround for erratum #843419 Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 10/84] arm64: KVM: Disable virtual timer even if the guest is not using it Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 11/84] Input: evdev - do not report errors form flush() Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 12/84] ALSA: hda - Enable headphone jack detect on old Fujitsu laptops Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 13/84] ALSA: hda - Use ALC880_FIXUP_FUJITSU for FSC Amilo M1437 Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 14/84] powerpc/mm: Fix pte_pagesize_index() crash on 4K w/64K hash Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 15/84] powerpc/rtas: Introduce rtas_get_sensor_fast() for IRQ handlers Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 16/84] powerpc/mm: Recompute hash value after a failed update Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 17/84] CIFS: fix type confusion in copy offload ioctl Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 18/84] Add radeon suspend/resume quirk for HP Compaq dc5750 Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 19/84] mm: check if section present during memory block registering Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 20/84] x86/mm: Initialize pmd_idx in page_table_range_init_count() Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 22/84] [media] v4l: omap3isp: Fix sub-device power management code Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 23/84] Btrfs: check if previous transaction aborted to avoid fs corruption Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 24/84] NFSv4: dont set SETATTR for O_RDONLY|O_EXCL Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 25/84] NFS: Fix a NULL pointer dereference of migration recovery ops for v4.2 client Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 26/84] NFS: nfs_set_pgio_error sometimes misses errors Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 27/84] parisc: Use double word condition in 64bit CAS operation Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 28/84] parisc: Filter out spurious interrupts in PA-RISC irq handler Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 29/84] vmscan: fix increasing nr_isolated incurred by putback unevictable pages Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 30/84] fs: if a coredump already exists, unlink and recreate with O_EXCL Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 31/84] mmc: core: fix race condition in mmc_wait_data_done Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 32/84] md/raid10: always set reshape_safe when initializing reshape_position Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 33/84] xen/gntdev: convert priv->lock to a mutex Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 34/84] hfs: fix B-tree corruption after insertion at position 0 Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 35/84] IB/qib: Change lkey table allocation to support more MRs Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 36/84] IB/uverbs: reject invalid or unknown opcodes Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 37/84] IB/uverbs: Fix race between ib_uverbs_open and remove_one Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 38/84] IB/mlx4: Forbid using sysfs to change RoCE pkeys Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 39/84] IB/mlx4: Use correct SL on AH query under RoCE Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 40/84] stmmac: fix check for phydev being open Greg Kroah-Hartman
2015-09-30 11:22   ` Sergei Shtylyov
2015-10-01  3:04     ` Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 41/84] stmmac: troubleshoot unexpected bits in des0 & des1 Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 42/84] hfs,hfsplus: cache pages correctly between bnode_create and bnode_free Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 43/84] ipv6: Make MLD packets to only be processed locally Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 44/84] net: graceful exit from netif_alloc_netdev_queues() Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 45/84] rtnetlink: verify IFLA_VF_INFO attributes before passing them to driver Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 47/84] net/tipc: initialize security state for new connection socket Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 48/84] bridge: mdb: zero out the local br_ip variable before use Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 49/84] net: pktgen: fix race between pktgen_thread_worker() and kthread_stop() Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 50/84] net: do not process device backlog during unregistration Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 51/84] net: call rcu_read_lock early in process_backlog Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 52/84] net: Clone skb before setting peeked flag Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 53/84] net: Fix skb csum races when peeking Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 54/84] net: Fix skb_set_peeked use-after-free bug Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 55/84] bridge: mdb: fix double add notification Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 56/84] isdn/gigaset: reset tty->receive_room when attaching ser_gigaset Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 57/84] ipv6: lock socket in ip6_datagram_connect() Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 58/84] bonding: fix destruction of bond with devices different from arphrd_ether Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 59/84] bonding: correct the MAC address for "follow" fail_over_mac policy Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 60/84] inet: frags: fix defragmented packets IP header for af_packet Greg Kroah-Hartman
2015-09-29 15:18 ` Greg Kroah-Hartman [this message]
2015-09-29 15:18 ` [PATCH 3.14 62/84] net/mlx4_core: Fix wrong index in propagating port change event to VFs Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 63/84] ip6_gre: release cached dst on tunnel removal Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 64/84] usbnet: Get EVENT_NO_RUNTIME_PM bit before it is cleared Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 65/84] ipv6: fix exthdrs offload registration in out_rt path Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 66/84] net/ipv6: Correct PIM6 mrt_lock handling Greg Kroah-Hartman
2015-09-29 15:18 ` [PATCH 3.14 67/84] netlink, mmap: transform mmap skb into full skb on taps Greg Kroah-Hartman
2015-09-29 15:19 ` [PATCH 3.14 68/84] sctp: fix race on protocol/netns initialization Greg Kroah-Hartman
2015-09-29 15:19 ` [PATCH 3.14 69/84] openvswitch: Zero flows on allocation Greg Kroah-Hartman
2015-09-29 15:19 ` [PATCH 3.14 70/84] fib_rules: fix fib rule dumps across multiple skbs Greg Kroah-Hartman
2015-09-29 15:19 ` [PATCH 3.14 71/84] packet: missing dev_put() in packet_do_bind() Greg Kroah-Hartman
2015-09-29 15:19 ` [PATCH 3.14 72/84] rds: fix an integer overflow test in rds_info_getsockopt() Greg Kroah-Hartman
2015-09-29 15:19 ` [PATCH 3.14 74/84] bna: fix interrupts storm caused by erroneous packets Greg Kroah-Hartman
2015-09-29 15:19 ` [PATCH 3.14 75/84] net: gso: use feature flag argument in all protocol gso handlers Greg Kroah-Hartman
2015-09-29 15:19 ` [PATCH 3.14 76/84] Revert "iio: bmg160: IIO_BUFFER and IIO_TRIGGERED_BUFFER are required" Greg Kroah-Hartman
2015-09-29 15:19 ` [PATCH 3.14 77/84] x86/nmi: Enable nested do_nmi() handling for 64-bit kernels Greg Kroah-Hartman
2015-09-29 15:19 ` [PATCH 3.14 78/84] x86/nmi/64: Remove asm code that saves CR2 Greg Kroah-Hartman
2015-09-29 15:19 ` [PATCH 3.14 79/84] x86/nmi/64: Switch stacks on userspace NMI entry Greg Kroah-Hartman
2015-09-29 17:25   ` Andy Lutomirski
2015-09-29 17:57     ` Greg Kroah-Hartman
2015-09-29 15:19 ` [PATCH 3.14 80/84] x86/nmi/64: Improve nested NMI comments Greg Kroah-Hartman
2015-09-29 15:19 ` [PATCH 3.14 81/84] x86/nmi/64: Reorder nested NMI checks Greg Kroah-Hartman
2015-09-29 15:19 ` [PATCH 3.14 82/84] x86/nmi/64: Use DF to avoid userspace RSP confusing nested NMI detection Greg Kroah-Hartman
2015-09-29 15:19 ` [PATCH 3.14 83/84] udf: Check length of extended attributes and allocation descriptors Greg Kroah-Hartman
2015-09-29 15:19 ` [PATCH 3.14 84/84] NVMe: Initialize device reference count earlier Greg Kroah-Hartman
2015-09-29 16:53 ` [PATCH 3.14 00/84] 3.14.54-stable review Shuah Khan
2015-09-29 19:41   ` Greg Kroah-Hartman
2015-09-29 21:15 ` Guenter Roeck
2015-09-30  2:11   ` Greg Kroah-Hartman
2015-09-30  5:53 ` Sudip Mukherjee
2015-09-30  6:00   ` Greg Kroah-Hartman
     [not found] ` <560e8874.e968c20a.57231.fffff396@mx.google.com>
2015-10-02 13:38   ` Kevin Hilman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150929145333.636008842@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=davem@davemloft.net \
    --cc=fw@strlen.de \
    --cc=kirill@shutemov.name \
    --cc=linux-kernel@vger.kernel.org \
    --cc=stable@vger.kernel.org \
    --cc=tgraf@suug.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).