All of lore.kernel.org
 help / color / mirror / Atom feed
From: Arnd Bergmann <arnd@arndb.de>
To: Patrick McHardy <kaber@trash.net>
Cc: Sridhar Samudrala <sri@us.ibm.com>,
	Ed Swierk <eswierk@aristanetworks.com>,
	netdev@vger.kernel.org
Subject: [PATCH] net/macvtap: fix reference counting
Date: Thu, 11 Feb 2010 16:45:02 +0100	[thread overview]
Message-ID: <201002111645.02770.arnd@arndb.de> (raw)
In-Reply-To: <4B72F67F.1040008@trash.net>

The RCU usage in the original code was broken because
there are cases where we may sleep with rcu_read_lock
held. As a fix, change macvtap_file_get_queue to
get a reference on the socket and the netdev instead of
holding the full rcu_read_lock.

Also, change the macvtap_file_get_queue failure case so
that it does not require a subsequent
macvtap_file_put_queue, as pointed out by Ed Swierk.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Cc: Ed Swierk <eswierk@aristanetworks.com>
Cc: Sridhar Samudrala <sri@us.ibm.com>
---
 drivers/net/macvtap.c |   57 +++++++++++++++++++++++++++++++-----------------
 1 files changed, 37 insertions(+), 20 deletions(-)

Sridhar, Ed: Does this look ok to you? I'm still working
on restoring my test setup, but I'd like you to take a
look at this version.

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index ad1f6ef..5954324 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -159,8 +159,12 @@ static void macvtap_del_queues(struct net_device *dev)
 
 static inline struct macvtap_queue *macvtap_file_get_queue(struct file *file)
 {
+	struct macvtap_queue *q;
 	rcu_read_lock_bh();
-	return rcu_dereference(file->private_data);
+	q = rcu_dereference(file->private_data);
+	if (!q)
+		rcu_read_unlock_bh();
+	return q;
 }
 
 static inline void macvtap_file_put_queue(void)
@@ -314,13 +318,13 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait)
 	     sock_writeable(&q->sk)))
 		mask |= POLLOUT | POLLWRNORM;
 
-out:
 	macvtap_file_put_queue();
+out:
 	return mask;
 }
 
 /* Get packet from user space buffer */
-static ssize_t macvtap_get_user(struct macvtap_queue *q,
+static ssize_t macvtap_get_user(struct macvlan_dev *vlan, struct sock *sk,
 				const struct iovec *iv, size_t count,
 				int noblock)
 {
@@ -331,10 +335,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
 	if (unlikely(len < ETH_HLEN))
 		return -EINVAL;
 
-	skb = sock_alloc_send_skb(&q->sk, NET_IP_ALIGN + len, noblock, &err);
+	skb = sock_alloc_send_skb(sk, NET_IP_ALIGN + len, noblock, &err);
 
 	if (!skb) {
-		macvlan_count_rx(q->vlan, 0, false, false);
+		macvlan_count_rx(vlan, 0, false, false);
 		return err;
 	}
 
@@ -342,14 +346,14 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
 	skb_put(skb, count);
 
 	if (skb_copy_datagram_from_iovec(skb, 0, iv, 0, len)) {
-		macvlan_count_rx(q->vlan, 0, false, false);
+		macvlan_count_rx(vlan, 0, false, false);
 		kfree_skb(skb);
 		return -EFAULT;
 	}
 
 	skb_set_network_header(skb, ETH_HLEN);
 
-	macvlan_start_xmit(skb, q->vlan->dev);
+	macvlan_start_xmit(skb, vlan->dev);
 
 	return count;
 }
@@ -360,23 +364,29 @@ static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv,
 	struct file *file = iocb->ki_filp;
 	ssize_t result = -ENOLINK;
 	struct macvtap_queue *q = macvtap_file_get_queue(file);
+	struct macvlan_dev *vlan;
+	struct sock *sk;
 
 	if (!q)
 		goto out;
 
-	result = macvtap_get_user(q, iv, iov_length(iv, count),
+	vlan = q->vlan;
+	sk = &q->sk;
+	sock_hold(sk);
+	macvtap_file_put_queue();
+
+	result = macvtap_get_user(vlan, sk, iv, iov_length(iv, count),
 			      file->f_flags & O_NONBLOCK);
+	sock_put(sk);
 out:
-	macvtap_file_put_queue();
 	return result;
 }
 
 /* Put packet to the user space buffer */
-static ssize_t macvtap_put_user(struct macvtap_queue *q,
+static ssize_t macvtap_put_user(struct macvlan_dev *vlan,
 				const struct sk_buff *skb,
 				const struct iovec *iv, int len)
 {
-	struct macvlan_dev *vlan = q->vlan;
 	int ret;
 
 	len = min_t(int, skb->len, len);
@@ -393,15 +403,20 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
 {
 	struct file *file = iocb->ki_filp;
 	struct macvtap_queue *q = macvtap_file_get_queue(file);
+	struct macvlan_dev *vlan;
+	struct sock *sk;
 
 	DECLARE_WAITQUEUE(wait, current);
 	struct sk_buff *skb;
 	ssize_t len, ret = 0;
 
-	if (!q) {
-		ret = -ENOLINK;
-		goto out;
-	}
+	if (!q)
+		return -ENOLINK;
+
+	vlan = q->vlan;
+	sk = &q->sk;
+	sock_hold(sk);
+	macvtap_file_put_queue();
 
 	len = iov_length(iv, count);
 	if (len < 0) {
@@ -409,12 +424,12 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
 		goto out;
 	}
 
-	add_wait_queue(q->sk.sk_sleep, &wait);
+	add_wait_queue(sk->sk_sleep, &wait);
 	while (len) {
 		current->state = TASK_INTERRUPTIBLE;
 
 		/* Read frames from the queue */
-		skb = skb_dequeue(&q->sk.sk_receive_queue);
+		skb = skb_dequeue(&sk->sk_receive_queue);
 		if (!skb) {
 			if (file->f_flags & O_NONBLOCK) {
 				ret = -EAGAIN;
@@ -428,16 +443,16 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
 			schedule();
 			continue;
 		}
-		ret = macvtap_put_user(q, skb, iv, len);
+		ret = macvtap_put_user(vlan, skb, iv, len);
 		kfree_skb(skb);
 		break;
 	}
 
 	current->state = TASK_RUNNING;
-	remove_wait_queue(q->sk.sk_sleep, &wait);
+	remove_wait_queue(sk->sk_sleep, &wait);
 
 out:
-	macvtap_file_put_queue();
+	sock_put(sk);
 	return ret;
 }
 
@@ -485,6 +500,8 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
 			return -EFAULT;
 
 		q = macvtap_file_get_queue(file);
+		if (!q)
+			return -ENOLINK;
 		q->sk.sk_sndbuf = u;
 		macvtap_file_put_queue();
 		return 0;
-- 
1.6.3.3

  reply	other threads:[~2010-02-11 15:45 UTC|newest]

Thread overview: 63+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-01-27 10:04 [Bridge] [PATCH 0/3 v3] macvtap driver Arnd Bergmann
2010-01-27 10:04 ` Arnd Bergmann
2010-01-27 10:05 ` [Bridge] [PATCH 1/3] net: maintain namespace isolation between vlan and real device Arnd Bergmann
2010-01-27 10:05   ` Arnd Bergmann
2010-01-29  5:33   ` [Bridge] " David Miller
2010-01-29  5:33     ` David Miller
2010-01-29 10:12     ` [Bridge] " Arnd Bergmann
2010-01-29 10:12       ` Arnd Bergmann
2010-01-27 10:06 ` [Bridge] [PATCH 2/3] net/macvlan: allow multiple driver backends Arnd Bergmann
2010-01-27 10:06   ` Arnd Bergmann
2010-01-27 21:09 ` [Bridge] [PATCH 3/3] net: macvtap driver Arnd Bergmann
2010-01-27 21:09   ` Arnd Bergmann
2010-01-28 17:34   ` [Bridge] " Michael S. Tsirkin
2010-01-28 17:34     ` Michael S. Tsirkin
2010-01-28 20:18     ` [Bridge] " Arnd Bergmann
2010-01-28 20:18       ` Arnd Bergmann
2010-01-29 11:21       ` [Bridge] " Michael S. Tsirkin
2010-01-29 11:21         ` Michael S. Tsirkin
2010-01-29 19:49         ` [Bridge] " Arnd Bergmann
2010-01-29 19:49           ` Arnd Bergmann
2010-01-27 21:59 ` [Bridge] [PATCH 0/3 v3] " Arnd Bergmann
2010-01-27 21:59   ` Arnd Bergmann
2010-01-30 22:22 ` [Bridge] [PATCH 0/3 v4] " Arnd Bergmann
2010-01-30 22:22   ` Arnd Bergmann
2010-01-30 22:23   ` [Bridge] [PATCH 1/3] net: maintain namespace isolation between vlan and real device Arnd Bergmann
2010-01-30 22:23     ` Arnd Bergmann
2010-01-30 22:23   ` Arnd Bergmann
2010-01-30 22:23   ` [Bridge] [PATCH 2/3] macvlan: allow multiple driver backends Arnd Bergmann
2010-01-30 22:23     ` Arnd Bergmann
2010-01-30 22:23   ` Arnd Bergmann
2010-01-30 22:24   ` [PATCH 3/3] net: macvtap driver Arnd Bergmann
2010-01-30 22:24   ` [Bridge] " Arnd Bergmann
2010-01-30 22:24     ` Arnd Bergmann
2010-02-04  4:21   ` [PATCH 0/3 v4] " David Miller
2010-02-04  4:21   ` [Bridge] " David Miller
2010-02-04  4:21     ` David Miller
2010-02-08 17:14     ` Ed Swierk
2010-02-08 18:55       ` Sridhar Samudrala
2010-02-08 23:30         ` Ed Swierk
2010-02-10 14:50           ` Arnd Bergmann
2010-02-11  0:42             ` Ed Swierk
2010-02-11  7:12               ` Arnd Bergmann
2010-02-09  3:25         ` Ed Swierk
2010-02-10 14:52           ` Arnd Bergmann
2010-02-10 14:48         ` Arnd Bergmann
2010-02-10 18:05           ` Sridhar Samudrala
2010-02-10 18:10             ` Patrick McHardy
2010-02-11 15:45               ` Arnd Bergmann [this message]
2010-02-11 15:55                 ` [PATCH v2] net/macvtap: fix reference counting Arnd Bergmann
2010-02-11 21:09                   ` Sridhar Samudrala
2010-02-16  5:53                     ` David Miller
2010-02-18 15:44                       ` Arnd Bergmann
2010-02-18 15:45                         ` [PATCH 1/3] macvtap: rework object lifetime rules Arnd Bergmann
2010-02-18 20:09                           ` Sridhar Samudrala
2010-02-18 22:11                           ` David Miller
2010-02-18 15:46                         ` [PATCH 2/3] net/macvtap: add vhost support Arnd Bergmann
2010-02-18 20:10                           ` Sridhar Samudrala
2010-02-18 22:11                           ` David Miller
2010-02-18 15:48                         ` [PATCH 3/3] macvtap: add GSO/csum offload support Arnd Bergmann
2010-02-18 20:38                           ` Sridhar Samudrala
2010-02-18 22:11                           ` David Miller
2010-02-12 20:58                   ` [PATCH v2] net/macvtap: fix reference counting Ed Swierk
2010-01-30 22:22 ` [PATCH 0/3 v4] macvtap driver Arnd Bergmann

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=201002111645.02770.arnd@arndb.de \
    --to=arnd@arndb.de \
    --cc=eswierk@aristanetworks.com \
    --cc=kaber@trash.net \
    --cc=netdev@vger.kernel.org \
    --cc=sri@us.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.