From: "Michael S. Tsirkin" <mst@redhat.com>
To: Arnd Bergmann <arnd@arndb.de>
Cc: netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
Ed Swierk <eswierk@aristanetworks.com>,
Patrick McHardy <kaber@trash.net>,
Sridhar Samudrala <sri@us.ibm.com>,
qemu-devel@nongnu.org
Subject: Re: [PATCH 2/2] net/macvtap: add vhost support
Date: Sun, 14 Feb 2010 15:27:32 +0200 [thread overview]
Message-ID: <20100214132732.GB31180@redhat.com> (raw)
In-Reply-To: <201002131135.08477.arnd@arndb.de>
On Sat, Feb 13, 2010 at 11:35:08AM +0100, Arnd Bergmann wrote:
> This adds support for passing a macvtap file descriptor into
> vhost-net, much like we already do for tun/tap.
>
> Most of the new code is taken from the respective patch
> in the tun driver and may get consolidated in the future.
>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
> drivers/net/macvtap.c | 98 ++++++++++++++++++++++++++++++++++---------
> drivers/vhost/net.c | 8 +++-
> include/linux/if_macvlan.h | 13 ++++++
> 3 files changed, 96 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
> index 7050997..e354501 100644
> --- a/drivers/net/macvtap.c
> +++ b/drivers/net/macvtap.c
> @@ -58,6 +58,8 @@ static unsigned int macvtap_major;
> static struct class *macvtap_class;
> static struct cdev macvtap_cdev;
>
> +static const struct proto_ops macvtap_socket_ops;
> +
> /*
> * RCU usage:
> * The macvtap_queue and the macvlan_dev are loosely coupled, the
> @@ -176,7 +178,7 @@ static int macvtap_forward(struct net_device *dev, struct sk_buff *skb)
> return -ENOLINK;
>
> skb_queue_tail(&q->sk.sk_receive_queue, skb);
> - wake_up(q->sk.sk_sleep);
> + wake_up_interruptible_poll(q->sk.sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND);
> return 0;
> }
>
> @@ -242,7 +244,7 @@ static void macvtap_sock_write_space(struct sock *sk)
> return;
>
> if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
> - wake_up_interruptible_sync(sk->sk_sleep);
> + wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND);
> }
>
> static int macvtap_open(struct inode *inode, struct file *file)
> @@ -270,6 +272,8 @@ static int macvtap_open(struct inode *inode, struct file *file)
> init_waitqueue_head(&q->sock.wait);
> q->sock.type = SOCK_RAW;
> q->sock.state = SS_CONNECTED;
> + q->sock.file = file;
> + q->sock.ops = &macvtap_socket_ops;
> sock_init_data(&q->sock, &q->sk);
> q->sk.sk_write_space = macvtap_sock_write_space;
>
> @@ -387,32 +391,20 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,
>
> rcu_read_lock_bh();
> vlan = rcu_dereference(q->vlan);
> - macvlan_count_rx(vlan, len, ret == 0, 0);
> + if (vlan)
> + macvlan_count_rx(vlan, len, ret == 0, 0);
> rcu_read_unlock_bh();
>
> return ret ? ret : len;
> }
>
> -static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
> - unsigned long count, loff_t pos)
> +static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb,
> + const struct iovec *iv, unsigned long len,
> + int noblock)
> {
> - struct file *file = iocb->ki_filp;
> - struct macvtap_queue *q = file->private_data;
> -
> DECLARE_WAITQUEUE(wait, current);
> struct sk_buff *skb;
> - ssize_t len, ret = 0;
> -
> - if (!q) {
> - ret = -ENOLINK;
> - goto out;
> - }
> -
> - len = iov_length(iv, count);
> - if (len < 0) {
> - ret = -EINVAL;
> - goto out;
> - }
> + ssize_t ret = 0;
>
> add_wait_queue(q->sk.sk_sleep, &wait);
> while (len) {
> @@ -421,7 +413,7 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
> /* Read frames from the queue */
> skb = skb_dequeue(&q->sk.sk_receive_queue);
> if (!skb) {
> - if (file->f_flags & O_NONBLOCK) {
> + if (noblock) {
> ret = -EAGAIN;
> break;
> }
> @@ -440,7 +432,24 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
>
> current->state = TASK_RUNNING;
> remove_wait_queue(q->sk.sk_sleep, &wait);
> + return ret;
> +}
> +
> +static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
> + unsigned long count, loff_t pos)
> +{
> + struct file *file = iocb->ki_filp;
> + struct macvtap_queue *q = file->private_data;
> + ssize_t len, ret = 0;
>
> + len = iov_length(iv, count);
> + if (len < 0) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK);
> + ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */
> out:
> return ret;
> }
> @@ -538,6 +547,53 @@ static const struct file_operations macvtap_fops = {
> #endif
> };
>
> +static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock,
> + struct msghdr *m, size_t total_len)
> +{
> + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
> + return macvtap_get_user(q, m->msg_iov, total_len,
> + m->msg_flags & MSG_DONTWAIT);
> +}
> +
> +static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock,
> + struct msghdr *m, size_t total_len,
> + int flags)
> +{
> + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
> + int ret;
> + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
> + return -EINVAL;
> + ret = macvtap_do_read(q, iocb, m->msg_iov, total_len,
> + flags & MSG_DONTWAIT);
> + if (ret > total_len) {
> + m->msg_flags |= MSG_TRUNC;
> + ret = flags & MSG_TRUNC ? ret : total_len;
> + }
> + return ret;
> +}
> +
> +/* Ops structure to mimic raw sockets with tun */
> +static const struct proto_ops macvtap_socket_ops = {
> + .sendmsg = macvtap_sendmsg,
> + .recvmsg = macvtap_recvmsg,
> +};
> +
> +/* Get an underlying socket object from tun file. Returns error unless file is
> + * attached to a device. The returned object works like a packet socket, it
> + * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for
> + * holding a reference to the file for as long as the socket is in use. */
> +struct socket *macvtap_get_socket(struct file *file)
> +{
> + struct macvtap_queue *q;
> + if (file->f_op != &macvtap_fops)
> + return ERR_PTR(-EINVAL);
> + q = file->private_data;
> + if (!q)
> + return ERR_PTR(-EBADFD);
> + return &q->sock;
> +}
> +EXPORT_SYMBOL_GPL(macvtap_get_socket);
> +
> static int macvtap_init(void)
> {
> int err;
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 4c89283..91a324c 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -22,6 +22,7 @@
> #include <linux/if_packet.h>
> #include <linux/if_arp.h>
> #include <linux/if_tun.h>
> +#include <linux/if_macvlan.h>
>
> #include <net/sock.h>
>
> @@ -452,13 +453,16 @@ err:
> return ERR_PTR(r);
> }
>
> -static struct socket *get_tun_socket(int fd)
> +static struct socket *get_tap_socket(int fd)
> {
> struct file *file = fget(fd);
> struct socket *sock;
> if (!file)
> return ERR_PTR(-EBADF);
> sock = tun_get_socket(file);
> + if (!IS_ERR(sock))
> + return sock;
> + sock = macvtap_get_socket(file);
> if (IS_ERR(sock))
> fput(file);
> return sock;
> @@ -473,7 +477,7 @@ static struct socket *get_socket(int fd)
> sock = get_raw_socket(fd);
> if (!IS_ERR(sock))
> return sock;
> - sock = get_tun_socket(fd);
> + sock = get_tap_socket(fd);
> if (!IS_ERR(sock))
> return sock;
> return ERR_PTR(-ENOTSOCK);
This will also need a dependency on macvtap in Kconfig.
See how it's done for tun.
> diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
> index 51f1512..7d7f1e3 100644
> --- a/include/linux/if_macvlan.h
> +++ b/include/linux/if_macvlan.h
> @@ -7,6 +7,19 @@
> #include <linux/netlink.h>
> #include <net/netlink.h>
>
> +#if defined(CONFIG_MACVTAP) || defined(CONFIG_MACVTAP_MODULE)
> +struct socket *macvtap_get_socket(struct file *);
> +#else
> +#include <linux/err.h>
> +#include <linux/errno.h>
> +struct file;
> +struct socket;
> +static inline struct socket *macvtap_get_socket(struct file *f)
> +{
> + return ERR_PTR(-EINVAL);
> +}
> +#endif /* CONFIG_MACVTAP */
> +
> struct macvlan_port;
> struct macvtap_queue;
>
> --
> 1.6.3.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
WARNING: multiple messages have this Message-ID (diff)
From: "Michael S. Tsirkin" <mst@redhat.com>
To: Arnd Bergmann <arnd@arndb.de>
Cc: netdev@vger.kernel.org, qemu-devel@nongnu.org,
Ed Swierk <eswierk@aristanetworks.com>,
linux-kernel@vger.kernel.org, Patrick McHardy <kaber@trash.net>,
Sridhar Samudrala <sri@us.ibm.com>
Subject: [Qemu-devel] Re: [PATCH 2/2] net/macvtap: add vhost support
Date: Sun, 14 Feb 2010 15:27:32 +0200 [thread overview]
Message-ID: <20100214132732.GB31180@redhat.com> (raw)
In-Reply-To: <201002131135.08477.arnd@arndb.de>
On Sat, Feb 13, 2010 at 11:35:08AM +0100, Arnd Bergmann wrote:
> This adds support for passing a macvtap file descriptor into
> vhost-net, much like we already do for tun/tap.
>
> Most of the new code is taken from the respective patch
> in the tun driver and may get consolidated in the future.
>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
> drivers/net/macvtap.c | 98 ++++++++++++++++++++++++++++++++++---------
> drivers/vhost/net.c | 8 +++-
> include/linux/if_macvlan.h | 13 ++++++
> 3 files changed, 96 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
> index 7050997..e354501 100644
> --- a/drivers/net/macvtap.c
> +++ b/drivers/net/macvtap.c
> @@ -58,6 +58,8 @@ static unsigned int macvtap_major;
> static struct class *macvtap_class;
> static struct cdev macvtap_cdev;
>
> +static const struct proto_ops macvtap_socket_ops;
> +
> /*
> * RCU usage:
> * The macvtap_queue and the macvlan_dev are loosely coupled, the
> @@ -176,7 +178,7 @@ static int macvtap_forward(struct net_device *dev, struct sk_buff *skb)
> return -ENOLINK;
>
> skb_queue_tail(&q->sk.sk_receive_queue, skb);
> - wake_up(q->sk.sk_sleep);
> + wake_up_interruptible_poll(q->sk.sk_sleep, POLLIN | POLLRDNORM | POLLRDBAND);
> return 0;
> }
>
> @@ -242,7 +244,7 @@ static void macvtap_sock_write_space(struct sock *sk)
> return;
>
> if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
> - wake_up_interruptible_sync(sk->sk_sleep);
> + wake_up_interruptible_poll(sk->sk_sleep, POLLOUT | POLLWRNORM | POLLWRBAND);
> }
>
> static int macvtap_open(struct inode *inode, struct file *file)
> @@ -270,6 +272,8 @@ static int macvtap_open(struct inode *inode, struct file *file)
> init_waitqueue_head(&q->sock.wait);
> q->sock.type = SOCK_RAW;
> q->sock.state = SS_CONNECTED;
> + q->sock.file = file;
> + q->sock.ops = &macvtap_socket_ops;
> sock_init_data(&q->sock, &q->sk);
> q->sk.sk_write_space = macvtap_sock_write_space;
>
> @@ -387,32 +391,20 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q,
>
> rcu_read_lock_bh();
> vlan = rcu_dereference(q->vlan);
> - macvlan_count_rx(vlan, len, ret == 0, 0);
> + if (vlan)
> + macvlan_count_rx(vlan, len, ret == 0, 0);
> rcu_read_unlock_bh();
>
> return ret ? ret : len;
> }
>
> -static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
> - unsigned long count, loff_t pos)
> +static ssize_t macvtap_do_read(struct macvtap_queue *q, struct kiocb *iocb,
> + const struct iovec *iv, unsigned long len,
> + int noblock)
> {
> - struct file *file = iocb->ki_filp;
> - struct macvtap_queue *q = file->private_data;
> -
> DECLARE_WAITQUEUE(wait, current);
> struct sk_buff *skb;
> - ssize_t len, ret = 0;
> -
> - if (!q) {
> - ret = -ENOLINK;
> - goto out;
> - }
> -
> - len = iov_length(iv, count);
> - if (len < 0) {
> - ret = -EINVAL;
> - goto out;
> - }
> + ssize_t ret = 0;
>
> add_wait_queue(q->sk.sk_sleep, &wait);
> while (len) {
> @@ -421,7 +413,7 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
> /* Read frames from the queue */
> skb = skb_dequeue(&q->sk.sk_receive_queue);
> if (!skb) {
> - if (file->f_flags & O_NONBLOCK) {
> + if (noblock) {
> ret = -EAGAIN;
> break;
> }
> @@ -440,7 +432,24 @@ static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
>
> current->state = TASK_RUNNING;
> remove_wait_queue(q->sk.sk_sleep, &wait);
> + return ret;
> +}
> +
> +static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv,
> + unsigned long count, loff_t pos)
> +{
> + struct file *file = iocb->ki_filp;
> + struct macvtap_queue *q = file->private_data;
> + ssize_t len, ret = 0;
>
> + len = iov_length(iv, count);
> + if (len < 0) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + ret = macvtap_do_read(q, iocb, iv, len, file->f_flags & O_NONBLOCK);
> + ret = min_t(ssize_t, ret, len); /* XXX copied from tun.c. Why? */
> out:
> return ret;
> }
> @@ -538,6 +547,53 @@ static const struct file_operations macvtap_fops = {
> #endif
> };
>
> +static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock,
> + struct msghdr *m, size_t total_len)
> +{
> + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
> + return macvtap_get_user(q, m->msg_iov, total_len,
> + m->msg_flags & MSG_DONTWAIT);
> +}
> +
> +static int macvtap_recvmsg(struct kiocb *iocb, struct socket *sock,
> + struct msghdr *m, size_t total_len,
> + int flags)
> +{
> + struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
> + int ret;
> + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
> + return -EINVAL;
> + ret = macvtap_do_read(q, iocb, m->msg_iov, total_len,
> + flags & MSG_DONTWAIT);
> + if (ret > total_len) {
> + m->msg_flags |= MSG_TRUNC;
> + ret = flags & MSG_TRUNC ? ret : total_len;
> + }
> + return ret;
> +}
> +
> +/* Ops structure to mimic raw sockets with tun */
> +static const struct proto_ops macvtap_socket_ops = {
> + .sendmsg = macvtap_sendmsg,
> + .recvmsg = macvtap_recvmsg,
> +};
> +
> +/* Get an underlying socket object from tun file. Returns error unless file is
> + * attached to a device. The returned object works like a packet socket, it
> + * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for
> + * holding a reference to the file for as long as the socket is in use. */
> +struct socket *macvtap_get_socket(struct file *file)
> +{
> + struct macvtap_queue *q;
> + if (file->f_op != &macvtap_fops)
> + return ERR_PTR(-EINVAL);
> + q = file->private_data;
> + if (!q)
> + return ERR_PTR(-EBADFD);
> + return &q->sock;
> +}
> +EXPORT_SYMBOL_GPL(macvtap_get_socket);
> +
> static int macvtap_init(void)
> {
> int err;
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 4c89283..91a324c 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -22,6 +22,7 @@
> #include <linux/if_packet.h>
> #include <linux/if_arp.h>
> #include <linux/if_tun.h>
> +#include <linux/if_macvlan.h>
>
> #include <net/sock.h>
>
> @@ -452,13 +453,16 @@ err:
> return ERR_PTR(r);
> }
>
> -static struct socket *get_tun_socket(int fd)
> +static struct socket *get_tap_socket(int fd)
> {
> struct file *file = fget(fd);
> struct socket *sock;
> if (!file)
> return ERR_PTR(-EBADF);
> sock = tun_get_socket(file);
> + if (!IS_ERR(sock))
> + return sock;
> + sock = macvtap_get_socket(file);
> if (IS_ERR(sock))
> fput(file);
> return sock;
> @@ -473,7 +477,7 @@ static struct socket *get_socket(int fd)
> sock = get_raw_socket(fd);
> if (!IS_ERR(sock))
> return sock;
> - sock = get_tun_socket(fd);
> + sock = get_tap_socket(fd);
> if (!IS_ERR(sock))
> return sock;
> return ERR_PTR(-ENOTSOCK);
This will also need a dependency on macvtap in Kconfig.
See how it's done for tun.
> diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
> index 51f1512..7d7f1e3 100644
> --- a/include/linux/if_macvlan.h
> +++ b/include/linux/if_macvlan.h
> @@ -7,6 +7,19 @@
> #include <linux/netlink.h>
> #include <net/netlink.h>
>
> +#if defined(CONFIG_MACVTAP) || defined(CONFIG_MACVTAP_MODULE)
> +struct socket *macvtap_get_socket(struct file *);
> +#else
> +#include <linux/err.h>
> +#include <linux/errno.h>
> +struct file;
> +struct socket;
> +static inline struct socket *macvtap_get_socket(struct file *f)
> +{
> + return ERR_PTR(-EINVAL);
> +}
> +#endif /* CONFIG_MACVTAP */
> +
> struct macvlan_port;
> struct macvtap_queue;
>
> --
> 1.6.3.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
next prev parent reply other threads:[~2010-02-14 13:31 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-02-13 10:33 [PATCH 1/2] macvtap: rework object lifetime rules Arnd Bergmann
2010-02-13 10:33 ` [Qemu-devel] " Arnd Bergmann
2010-02-13 10:35 ` [PATCH 2/2] net/macvtap: add vhost support Arnd Bergmann
2010-02-13 10:35 ` [Qemu-devel] " Arnd Bergmann
2010-02-14 13:27 ` Michael S. Tsirkin [this message]
2010-02-14 13:27 ` [Qemu-devel] " Michael S. Tsirkin
2010-02-15 9:20 ` [PATCH 2/2] net/macvtap: add vhost support Arnd Bergmann
2010-02-15 9:20 ` [Qemu-devel] " Arnd Bergmann
2010-02-15 23:48 ` [PATCH 1/2] macvtap: rework object lifetime rules Ed Swierk
2010-02-15 23:48 ` [Qemu-devel] " Ed Swierk
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100214132732.GB31180@redhat.com \
--to=mst@redhat.com \
--cc=arnd@arndb.de \
--cc=eswierk@aristanetworks.com \
--cc=kaber@trash.net \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=qemu-devel@nongnu.org \
--cc=sri@us.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.