From: Paul Moore <paul.moore@hp.com>
To: "Michael S. Tsirkin" <mst@redhat.com>
Cc: David Miller <davem@davemloft.net>,
m.s.tsirkin@gmail.com, netdev@vger.kernel.org,
herbert@gondor.apana.org.au
Subject: Re: [PATCH RFC] tun: export underlying socket
Date: Fri, 11 Sep 2009 00:17:27 -0400 [thread overview]
Message-ID: <200909110017.27668.paul.moore@hp.com> (raw)
In-Reply-To: <20090910125929.GA32593@redhat.com>
On Thursday 10 September 2009 08:59:29 am Michael S. Tsirkin wrote:
> Tun device looks similar to a packet socket
> in that both pass complete frames from/to userspace.
>
> This patch fills in enough fields in the socket underlying tun driver
> to support sendmsg/recvmsg operations, and exports access to this socket
> to modules.
>
> This way, code using raw sockets to inject packets
> into a physical device, can support injecting
> packets into host network stack almost without modification.
>
> First user of this interface will be vhost virtualization
> accelerator.
No comments on the code at this point - I'm just trying to understand the
intended user right now, which I'm assuming is the vhost-net code you sent
previously?
> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
>
> This patch is on top of net-next master.
> An alternative approach would be to add an ioctl to tun, to export the
> underlying socket to userspace: a uniform way to work with a network
> device and the host stack might be useful there, as well.
> Kernel users could then do sockfd_lookup to get the socket.
> I decided against it for now as it requires more code.
> Please comment.
>
> drivers/net/tun.c      |   78 +++++++++++++++++++++++++++++++++++++++++++----
> include/linux/if_tun.h |   14 ++++++++
> 2 files changed, 85 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 589a44a..76f5faa 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -151,6 +151,7 @@ static int tun_attach(struct tun_struct *tun, struct
> file *file) err = 0;
> tfile->tun = tun;
> tun->tfile = tfile;
> + tun->socket.file = file;
> dev_hold(tun->dev);
> sock_hold(tun->socket.sk);
> atomic_inc(&tfile->count);
> @@ -165,6 +166,7 @@ static void __tun_detach(struct tun_struct *tun)
> /* Detach from net device */
> netif_tx_lock_bh(tun->dev);
> tun->tfile = NULL;
> + tun->socket.file = NULL;
> netif_tx_unlock_bh(tun->dev);
>
> /* Drop read queue */
> @@ -750,7 +752,7 @@ static __inline__ ssize_t tun_put_user(struct
> tun_struct *tun, len = min_t(int, skb->len, len);
>
> skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
> - total += len;
> + total += skb->len;
>
> tun->dev->stats.tx_packets++;
> tun->dev->stats.tx_bytes += len;
> @@ -758,12 +760,10 @@ static __inline__ ssize_t tun_put_user(struct
> tun_struct *tun, return total;
> }
>
> -static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec
> *iv, - unsigned long count, loff_t pos)
> +static ssize_t tun_do_read(struct tun_struct *tun,
> + struct kiocb *iocb, const struct iovec *iv,
> + unsigned long count, int noblock)
> {
> - struct file *file = iocb->ki_filp;
> - struct tun_file *tfile = file->private_data;
> - struct tun_struct *tun = __tun_get(tfile);
> DECLARE_WAITQUEUE(wait, current);
> struct sk_buff *skb;
> ssize_t len, ret = 0;
> @@ -785,7 +785,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb,
> const struct iovec *iv,
>
> /* Read frames from the queue */
> if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) {
> - if (file->f_flags & O_NONBLOCK) {
> + if (noblock) {
> ret = -EAGAIN;
> break;
> }
> @@ -813,6 +813,21 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb,
> const struct iovec *iv, remove_wait_queue(&tun->socket.wait, &wait);
>
> out:
> + return ret;
> +}
> +
> +static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec
> *iv, + unsigned long count, loff_t pos)
> +{
> + struct file *file = iocb->ki_filp;
> + struct tun_file *tfile = file->private_data;
> + struct tun_struct *tun = __tun_get(tfile);
> + ssize_t ret;
> +
> + if (!tun)
> + return -EBADFD;
> + ret = tun_do_read(tun, iocb, iv, count, file->f_flags & O_NONBLOCK);
> + ret = min_t(ssize_t, ret, count);
> tun_put(tun);
> return ret;
> }
> @@ -865,6 +880,37 @@ static void tun_sock_destruct(struct sock *sk)
> free_netdev(container_of(sk, struct tun_sock, sk)->tun->dev);
> }
>
> +static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
> + struct msghdr *m, size_t total_len)
> +{
> + struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
> + return tun_get_user(tun, m->msg_iov, total_len,
> + m->msg_flags & MSG_DONTWAIT);
> +}
> +
> +static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
> + struct msghdr *m, size_t total_len,
> + int flags)
> +{
> + struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
> + int ret;
> + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
> + return -EINVAL;
> + ret = tun_do_read(tun, iocb, m->msg_iov, total_len,
> + flags & MSG_DONTWAIT);
> + if (ret > total_len) {
> + m->msg_flags |= MSG_TRUNC;
> + ret = flags & MSG_TRUNC ? ret : total_len;
> + }
> + return ret;
> +}
> +
> +/* Ops structure to mimic raw sockets with tun */
> +static const struct proto_ops tun_socket_ops = {
> + .sendmsg = tun_sendmsg,
> + .recvmsg = tun_recvmsg,
> +};
> +
> static struct proto tun_proto = {
> .name = "tun",
> .owner = THIS_MODULE,
> @@ -982,6 +1028,7 @@ static int tun_set_iff(struct net *net, struct file
> *file, struct ifreq *ifr) goto err_free_dev;
>
> init_waitqueue_head(&tun->socket.wait);
> + tun->socket.ops = &tun_socket_ops;
> sock_init_data(&tun->socket, sk);
> sk->sk_write_space = tun_sock_write_space;
> sk->sk_sndbuf = INT_MAX;
> @@ -1483,6 +1530,23 @@ static void tun_cleanup(void)
> rtnl_link_unregister(&tun_link_ops);
> }
>
> +/* Get an underlying socket object from tun file. Returns error unless file is
> + * attached to a device. The returned object works like a packet socket, it
> + * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for
> + * holding a reference to the file for as long as the socket is in use. */
> +struct socket *tun_get_socket(struct file *file)
> +{
> + struct tun_struct *tun;
> + if (file->f_op != &tun_fops)
> + return ERR_PTR(-EINVAL);
> + tun = tun_get(file);
> + if (!tun)
> + return ERR_PTR(-EBADFD);
> + tun_put(tun);
> + return &tun->socket;
> +}
> +EXPORT_SYMBOL_GPL(tun_get_socket);
> +
> module_init(tun_init);
> module_exit(tun_cleanup);
> MODULE_DESCRIPTION(DRV_DESCRIPTION);
> diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
> index 3f5fd52..404abe0 100644
> --- a/include/linux/if_tun.h
> +++ b/include/linux/if_tun.h
> @@ -86,4 +86,18 @@ struct tun_filter {
> __u8 addr[0][ETH_ALEN];
> };
>
> +#ifdef __KERNEL__
> +#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
> +struct socket *tun_get_socket(struct file *);
> +#else
> +#include <linux/err.h>
> +#include <linux/errno.h>
> +struct file;
> +struct socket;
> +static inline struct socket *tun_get_socket(struct file *f)
> +{
> + return ERR_PTR(-EINVAL);
> +}
> +#endif /* CONFIG_TUN */
> +#endif /* __KERNEL__ */
> #endif /* __IF_TUN_H */
>
--
paul moore
linux @ hp
next prev parent reply other threads:[~2009-09-11 4:17 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-09-10 12:59 [PATCH RFC] tun: export underlying socket Michael S. Tsirkin
2009-09-10 13:19 ` Eric Dumazet
2009-09-10 13:27 ` Michael S. Tsirkin
2009-09-11 4:17 ` Paul Moore [this message]
2009-09-11 4:59 ` Michael S. Tsirkin
2009-09-11 5:36 ` Michael S. Tsirkin
2009-09-11 6:10 ` Eric Dumazet
2009-09-11 9:44 ` Michael S. Tsirkin
2009-09-14 8:01 ` Or Gerlitz
2009-09-14 8:07 ` Or Gerlitz
2009-09-14 8:09 ` Michael S. Tsirkin
2009-09-14 8:17 ` Or Gerlitz
2009-09-14 9:11 ` Michael S. Tsirkin
2009-09-14 9:43 ` Or Gerlitz
2009-09-14 10:10 ` Michael S. Tsirkin
2009-09-14 14:06 ` Or Gerlitz
2009-09-14 15:03 ` Herbert Xu
2009-09-15 13:02 ` Or Gerlitz
2009-09-15 13:31 ` Herbert Xu
2009-09-14 15:40 ` Michael S. Tsirkin
2009-09-15 13:11 ` Or Gerlitz
2009-09-15 13:18 ` Michael S. Tsirkin
2009-11-02 17:20 ` [PATCHv2] " Michael S. Tsirkin
2009-11-03 15:10 ` Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200909110017.27668.paul.moore@hp.com \
--to=paul.moore@hp.com \
--cc=davem@davemloft.net \
--cc=herbert@gondor.apana.org.au \
--cc=m.s.tsirkin@gmail.com \
--cc=mst@redhat.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.