netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Paul Moore <paul.moore@hp.com>
To: "Michael S. Tsirkin" <mst@redhat.com>
Cc: David Miller <davem@davemloft.net>,
	m.s.tsirkin@gmail.com, netdev@vger.kernel.org,
	herbert@gondor.apana.org.au
Subject: Re: [PATCH RFC] tun: export underlying socket
Date: Fri, 11 Sep 2009 00:17:27 -0400	[thread overview]
Message-ID: <200909110017.27668.paul.moore@hp.com> (raw)
In-Reply-To: <20090910125929.GA32593@redhat.com>

On Thursday 10 September 2009 08:59:29 am Michael S. Tsirkin wrote:
> Tun device looks similar to a packet socket
> in that both pass complete frames from/to userspace.
> 
> This patch fills in enough fields in the socket underlying tun driver
> to support sendmsg/recvmsg operations, and exports access to this socket
> to modules.
> 
> This way, code using raw sockets to inject packets
> into a physical device, can support injecting
> packets into host network stack almost without modification.
> 
> First user of this interface will be vhost virtualization
> accelerator.

No comments on the code at this point - I'm just trying to understand the
intended user right now, which I'm assuming is the vhost-net bits you sent
previously?

> Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
> ---
> 
> This patch is on top of net-next master.
> An alternative approach would be to add an ioctl to tun, to export the
> underlying socket to userspace: a uniform way to work with a network
> device and the host stack might be useful there, as well.
> Kernel users could then do sockfd_lookup to get the socket.
> I decided against it for now as it requires more code.
> Please comment.
> 
>  drivers/net/tun.c      |   78 +++++++++++++++++++++++++++++++++++++++++++----
>  include/linux/if_tun.h |   14 ++++++++
>  2 files changed, 85 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index 589a44a..76f5faa 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -151,6 +151,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
>  	err = 0;
>  	tfile->tun = tun;
>  	tun->tfile = tfile;
> +	tun->socket.file = file;
>  	dev_hold(tun->dev);
>  	sock_hold(tun->socket.sk);
>  	atomic_inc(&tfile->count);
> @@ -165,6 +166,7 @@ static void __tun_detach(struct tun_struct *tun)
>  	/* Detach from net device */
>  	netif_tx_lock_bh(tun->dev);
>  	tun->tfile = NULL;
> +	tun->socket.file = NULL;
>  	netif_tx_unlock_bh(tun->dev);
> 
>  	/* Drop read queue */
> @@ -750,7 +752,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
>  	len = min_t(int, skb->len, len);
> 
>  	skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
> -	total += len;
> +	total += skb->len;
> 
>  	tun->dev->stats.tx_packets++;
>  	tun->dev->stats.tx_bytes += len;
> @@ -758,12 +760,10 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
>  	return total;
>  }
> 
> -static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
> -			    unsigned long count, loff_t pos)
> +static ssize_t tun_do_read(struct tun_struct *tun,
> +			   struct kiocb *iocb, const struct iovec *iv,
> +			   unsigned long count, int noblock)
>  {
> -	struct file *file = iocb->ki_filp;
> -	struct tun_file *tfile = file->private_data;
> -	struct tun_struct *tun = __tun_get(tfile);
>  	DECLARE_WAITQUEUE(wait, current);
>  	struct sk_buff *skb;
>  	ssize_t len, ret = 0;
> @@ -785,7 +785,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
> 
>  		/* Read frames from the queue */
>  		if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) {
> -			if (file->f_flags & O_NONBLOCK) {
> +			if (noblock) {
>  				ret = -EAGAIN;
>  				break;
>  			}
> @@ -813,6 +813,21 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
>  	remove_wait_queue(&tun->socket.wait, &wait);
> 
>  out:
> +	return ret;
> +}
> +
> +static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
> +			    unsigned long count, loff_t pos)
> +{
> +	struct file *file = iocb->ki_filp;
> +	struct tun_file *tfile = file->private_data;
> +	struct tun_struct *tun = __tun_get(tfile);
> +	ssize_t ret;
> +
> +	if (!tun)
> +		return -EBADFD;
> +	ret = tun_do_read(tun, iocb, iv, count, file->f_flags & O_NONBLOCK);
> +	ret = min_t(ssize_t, ret, count);
>  	tun_put(tun);
>  	return ret;
>  }
> @@ -865,6 +880,37 @@ static void tun_sock_destruct(struct sock *sk)
>  	free_netdev(container_of(sk, struct tun_sock, sk)->tun->dev);
>  }
> 
> +static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
> +		       struct msghdr *m, size_t total_len)
> +{
> +	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
> +	return tun_get_user(tun, m->msg_iov, total_len,
> +			    m->msg_flags & MSG_DONTWAIT);
> +}
> +
> +static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
> +		       struct msghdr *m, size_t total_len,
> +		       int flags)
> +{
> +	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
> +	int ret;
> +	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
> +		return -EINVAL;
> +	ret = tun_do_read(tun, iocb, m->msg_iov, total_len,
> +			  flags & MSG_DONTWAIT);
> +	if (ret > total_len) {
> +		m->msg_flags |= MSG_TRUNC;
> +		ret = flags & MSG_TRUNC ? ret : total_len;
> +	}
> +	return ret;
> +}
> +
> +/* Ops structure to mimic raw sockets with tun */
> +static const struct proto_ops tun_socket_ops = {
> +	.sendmsg = tun_sendmsg,
> +	.recvmsg = tun_recvmsg,
> +};
> +
>  static struct proto tun_proto = {
>  	.name		= "tun",
>  	.owner		= THIS_MODULE,
> @@ -982,6 +1028,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
>  			goto err_free_dev;
> 
>  		init_waitqueue_head(&tun->socket.wait);
> +		tun->socket.ops = &tun_socket_ops;
>  		sock_init_data(&tun->socket, sk);
>  		sk->sk_write_space = tun_sock_write_space;
>  		sk->sk_sndbuf = INT_MAX;
> @@ -1483,6 +1530,23 @@ static void tun_cleanup(void)
>  	rtnl_link_unregister(&tun_link_ops);
>  }
> 
> +/* Get an underlying socket object from tun file.  Returns error unless file is
> + * attached to a device.  The returned object works like a packet socket, it
> + * can be used for sock_sendmsg/sock_recvmsg.  The caller is responsible for
> + * holding a reference to the file for as long as the socket is in use. */
> +struct socket *tun_get_socket(struct file *file)
> +{
> +	struct tun_struct *tun;
> +	if (file->f_op != &tun_fops)
> +		return ERR_PTR(-EINVAL);
> +	tun = tun_get(file);
> +	if (!tun)
> +		return ERR_PTR(-EBADFD);
> +	tun_put(tun);
> +	return &tun->socket;
> +}
> +EXPORT_SYMBOL_GPL(tun_get_socket);
> +
>  module_init(tun_init);
>  module_exit(tun_cleanup);
>  MODULE_DESCRIPTION(DRV_DESCRIPTION);
> diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
> index 3f5fd52..404abe0 100644
> --- a/include/linux/if_tun.h
> +++ b/include/linux/if_tun.h
> @@ -86,4 +86,18 @@ struct tun_filter {
>  	__u8   addr[0][ETH_ALEN];
>  };
> 
> +#ifdef __KERNEL__
> +#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
> +struct socket *tun_get_socket(struct file *);
> +#else
> +#include <linux/err.h>
> +#include <linux/errno.h>
> +struct file;
> +struct socket;
> +static inline struct socket *tun_get_socket(struct file *f)
> +{
> +	return ERR_PTR(-EINVAL);
> +}
> +#endif /* CONFIG_TUN */
> +#endif /* __KERNEL__ */
>  #endif /* __IF_TUN_H */
> 

-- 
paul moore
linux @ hp

  parent reply	other threads:[~2009-09-11  4:17 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-10 12:59 [PATCH RFC] tun: export underlying socket Michael S. Tsirkin
2009-09-10 13:19 ` Eric Dumazet
2009-09-10 13:27   ` Michael S. Tsirkin
2009-09-11  4:17 ` Paul Moore [this message]
2009-09-11  4:59   ` Michael S. Tsirkin
2009-09-11  5:36     ` Michael S. Tsirkin
2009-09-11  6:10       ` Eric Dumazet
2009-09-11  9:44         ` Michael S. Tsirkin
2009-09-14  8:01       ` Or Gerlitz
2009-09-14  8:07 ` Or Gerlitz
2009-09-14  8:09   ` Michael S. Tsirkin
2009-09-14  8:17     ` Or Gerlitz
2009-09-14  9:11       ` Michael S. Tsirkin
2009-09-14  9:43         ` Or Gerlitz
2009-09-14 10:10           ` Michael S. Tsirkin
2009-09-14 14:06             ` Or Gerlitz
2009-09-14 15:03               ` Herbert Xu
2009-09-15 13:02                 ` Or Gerlitz
2009-09-15 13:31                   ` Herbert Xu
2009-09-14 15:40               ` Michael S. Tsirkin
2009-09-15 13:11                 ` Or Gerlitz
2009-09-15 13:18                   ` Michael S. Tsirkin
2009-11-02 17:20 ` [PATCHv2] " Michael S. Tsirkin
2009-11-03 15:10   ` Herbert Xu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200909110017.27668.paul.moore@hp.com \
    --to=paul.moore@hp.com \
    --cc=davem@davemloft.net \
    --cc=herbert@gondor.apana.org.au \
    --cc=m.s.tsirkin@gmail.com \
    --cc=mst@redhat.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).