From mboxrd@z Thu Jan 1 00:00:00 1970 From: Paul Moore Subject: Re: [PATCH RFC] tun: export underlying socket Date: Fri, 11 Sep 2009 00:17:27 -0400 Message-ID: <200909110017.27668.paul.moore@hp.com> References: <20090910125929.GA32593@redhat.com> Mime-Version: 1.0 Content-Type: Text/Plain; charset="iso-8859-1" Content-Transfer-Encoding: 7bit Cc: David Miller , m.s.tsirkin@gmail.com, netdev@vger.kernel.org, herbert@gondor.apana.org.au To: "Michael S. Tsirkin" Return-path: Received: from g4t0016.houston.hp.com ([15.201.24.19]:43990 "EHLO g4t0016.houston.hp.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1751925AbZIKER0 (ORCPT ); Fri, 11 Sep 2009 00:17:26 -0400 In-Reply-To: <20090910125929.GA32593@redhat.com> Sender: netdev-owner@vger.kernel.org List-ID: On Thursday 10 September 2009 08:59:29 am Michael S. Tsirkin wrote: > Tun device looks similar to a packet socket > in that both pass complete frames from/to userspace. > > This patch fills in enough fields in the socket underlying tun driver > to support sendmsg/recvmsg operations, and exports access to this socket > to modules. > > This way, code using raw sockets to inject packets > into a physical device, can support injecting > packets into host network stack almost without modification. > > First user of this interface will be vhost virtualization > accelerator. No comments on the code at this point - I'm just trying to understand the intended user right now which I'm assuming is the vhost-net bits you sent previously? > Signed-off-by: Michael S. Tsirkin > --- > > This patch is on top of net-next master. > An alternative approach would be to add an ioctl to tun, to export the > underlying socket to userspace: a uniform way to work with a network > device and the host stack might be useful there, as well. > Kernel users could then do sockfd_lookup to get the socket. > I decided against it for now as it requires more code. > Please comment. 
> > drivers/net/tun.c | 78 > +++++++++++++++++++++++++++++++++++++++++++---- include/linux/if_tun.h | > 14 ++++++++ > 2 files changed, 85 insertions(+), 7 deletions(-) > > diff --git a/drivers/net/tun.c b/drivers/net/tun.c > index 589a44a..76f5faa 100644 > --- a/drivers/net/tun.c > +++ b/drivers/net/tun.c > @@ -151,6 +151,7 @@ static int tun_attach(struct tun_struct *tun, struct > file *file) err = 0; > tfile->tun = tun; > tun->tfile = tfile; > + tun->socket.file = file; > dev_hold(tun->dev); > sock_hold(tun->socket.sk); > atomic_inc(&tfile->count); > @@ -165,6 +166,7 @@ static void __tun_detach(struct tun_struct *tun) > /* Detach from net device */ > netif_tx_lock_bh(tun->dev); > tun->tfile = NULL; > + tun->socket.file = NULL; > netif_tx_unlock_bh(tun->dev); > > /* Drop read queue */ > @@ -750,7 +752,7 @@ static __inline__ ssize_t tun_put_user(struct > tun_struct *tun, len = min_t(int, skb->len, len); > > skb_copy_datagram_const_iovec(skb, 0, iv, total, len); > - total += len; > + total += skb->len; > > tun->dev->stats.tx_packets++; > tun->dev->stats.tx_bytes += len; > @@ -758,12 +760,10 @@ static __inline__ ssize_t tun_put_user(struct > tun_struct *tun, return total; > } > > -static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec > *iv, - unsigned long count, loff_t pos) > +static ssize_t tun_do_read(struct tun_struct *tun, > + struct kiocb *iocb, const struct iovec *iv, > + unsigned long count, int noblock) > { > - struct file *file = iocb->ki_filp; > - struct tun_file *tfile = file->private_data; > - struct tun_struct *tun = __tun_get(tfile); > DECLARE_WAITQUEUE(wait, current); > struct sk_buff *skb; > ssize_t len, ret = 0; > @@ -785,7 +785,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, > const struct iovec *iv, > > /* Read frames from the queue */ > if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) { > - if (file->f_flags & O_NONBLOCK) { > + if (noblock) { > ret = -EAGAIN; > break; > } > @@ -813,6 +813,21 @@ static ssize_t 
tun_chr_aio_read(struct kiocb *iocb, > const struct iovec *iv, remove_wait_queue(&tun->socket.wait, &wait); > > out: > + return ret; > +} > + > +static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec > *iv, + unsigned long count, loff_t pos) > +{ > + struct file *file = iocb->ki_filp; > + struct tun_file *tfile = file->private_data; > + struct tun_struct *tun = __tun_get(tfile); > + ssize_t ret; > + > + if (!tun) > + return -EBADFD; > + ret = tun_do_read(tun, iocb, iv, count, file->f_flags & O_NONBLOCK); > + ret = min_t(ssize_t, ret, count); > tun_put(tun); > return ret; > } > @@ -865,6 +880,37 @@ static void tun_sock_destruct(struct sock *sk) > free_netdev(container_of(sk, struct tun_sock, sk)->tun->dev); > } > > +static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, > + struct msghdr *m, size_t total_len) > +{ > + struct tun_struct *tun = container_of(sock, struct tun_struct, socket); > + return tun_get_user(tun, m->msg_iov, total_len, > + m->msg_flags & MSG_DONTWAIT); > +} > + > +static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, > + struct msghdr *m, size_t total_len, > + int flags) > +{ > + struct tun_struct *tun = container_of(sock, struct tun_struct, socket); > + int ret; > + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) > + return -EINVAL; > + ret = tun_do_read(tun, iocb, m->msg_iov, total_len, > + flags & MSG_DONTWAIT); > + if (ret > total_len) { > + m->msg_flags |= MSG_TRUNC; > + ret = flags & MSG_TRUNC ? 
ret : total_len; > + } > + return ret; > +} > + > +/* Ops structure to mimic raw sockets with tun */ > +static const struct proto_ops tun_socket_ops = { > + .sendmsg = tun_sendmsg, > + .recvmsg = tun_recvmsg, > +}; > + > static struct proto tun_proto = { > .name = "tun", > .owner = THIS_MODULE, > @@ -982,6 +1028,7 @@ static int tun_set_iff(struct net *net, struct file > *file, struct ifreq *ifr) goto err_free_dev; > > init_waitqueue_head(&tun->socket.wait); > + tun->socket.ops = &tun_socket_ops; > sock_init_data(&tun->socket, sk); > sk->sk_write_space = tun_sock_write_space; > sk->sk_sndbuf = INT_MAX; > @@ -1483,6 +1530,23 @@ static void tun_cleanup(void) > rtnl_link_unregister(&tun_link_ops); > } > > +/* Get an underlying socket object from tun file. Returns error unless > file is + * attached to a device. The returned object works like a packet > socket, it + * can be used for sock_sendmsg/sock_recvmsg. The caller is > responsible for + * holding a reference to the file for as long as the > socket is in use. 
*/ +struct socket *tun_get_socket(struct file *file) > +{ > + struct tun_struct *tun; > + if (file->f_op != &tun_fops) > + return ERR_PTR(-EINVAL); > + tun = tun_get(file); > + if (!tun) > + return ERR_PTR(-EBADFD); > + tun_put(tun); > + return &tun->socket; > +} > +EXPORT_SYMBOL_GPL(tun_get_socket); > + > module_init(tun_init); > module_exit(tun_cleanup); > MODULE_DESCRIPTION(DRV_DESCRIPTION); > diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h > index 3f5fd52..404abe0 100644 > --- a/include/linux/if_tun.h > +++ b/include/linux/if_tun.h > @@ -86,4 +86,18 @@ struct tun_filter { > __u8 addr[0][ETH_ALEN]; > }; > > +#ifdef __KERNEL__ > +#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) > +struct socket *tun_get_socket(struct file *); > +#else > +#include <linux/err.h> > +#include <linux/errno.h> > +struct file; > +struct socket; > +static inline struct socket *tun_get_socket(struct file *f) > +{ > + return ERR_PTR(-EINVAL); > +} > +#endif /* CONFIG_TUN */ > +#endif /* __KERNEL__ */ > #endif /* __IF_TUN_H */ > -- paul moore linux @ hp