From: "Michael S. Tsirkin" <mst@redhat.com>
To: David Miller <davem@davemloft.net>, m.s.tsirkin@gmail.com
Cc: mst@redhat.com, netdev@vger.kernel.org, herbert@gondor.apana.org.au
Subject: [PATCH RFC] tun: export underlying socket
Date: Thu, 10 Sep 2009 15:59:29 +0300 [thread overview]
Message-ID: <20090910125929.GA32593@redhat.com> (raw)
The tun device is similar to a packet socket
in that both pass complete frames to and from userspace.
This patch fills in enough fields in the socket underlying the tun driver
to support sendmsg/recvmsg operations, and exports access to this socket
to modules.
This way, code that uses raw sockets to inject packets
into a physical device can also support injecting
packets into the host network stack almost without modification.
The first user of this interface will be the vhost virtualization
accelerator.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
This patch is on top of net-next master.
An alternative approach would be to add an ioctl to tun, to export the
underlying socket to userspace: a uniform way to work with a network
device and the host stack might be useful there, as well.
Kernel users could then do sockfd_lookup to get the socket.
I decided against it for now as it requires more code.
Please comment.
drivers/net/tun.c | 78 +++++++++++++++++++++++++++++++++++++++++++----
include/linux/if_tun.h | 14 ++++++++
2 files changed, 85 insertions(+), 7 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 589a44a..76f5faa 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -151,6 +151,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
err = 0;
tfile->tun = tun;
tun->tfile = tfile;
+ tun->socket.file = file;
dev_hold(tun->dev);
sock_hold(tun->socket.sk);
atomic_inc(&tfile->count);
@@ -165,6 +166,7 @@ static void __tun_detach(struct tun_struct *tun)
/* Detach from net device */
netif_tx_lock_bh(tun->dev);
tun->tfile = NULL;
+ tun->socket.file = NULL;
netif_tx_unlock_bh(tun->dev);
/* Drop read queue */
@@ -750,7 +752,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
len = min_t(int, skb->len, len);
skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
- total += len;
+ total += skb->len;
tun->dev->stats.tx_packets++;
tun->dev->stats.tx_bytes += len;
@@ -758,12 +760,10 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
return total;
}
-static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
- unsigned long count, loff_t pos)
+static ssize_t tun_do_read(struct tun_struct *tun,
+ struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, int noblock)
{
- struct file *file = iocb->ki_filp;
- struct tun_file *tfile = file->private_data;
- struct tun_struct *tun = __tun_get(tfile);
DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb;
ssize_t len, ret = 0;
@@ -785,7 +785,7 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
/* Read frames from the queue */
if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) {
- if (file->f_flags & O_NONBLOCK) {
+ if (noblock) {
ret = -EAGAIN;
break;
}
@@ -813,6 +813,21 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
remove_wait_queue(&tun->socket.wait, &wait);
out:
+ return ret;
+}
+
+/*
+ * aio_read handler for the tun character device.
+ *
+ * Looks up the tun device attached to the file, lets tun_do_read() perform
+ * the read (passing the file's O_NONBLOCK bit as the noblock argument) and
+ * then drops the device reference again.
+ *
+ * Returns -EBADFD when no device is attached to the file; otherwise the
+ * result of tun_do_read(), with a positive result limited to the total byte
+ * size of the caller's iovec.
+ */
+static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
+ unsigned long count, loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+ struct tun_file *tfile = file->private_data;
+ struct tun_struct *tun = __tun_get(tfile);
+ ssize_t ret;
+
+ if (!tun)
+ return -EBADFD;
+ ret = tun_do_read(tun, iocb, iv, count, file->f_flags & O_NONBLOCK);
+ /*
+ * tun_put_user() now adds the full skb length to its result, so the
+ * value can exceed what was actually copied.  count is the number of
+ * iovec segments, not a byte count, so clamp against the total buffer
+ * length rather than against count itself.
+ */
+ if (ret > 0)
+ ret = min_t(ssize_t, ret, iov_length(iv, count));
tun_put(tun);
return ret;
}
@@ -865,6 +880,37 @@ static void tun_sock_destruct(struct sock *sk)
free_netdev(container_of(sk, struct tun_sock, sk)->tun->dev);
}
+/*
+ * sendmsg handler for the socket embedded in a tun device.
+ *
+ * Recovers the owning tun_struct from @sock and hands the message's iovec
+ * and @total_len to tun_get_user(), passing the MSG_DONTWAIT bit of
+ * m->msg_flags as the final argument.  Returns whatever tun_get_user()
+ * returns.
+ */
+static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t total_len)
+{
+ struct tun_struct *tun;
+ int dontwait;
+
+ tun = container_of(sock, struct tun_struct, socket);
+ dontwait = m->msg_flags & MSG_DONTWAIT;
+ return tun_get_user(tun, m->msg_iov, total_len, dontwait);
+}
+
+/*
+ * recvmsg handler for the socket embedded in a tun device.
+ *
+ * Recovers the owning tun_struct from @sock and reads through tun_do_read()
+ * into the iovec described by @m.  Only MSG_DONTWAIT and MSG_TRUNC are
+ * accepted in @flags; any other bit yields -EINVAL.
+ *
+ * When tun_do_read() reports more bytes than @total_len, MSG_TRUNC is set in
+ * m->msg_flags and the return value is the full length if the caller passed
+ * MSG_TRUNC, or @total_len otherwise.  Negative error codes from
+ * tun_do_read() are returned unchanged.
+ */
+static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *m, size_t total_len,
+ int flags)
+{
+ struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
+ int ret;
+
+ if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
+ return -EINVAL;
+ /*
+ * NOTE(review): tun_chr_aio_read() passes its aio_read "count" argument
+ * (presumably the iovec segment count) in this position, while total_len
+ * is a byte count -- confirm which of the two tun_do_read() expects.
+ */
+ ret = tun_do_read(tun, iocb, m->msg_iov, total_len,
+ flags & MSG_DONTWAIT);
+ /*
+ * Only a positive result can mean truncation.  A bare "ret > total_len"
+ * would convert a negative errno to a huge unsigned value, flag
+ * MSG_TRUNC and turn the error into a bogus read of total_len bytes.
+ */
+ if (ret > 0 && (size_t)ret > total_len) {
+ m->msg_flags |= MSG_TRUNC;
+ ret = flags & MSG_TRUNC ? ret : total_len;
+ }
+ return ret;
+}
+
+/* Ops structure to mimic raw sockets with tun.
+ * Only the sendmsg and recvmsg handlers are filled in here; no other
+ * proto_ops member is initialized by this table. */
+static const struct proto_ops tun_socket_ops = {
+ .sendmsg = tun_sendmsg,
+ .recvmsg = tun_recvmsg,
+};
+
static struct proto tun_proto = {
.name = "tun",
.owner = THIS_MODULE,
@@ -982,6 +1028,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
goto err_free_dev;
init_waitqueue_head(&tun->socket.wait);
+ tun->socket.ops = &tun_socket_ops;
sock_init_data(&tun->socket, sk);
sk->sk_write_space = tun_sock_write_space;
sk->sk_sndbuf = INT_MAX;
@@ -1483,6 +1530,23 @@ static void tun_cleanup(void)
rtnl_link_unregister(&tun_link_ops);
}
+/* Look up the socket object embedded in the tun device behind a file.
+ * Yields ERR_PTR(-EINVAL) when the file's f_op is not tun_fops, and
+ * ERR_PTR(-EBADFD) when tun_get() finds no device for the file. The returned
+ * object works like a packet socket and can be used with
+ * sock_sendmsg/sock_recvmsg. The device reference taken for the lookup is
+ * dropped again before returning, so the caller is responsible for holding a
+ * reference to the file for as long as the socket is in use. */
+struct socket *tun_get_socket(struct file *file)
+{
+ struct tun_struct *tun;
+ struct socket *sock;
+
+ if (file->f_op != &tun_fops)
+ return ERR_PTR(-EINVAL);
+
+ tun = tun_get(file);
+ if (!tun)
+ return ERR_PTR(-EBADFD);
+
+ sock = &tun->socket;
+ tun_put(tun);
+ return sock;
+}
+EXPORT_SYMBOL_GPL(tun_get_socket);
+
module_init(tun_init);
module_exit(tun_cleanup);
MODULE_DESCRIPTION(DRV_DESCRIPTION);
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 3f5fd52..404abe0 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -86,4 +86,18 @@ struct tun_filter {
__u8 addr[0][ETH_ALEN];
};
+#ifdef __KERNEL__ /* in-kernel API only */
+#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
+struct socket *tun_get_socket(struct file *); /* defined in drivers/net/tun.c */
+#else /* tun driver not configured: provide a stub that always fails */
+#include <linux/err.h>
+#include <linux/errno.h>
+struct file;
+struct socket;
+static inline struct socket *tun_get_socket(struct file *f)
+{
+ return ERR_PTR(-EINVAL); /* same error the driver returns for a non-tun file */
+}
+#endif /* CONFIG_TUN */
+#endif /* __KERNEL__ */
#endif /* __IF_TUN_H */
--
1.6.2.5
next reply other threads:[~2009-09-10 13:01 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-09-10 12:59 Michael S. Tsirkin [this message]
2009-09-10 13:19 ` [PATCH RFC] tun: export underlying socket Eric Dumazet
2009-09-10 13:27 ` Michael S. Tsirkin
2009-09-11 4:17 ` Paul Moore
2009-09-11 4:59 ` Michael S. Tsirkin
2009-09-11 5:36 ` Michael S. Tsirkin
2009-09-11 6:10 ` Eric Dumazet
2009-09-11 9:44 ` Michael S. Tsirkin
2009-09-14 8:01 ` Or Gerlitz
2009-09-14 8:07 ` Or Gerlitz
2009-09-14 8:09 ` Michael S. Tsirkin
2009-09-14 8:17 ` Or Gerlitz
2009-09-14 9:11 ` Michael S. Tsirkin
2009-09-14 9:43 ` Or Gerlitz
2009-09-14 10:10 ` Michael S. Tsirkin
2009-09-14 14:06 ` Or Gerlitz
2009-09-14 15:03 ` Herbert Xu
2009-09-15 13:02 ` Or Gerlitz
2009-09-15 13:31 ` Herbert Xu
2009-09-14 15:40 ` Michael S. Tsirkin
2009-09-15 13:11 ` Or Gerlitz
2009-09-15 13:18 ` Michael S. Tsirkin
2009-11-02 17:20 ` [PATCHv2] " Michael S. Tsirkin
2009-11-03 15:10 ` Herbert Xu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090910125929.GA32593@redhat.com \
--to=mst@redhat.com \
--cc=davem@davemloft.net \
--cc=herbert@gondor.apana.org.au \
--cc=m.s.tsirkin@gmail.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).