From: David Woodhouse <dwmw2@infradead.org>
To: netdev@vger.kernel.org
Cc: "Jason Wang" <jasowang@redhat.com>,
"Eugenio Pérez" <eperezma@redhat.com>,
"Willem de Bruijn" <willemb@google.com>
Subject: [PATCH v3 1/5] net: add header len parameter to tun_get_socket(), tap_get_socket()
Date: Thu, 24 Jun 2021 13:30:01 +0100 [thread overview]
Message-ID: <20210624123005.1301761-1-dwmw2@infradead.org> (raw)
In-Reply-To: <03ee62602dd7b7101f78e0802249a6e2e4c10b7f.camel@infradead.org>
From: David Woodhouse <dwmw@amazon.co.uk>
The vhost-net driver was making wild assumptions about the header length
of the underlying tun/tap socket. Then it was discarding packets if
the number of bytes it got from sock_recvmsg() didn't precisely match
its guess.
Fix it to get the correct information along with the socket itself.
As a side-effect, this means that tun_get_socket() won't work if the
tun file isn't actually connected to a device, since there's no 'tun'
yet in that case to get the information from.
On the receive side, where the tun device generates the virtio_net_hdr
but VIRITO_NET_F_MSG_RXBUF was negotiated and vhost-net needs to fill
in the 'num_buffers' field on top of the existing virtio_net_hdr, fix
that to use 'sock_hlen - 2' as the location, which means that it goes
in the right place regardless of whether the tun device is using an
additional tun_pi header or not. In this case, the user should have
configured the tun device with a vnet hdr size of 12, to make room.
Fixes: 8dd014adfea6f ("vhost-net: mergeable buffers support")
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
drivers/net/tap.c | 5 ++++-
drivers/net/tun.c | 16 +++++++++++++++-
drivers/vhost/net.c | 31 +++++++++++++++----------------
include/linux/if_tap.h | 4 ++--
include/linux/if_tun.h | 4 ++--
5 files changed, 38 insertions(+), 22 deletions(-)
diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 8e3a28ba6b28..2170a0d3d34c 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -1246,7 +1246,7 @@ static const struct proto_ops tap_socket_ops = {
* attached to a device. The returned object works like a packet socket, it
* can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for
* holding a reference to the file for as long as the socket is in use. */
-struct socket *tap_get_socket(struct file *file)
+struct socket *tap_get_socket(struct file *file, size_t *hlen)
{
struct tap_queue *q;
if (file->f_op != &tap_fops)
@@ -1254,6 +1254,9 @@ struct socket *tap_get_socket(struct file *file)
q = file->private_data;
if (!q)
return ERR_PTR(-EBADFD);
+ if (hlen)
+ *hlen = (q->flags & IFF_VNET_HDR) ? q->vnet_hdr_sz : 0;
+
return &q->sock;
}
EXPORT_SYMBOL_GPL(tap_get_socket);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 4cf38be26dc9..67b406fa0881 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -3649,7 +3649,7 @@ static void tun_cleanup(void)
* attached to a device. The returned object works like a packet socket, it
* can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for
* holding a reference to the file for as long as the socket is in use. */
-struct socket *tun_get_socket(struct file *file)
+struct socket *tun_get_socket(struct file *file, size_t *hlen)
{
struct tun_file *tfile;
if (file->f_op != &tun_fops)
@@ -3657,6 +3657,20 @@ struct socket *tun_get_socket(struct file *file)
tfile = file->private_data;
if (!tfile)
return ERR_PTR(-EBADFD);
+
+ if (hlen) {
+ struct tun_struct *tun = tun_get(tfile);
+ size_t len = 0;
+
+ if (!tun)
+ return ERR_PTR(-ENOTCONN);
+ if (tun->flags & IFF_VNET_HDR)
+ len += READ_ONCE(tun->vnet_hdr_sz);
+ if (!(tun->flags & IFF_NO_PI))
+ len += sizeof(struct tun_pi);
+ tun_put(tun);
+ *hlen = len;
+ }
return &tfile->socket;
}
EXPORT_SYMBOL_GPL(tun_get_socket);
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index df82b124170e..b92a7144ed90 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -1143,7 +1143,8 @@ static void handle_rx(struct vhost_net *net)
vq_log = unlikely(vhost_has_feature(vq, VHOST_F_LOG_ALL)) ?
vq->log : NULL;
- mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF);
+ mergeable = vhost_has_feature(vq, VIRTIO_NET_F_MRG_RXBUF) &&
+ (vhost_hlen || sock_hlen >= sizeof(num_buffers));
do {
sock_len = vhost_net_rx_peek_head_len(net, sock->sk,
@@ -1213,9 +1214,10 @@ static void handle_rx(struct vhost_net *net)
}
} else {
/* Header came from socket; we'll need to patch
- * ->num_buffers over if VIRTIO_NET_F_MRG_RXBUF
+ * ->num_buffers over the last two bytes if
+ * VIRTIO_NET_F_MRG_RXBUF is enabled.
*/
- iov_iter_advance(&fixup, sizeof(hdr));
+ iov_iter_advance(&fixup, sock_hlen - 2);
}
/* TODO: Should check and handle checksum. */
@@ -1420,7 +1422,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
return 0;
}
-static struct socket *get_raw_socket(int fd)
+static struct socket *get_raw_socket(int fd, size_t *hlen)
{
int r;
struct socket *sock = sockfd_lookup(fd, &r);
@@ -1438,6 +1440,7 @@ static struct socket *get_raw_socket(int fd)
r = -EPFNOSUPPORT;
goto err;
}
+ *hlen = 0;
return sock;
err:
sockfd_put(sock);
@@ -1463,33 +1466,33 @@ static struct ptr_ring *get_tap_ptr_ring(int fd)
return ring;
}
-static struct socket *get_tap_socket(int fd)
+static struct socket *get_tap_socket(int fd, size_t *hlen)
{
struct file *file = fget(fd);
struct socket *sock;
if (!file)
return ERR_PTR(-EBADF);
- sock = tun_get_socket(file);
+ sock = tun_get_socket(file, hlen);
if (!IS_ERR(sock))
return sock;
- sock = tap_get_socket(file);
+ sock = tap_get_socket(file, hlen);
if (IS_ERR(sock))
fput(file);
return sock;
}
-static struct socket *get_socket(int fd)
+static struct socket *get_socket(int fd, size_t *hlen)
{
struct socket *sock;
/* special case to disable backend */
if (fd == -1)
return NULL;
- sock = get_raw_socket(fd);
+ sock = get_raw_socket(fd, hlen);
if (!IS_ERR(sock))
return sock;
- sock = get_tap_socket(fd);
+ sock = get_tap_socket(fd, hlen);
if (!IS_ERR(sock))
return sock;
return ERR_PTR(-ENOTSOCK);
@@ -1521,7 +1524,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
r = -EFAULT;
goto err_vq;
}
- sock = get_socket(fd);
+ sock = get_socket(fd, &nvq->sock_hlen);
if (IS_ERR(sock)) {
r = PTR_ERR(sock);
goto err_vq;
@@ -1621,7 +1624,7 @@ static long vhost_net_reset_owner(struct vhost_net *n)
static int vhost_net_set_features(struct vhost_net *n, u64 features)
{
- size_t vhost_hlen, sock_hlen, hdr_len;
+ size_t vhost_hlen, hdr_len;
int i;
hdr_len = (features & ((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
@@ -1631,11 +1634,8 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
if (features & (1 << VHOST_NET_F_VIRTIO_NET_HDR)) {
/* vhost provides vnet_hdr */
vhost_hlen = hdr_len;
- sock_hlen = 0;
} else {
- /* socket provides vnet_hdr */
vhost_hlen = 0;
- sock_hlen = hdr_len;
}
mutex_lock(&n->dev.mutex);
if ((features & (1 << VHOST_F_LOG_ALL)) &&
@@ -1651,7 +1651,6 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
mutex_lock(&n->vqs[i].vq.mutex);
n->vqs[i].vq.acked_features = features;
n->vqs[i].vhost_hlen = vhost_hlen;
- n->vqs[i].sock_hlen = sock_hlen;
mutex_unlock(&n->vqs[i].vq.mutex);
}
mutex_unlock(&n->dev.mutex);
diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h
index 915a187cfabd..b460ba98f34e 100644
--- a/include/linux/if_tap.h
+++ b/include/linux/if_tap.h
@@ -3,14 +3,14 @@
#define _LINUX_IF_TAP_H_
#if IS_ENABLED(CONFIG_TAP)
-struct socket *tap_get_socket(struct file *);
+struct socket *tap_get_socket(struct file *, size_t *);
struct ptr_ring *tap_get_ptr_ring(struct file *file);
#else
#include <linux/err.h>
#include <linux/errno.h>
struct file;
struct socket;
-static inline struct socket *tap_get_socket(struct file *f)
+static inline struct socket *tap_get_socket(struct file *f, size_t *)
{
return ERR_PTR(-EINVAL);
}
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 2a7660843444..8a7debd3f663 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -25,7 +25,7 @@ struct tun_xdp_hdr {
};
#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
-struct socket *tun_get_socket(struct file *);
+struct socket *tun_get_socket(struct file *, size_t *);
struct ptr_ring *tun_get_tx_ring(struct file *file);
static inline bool tun_is_xdp_frame(void *ptr)
{
@@ -45,7 +45,7 @@ void tun_ptr_free(void *ptr);
#include <linux/errno.h>
struct file;
struct socket;
-static inline struct socket *tun_get_socket(struct file *f)
+static inline struct socket *tun_get_socket(struct file *f, size_t *)
{
return ERR_PTR(-EINVAL);
}
--
2.31.1
next prev parent reply other threads:[~2021-06-24 12:30 UTC|newest]
Thread overview: 73+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-06-19 13:33 [PATCH] net: tun: fix tun_xdp_one() for IFF_TUN mode David Woodhouse
2021-06-21 7:00 ` Jason Wang
2021-06-21 10:52 ` David Woodhouse
2021-06-21 14:50 ` David Woodhouse
2021-06-21 20:43 ` David Woodhouse
2021-06-22 4:52 ` Jason Wang
2021-06-22 7:24 ` David Woodhouse
2021-06-22 7:51 ` Jason Wang
2021-06-22 8:10 ` David Woodhouse
2021-06-22 11:36 ` David Woodhouse
2021-06-22 4:34 ` Jason Wang
2021-06-22 4:34 ` Jason Wang
2021-06-22 7:28 ` David Woodhouse
2021-06-22 8:00 ` Jason Wang
2021-06-22 8:29 ` David Woodhouse
2021-06-23 3:39 ` Jason Wang
2021-06-24 12:39 ` David Woodhouse
2021-06-22 16:15 ` [PATCH v2 1/4] " David Woodhouse
2021-06-22 16:15 ` [PATCH v2 2/4] net: tun: don't assume IFF_VNET_HDR in tun_xdp_one() tx path David Woodhouse
2021-06-23 3:46 ` Jason Wang
2021-06-22 16:15 ` [PATCH v2 3/4] vhost_net: validate virtio_net_hdr only if it exists David Woodhouse
2021-06-23 3:48 ` Jason Wang
2021-06-22 16:15 ` [PATCH v2 4/4] vhost_net: Add self test with tun device David Woodhouse
2021-06-23 4:02 ` Jason Wang
2021-06-23 16:12 ` David Woodhouse
2021-06-24 6:12 ` Jason Wang
2021-06-24 10:42 ` David Woodhouse
2021-06-25 2:55 ` Jason Wang
2021-06-25 7:54 ` David Woodhouse
2021-06-23 3:45 ` [PATCH v2 1/4] net: tun: fix tun_xdp_one() for IFF_TUN mode Jason Wang
2021-06-23 8:30 ` David Woodhouse
2021-06-23 13:52 ` David Woodhouse
2021-06-23 17:31 ` David Woodhouse
2021-06-23 22:52 ` David Woodhouse
2021-06-24 6:37 ` Jason Wang
2021-06-24 7:23 ` David Woodhouse
2021-06-24 6:18 ` Jason Wang
2021-06-24 7:05 ` David Woodhouse
2021-06-24 12:30 ` David Woodhouse [this message]
2021-06-24 12:30 ` [PATCH v3 2/5] net: tun: don't assume IFF_VNET_HDR in tun_xdp_one() tx path David Woodhouse
2021-06-25 6:58 ` Jason Wang
2021-06-24 12:30 ` [PATCH v3 3/5] vhost_net: remove virtio_net_hdr validation, let tun/tap do it themselves David Woodhouse
2021-06-25 7:33 ` Jason Wang
2021-06-25 8:37 ` David Woodhouse
2021-06-28 4:23 ` Jason Wang
2021-06-28 11:23 ` David Woodhouse
2021-06-28 23:29 ` David Woodhouse
2021-06-29 3:43 ` Jason Wang
2021-06-29 6:59 ` David Woodhouse
2021-06-29 10:49 ` David Woodhouse
2021-06-29 13:15 ` David Woodhouse
2021-06-30 4:39 ` Jason Wang
2021-06-30 10:02 ` David Woodhouse
2021-07-01 4:13 ` Jason Wang
2021-07-01 17:39 ` David Woodhouse
2021-07-02 3:13 ` Jason Wang
2021-07-02 8:08 ` David Woodhouse
2021-07-02 8:50 ` Jason Wang
2021-07-09 15:04 ` Eugenio Perez Martin
2021-06-29 3:21 ` Jason Wang
2021-06-24 12:30 ` [PATCH v3 4/5] net: tun: fix tun_xdp_one() for IFF_TUN mode David Woodhouse
2021-06-25 7:41 ` Jason Wang
2021-06-25 8:51 ` David Woodhouse
2021-06-28 4:27 ` Jason Wang
2021-06-28 10:43 ` David Woodhouse
2021-06-25 18:43 ` Willem de Bruijn
2021-06-25 19:00 ` David Woodhouse
2021-06-24 12:30 ` [PATCH v3 5/5] vhost_net: Add self test with tun device David Woodhouse
2021-06-25 5:00 ` [PATCH v3 1/5] net: add header len parameter to tun_get_socket(), tap_get_socket() Jason Wang
2021-06-25 8:23 ` David Woodhouse
2021-06-28 4:22 ` Jason Wang
2021-06-25 18:13 ` Willem de Bruijn
2021-06-25 18:55 ` David Woodhouse
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210624123005.1301761-1-dwmw2@infradead.org \
--to=dwmw2@infradead.org \
--cc=eperezma@redhat.com \
--cc=jasowang@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=willemb@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).