* [RFC PATCH 03/13] vsock: remove include/linux/vm_sockets.h file
From: Stefano Garzarella @ 2019-09-27 11:26 UTC (permalink / raw)
To: netdev
Cc: linux-hyperv, K. Y. Srinivasan, Stefan Hajnoczi, Sasha Levin,
linux-kernel, kvm, David S. Miller, virtualization,
Stephen Hemminger, Jason Wang, Michael S. Tsirkin, Haiyang Zhang,
Dexuan Cui, Jorgen Hansen
In-Reply-To: <20190927112703.17745-1-sgarzare@redhat.com>
This header file now only includes "uapi/linux/vm_sockets.h".
We can include it directly when needed.
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
include/linux/vm_sockets.h | 13 -------------
include/net/af_vsock.h | 2 +-
include/net/vsock_addr.h | 2 +-
net/vmw_vsock/vmci_transport_notify.h | 1 -
4 files changed, 2 insertions(+), 16 deletions(-)
delete mode 100644 include/linux/vm_sockets.h
diff --git a/include/linux/vm_sockets.h b/include/linux/vm_sockets.h
deleted file mode 100644
index 7dd899ccb920..000000000000
--- a/include/linux/vm_sockets.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * VMware vSockets Driver
- *
- * Copyright (C) 2007-2013 VMware, Inc. All rights reserved.
- */
-
-#ifndef _VM_SOCKETS_H
-#define _VM_SOCKETS_H
-
-#include <uapi/linux/vm_sockets.h>
-
-#endif /* _VM_SOCKETS_H */
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 80ea0f93d3f7..c660402b10f2 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -10,7 +10,7 @@
#include <linux/kernel.h>
#include <linux/workqueue.h>
-#include <linux/vm_sockets.h>
+#include <uapi/linux/vm_sockets.h>
#include "vsock_addr.h"
diff --git a/include/net/vsock_addr.h b/include/net/vsock_addr.h
index 57d2db5c4bdf..cf8cc140d68d 100644
--- a/include/net/vsock_addr.h
+++ b/include/net/vsock_addr.h
@@ -8,7 +8,7 @@
#ifndef _VSOCK_ADDR_H_
#define _VSOCK_ADDR_H_
-#include <linux/vm_sockets.h>
+#include <uapi/linux/vm_sockets.h>
void vsock_addr_init(struct sockaddr_vm *addr, u32 cid, u32 port);
int vsock_addr_validate(const struct sockaddr_vm *addr);
diff --git a/net/vmw_vsock/vmci_transport_notify.h b/net/vmw_vsock/vmci_transport_notify.h
index 7843f08d4290..a1aa5a998c0e 100644
--- a/net/vmw_vsock/vmci_transport_notify.h
+++ b/net/vmw_vsock/vmci_transport_notify.h
@@ -11,7 +11,6 @@
#include <linux/types.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
-#include <linux/vm_sockets.h>
#include "vmci_transport.h"
--
2.21.0
^ permalink raw reply related
* [RFC PATCH 04/13] vsock: add 'transport' member in the struct vsock_sock
From: Stefano Garzarella @ 2019-09-27 11:26 UTC (permalink / raw)
To: netdev
Cc: linux-hyperv, K. Y. Srinivasan, Stefan Hajnoczi, Sasha Levin,
linux-kernel, kvm, David S. Miller, virtualization,
Stephen Hemminger, Jason Wang, Michael S. Tsirkin, Haiyang Zhang,
Dexuan Cui, Jorgen Hansen
In-Reply-To: <20190927112703.17745-1-sgarzare@redhat.com>
As preparation for supporting multiple transports, this patch adds
a 'transport' member to 'struct vsock_sock'.
This new field is initialized when the socket is created, in the
__vsock_create() function.
This patch also renames the global 'transport' pointer to
'transport_single', since for now we're only supporting a single
transport registered at run-time.
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
include/net/af_vsock.h | 1 +
net/vmw_vsock/af_vsock.c | 56 +++++++++++++++++++++++++++-------------
2 files changed, 39 insertions(+), 18 deletions(-)
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index c660402b10f2..a5e1e134261d 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -27,6 +27,7 @@ extern spinlock_t vsock_table_lock;
struct vsock_sock {
/* sk must be the first member. */
struct sock sk;
+ const struct vsock_transport *transport;
struct sockaddr_vm local_addr;
struct sockaddr_vm remote_addr;
/* Links for the global tables of bound and connected sockets. */
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index f609434b2794..81ee2561c76f 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -126,7 +126,7 @@ static struct proto vsock_proto = {
*/
#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
-static const struct vsock_transport *transport;
+static const struct vsock_transport *transport_single;
static DEFINE_MUTEX(vsock_register_mutex);
/**** UTILS ****/
@@ -408,7 +408,9 @@ static bool vsock_is_pending(struct sock *sk)
static int vsock_send_shutdown(struct sock *sk, int mode)
{
- return transport->shutdown(vsock_sk(sk), mode);
+ struct vsock_sock *vsk = vsock_sk(sk);
+
+ return vsk->transport->shutdown(vsk, mode);
}
static void vsock_pending_work(struct work_struct *work)
@@ -518,7 +520,7 @@ static int __vsock_bind_stream(struct vsock_sock *vsk,
static int __vsock_bind_dgram(struct vsock_sock *vsk,
struct sockaddr_vm *addr)
{
- return transport->dgram_bind(vsk, addr);
+ return vsk->transport->dgram_bind(vsk, addr);
}
static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
@@ -536,7 +538,7 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
* like AF_INET prevents binding to a non-local IP address (in most
* cases), we only allow binding to the local CID.
*/
- cid = transport->get_local_cid();
+ cid = vsk->transport->get_local_cid();
if (addr->svm_cid != cid && addr->svm_cid != VMADDR_CID_ANY)
return -EADDRNOTAVAIL;
@@ -586,6 +588,7 @@ struct sock *__vsock_create(struct net *net,
sk->sk_type = type;
vsk = vsock_sk(sk);
+ vsk->transport = transport_single;
vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
@@ -616,7 +619,7 @@ struct sock *__vsock_create(struct net *net,
vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
}
- if (transport->init(vsk, psk) < 0) {
+ if (vsk->transport->init(vsk, psk) < 0) {
sk_free(sk);
return NULL;
}
@@ -638,7 +641,7 @@ static void __vsock_release(struct sock *sk)
vsk = vsock_sk(sk);
pending = NULL; /* Compiler warning. */
- transport->release(vsk);
+ vsk->transport->release(vsk);
lock_sock(sk);
sock_orphan(sk);
@@ -662,7 +665,7 @@ static void vsock_sk_destruct(struct sock *sk)
{
struct vsock_sock *vsk = vsock_sk(sk);
- transport->destruct(vsk);
+ vsk->transport->destruct(vsk);
/* When clearing these addresses, there's no need to set the family and
* possibly register the address family with the kernel.
@@ -686,13 +689,13 @@ static int vsock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
s64 vsock_stream_has_data(struct vsock_sock *vsk)
{
- return transport->stream_has_data(vsk);
+ return vsk->transport->stream_has_data(vsk);
}
EXPORT_SYMBOL_GPL(vsock_stream_has_data);
s64 vsock_stream_has_space(struct vsock_sock *vsk)
{
- return transport->stream_has_space(vsk);
+ return vsk->transport->stream_has_space(vsk);
}
EXPORT_SYMBOL_GPL(vsock_stream_has_space);
@@ -861,6 +864,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
} else if (sock->type == SOCK_STREAM) {
+ const struct vsock_transport *transport = vsk->transport;
lock_sock(sk);
/* Listening sockets that have connections in their accept
@@ -936,6 +940,7 @@ static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
struct sock *sk;
struct vsock_sock *vsk;
struct sockaddr_vm *remote_addr;
+ const struct vsock_transport *transport;
if (msg->msg_flags & MSG_OOB)
return -EOPNOTSUPP;
@@ -944,6 +949,7 @@ static int vsock_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
err = 0;
sk = sock->sk;
vsk = vsock_sk(sk);
+ transport = vsk->transport;
lock_sock(sk);
@@ -1028,8 +1034,8 @@ static int vsock_dgram_connect(struct socket *sock,
if (err)
goto out;
- if (!transport->dgram_allow(remote_addr->svm_cid,
- remote_addr->svm_port)) {
+ if (!vsk->transport->dgram_allow(remote_addr->svm_cid,
+ remote_addr->svm_port)) {
err = -EINVAL;
goto out;
}
@@ -1045,7 +1051,9 @@ static int vsock_dgram_connect(struct socket *sock,
static int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags)
{
- return transport->dgram_dequeue(vsock_sk(sock->sk), msg, len, flags);
+ struct vsock_sock *vsk = vsock_sk(sock->sk);
+
+ return vsk->transport->dgram_dequeue(vsk, msg, len, flags);
}
static const struct proto_ops vsock_dgram_ops = {
@@ -1071,6 +1079,8 @@ static const struct proto_ops vsock_dgram_ops = {
static int vsock_transport_cancel_pkt(struct vsock_sock *vsk)
{
+ const struct vsock_transport *transport = vsk->transport;
+
if (!transport->cancel_pkt)
return -EOPNOTSUPP;
@@ -1107,6 +1117,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
int err;
struct sock *sk;
struct vsock_sock *vsk;
+ const struct vsock_transport *transport;
struct sockaddr_vm *remote_addr;
long timeout;
DEFINE_WAIT(wait);
@@ -1114,6 +1125,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
err = 0;
sk = sock->sk;
vsk = vsock_sk(sk);
+ transport = vsk->transport;
lock_sock(sk);
@@ -1357,6 +1369,7 @@ static int vsock_stream_setsockopt(struct socket *sock,
int err;
struct sock *sk;
struct vsock_sock *vsk;
+ const struct vsock_transport *transport;
u64 val;
if (level != AF_VSOCK)
@@ -1377,6 +1390,7 @@ static int vsock_stream_setsockopt(struct socket *sock,
err = 0;
sk = sock->sk;
vsk = vsock_sk(sk);
+ transport = vsk->transport;
lock_sock(sk);
@@ -1434,6 +1448,7 @@ static int vsock_stream_getsockopt(struct socket *sock,
int len;
struct sock *sk;
struct vsock_sock *vsk;
+ const struct vsock_transport *transport;
u64 val;
if (level != AF_VSOCK)
@@ -1457,6 +1472,7 @@ static int vsock_stream_getsockopt(struct socket *sock,
err = 0;
sk = sock->sk;
vsk = vsock_sk(sk);
+ transport = vsk->transport;
switch (optname) {
case SO_VM_SOCKETS_BUFFER_SIZE:
@@ -1501,6 +1517,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
{
struct sock *sk;
struct vsock_sock *vsk;
+ const struct vsock_transport *transport;
ssize_t total_written;
long timeout;
int err;
@@ -1509,6 +1526,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
sk = sock->sk;
vsk = vsock_sk(sk);
+ transport = vsk->transport;
total_written = 0;
err = 0;
@@ -1640,6 +1658,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
{
struct sock *sk;
struct vsock_sock *vsk;
+ const struct vsock_transport *transport;
int err;
size_t target;
ssize_t copied;
@@ -1650,6 +1669,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
sk = sock->sk;
vsk = vsock_sk(sk);
+ transport = vsk->transport;
err = 0;
lock_sock(sk);
@@ -1864,7 +1884,7 @@ static long vsock_dev_do_ioctl(struct file *filp,
switch (cmd) {
case IOCTL_VM_SOCKETS_GET_LOCAL_CID:
- if (put_user(transport->get_local_cid(), p) != 0)
+ if (put_user(transport_single->get_local_cid(), p) != 0)
retval = -EFAULT;
break;
@@ -1911,7 +1931,7 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
if (err)
return err;
- if (transport) {
+ if (transport_single) {
err = -EBUSY;
goto err_busy;
}
@@ -1920,7 +1940,7 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
* unload while there are open sockets.
*/
vsock_proto.owner = owner;
- transport = t;
+ transport_single = t;
vsock_device.minor = MISC_DYNAMIC_MINOR;
err = misc_register(&vsock_device);
@@ -1950,7 +1970,7 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
err_deregister_misc:
misc_deregister(&vsock_device);
err_reset_transport:
- transport = NULL;
+ transport_single = NULL;
err_busy:
mutex_unlock(&vsock_register_mutex);
return err;
@@ -1967,7 +1987,7 @@ void vsock_core_exit(void)
/* We do not want the assignment below re-ordered. */
mb();
- transport = NULL;
+ transport_single = NULL;
mutex_unlock(&vsock_register_mutex);
}
@@ -1978,7 +1998,7 @@ const struct vsock_transport *vsock_core_get_transport(void)
/* vsock_register_mutex not taken since only the transport uses this
* function and only while registered.
*/
- return transport;
+ return transport_single;
}
EXPORT_SYMBOL_GPL(vsock_core_get_transport);
--
2.21.0
^ permalink raw reply related
* [RFC PATCH 01/13] vsock/vmci: remove unused VSOCK_DEFAULT_CONNECT_TIMEOUT
From: Stefano Garzarella @ 2019-09-27 11:26 UTC (permalink / raw)
To: netdev
Cc: linux-hyperv, K. Y. Srinivasan, Stefan Hajnoczi, Sasha Levin,
linux-kernel, kvm, David S. Miller, virtualization,
Stephen Hemminger, Jason Wang, Michael S. Tsirkin, Haiyang Zhang,
Dexuan Cui, Jorgen Hansen
In-Reply-To: <20190927112703.17745-1-sgarzare@redhat.com>
The VSOCK_DEFAULT_CONNECT_TIMEOUT definition was introduced with
commit d021c344051af ("VSOCK: Introduce VM Sockets"), but it is
never used in net/vmw_vsock/vmci_transport.c.
VSOCK_DEFAULT_CONNECT_TIMEOUT is both defined and used in
net/vmw_vsock/af_vsock.c.
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
net/vmw_vsock/vmci_transport.c | 5 -----
1 file changed, 5 deletions(-)
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 8c9c4ed90fa7..f8e3131ac480 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -78,11 +78,6 @@ static int PROTOCOL_OVERRIDE = -1;
#define VMCI_TRANSPORT_DEFAULT_QP_SIZE 262144
#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX 262144
-/* The default peer timeout indicates how long we will wait for a peer response
- * to a control message.
- */
-#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
-
/* Helper function to convert from a VMCI error code to a VSock error code. */
static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
--
2.21.0
^ permalink raw reply related
* [RFC PATCH 05/13] vsock/virtio: add transport parameter to the virtio_transport_reset_no_sock()
From: Stefano Garzarella @ 2019-09-27 11:26 UTC (permalink / raw)
To: netdev
Cc: linux-hyperv, K. Y. Srinivasan, Stefan Hajnoczi, Sasha Levin,
linux-kernel, kvm, David S. Miller, virtualization,
Stephen Hemminger, Jason Wang, Michael S. Tsirkin, Haiyang Zhang,
Dexuan Cui, Jorgen Hansen
In-Reply-To: <20190927112703.17745-1-sgarzare@redhat.com>
We are going to add a 'struct vsock_sock *' parameter to
virtio_transport_get_ops().
In some cases, such as in virtio_transport_reset_no_sock(),
there is no socket associated with the received packet,
so we can't use virtio_transport_get_ops().
In order to allow virtio_transport_reset_no_sock() to use the
'.send_pkt' callback from 'vhost_transport' or 'virtio_transport',
we add a 'struct virtio_transport *' parameter to it and to its caller,
virtio_transport_recv_pkt().
The 'vhost_transport' definition is moved up, and in virtio_transport.c
the work functions that call virtio_transport_recv_pkt() are moved below
the 'virtio_transport' definition, so that the transport's address can
be passed to virtio_transport_recv_pkt().
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
drivers/vhost/vsock.c | 94 +++++++-------
include/linux/virtio_vsock.h | 3 +-
net/vmw_vsock/virtio_transport.c | 160 ++++++++++++------------
net/vmw_vsock/virtio_transport_common.c | 12 +-
4 files changed, 135 insertions(+), 134 deletions(-)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 9f57736fe15e..92ab3852c954 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -384,6 +384,52 @@ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
return val < vq->num;
}
+static struct virtio_transport vhost_transport = {
+ .transport = {
+ .get_local_cid = vhost_transport_get_local_cid,
+
+ .init = virtio_transport_do_socket_init,
+ .destruct = virtio_transport_destruct,
+ .release = virtio_transport_release,
+ .connect = virtio_transport_connect,
+ .shutdown = virtio_transport_shutdown,
+ .cancel_pkt = vhost_transport_cancel_pkt,
+
+ .dgram_enqueue = virtio_transport_dgram_enqueue,
+ .dgram_dequeue = virtio_transport_dgram_dequeue,
+ .dgram_bind = virtio_transport_dgram_bind,
+ .dgram_allow = virtio_transport_dgram_allow,
+
+ .stream_enqueue = virtio_transport_stream_enqueue,
+ .stream_dequeue = virtio_transport_stream_dequeue,
+ .stream_has_data = virtio_transport_stream_has_data,
+ .stream_has_space = virtio_transport_stream_has_space,
+ .stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
+ .stream_is_active = virtio_transport_stream_is_active,
+ .stream_allow = virtio_transport_stream_allow,
+
+ .notify_poll_in = virtio_transport_notify_poll_in,
+ .notify_poll_out = virtio_transport_notify_poll_out,
+ .notify_recv_init = virtio_transport_notify_recv_init,
+ .notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
+ .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
+ .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
+ .notify_send_init = virtio_transport_notify_send_init,
+ .notify_send_pre_block = virtio_transport_notify_send_pre_block,
+ .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
+ .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
+
+ .set_buffer_size = virtio_transport_set_buffer_size,
+ .set_min_buffer_size = virtio_transport_set_min_buffer_size,
+ .set_max_buffer_size = virtio_transport_set_max_buffer_size,
+ .get_buffer_size = virtio_transport_get_buffer_size,
+ .get_min_buffer_size = virtio_transport_get_min_buffer_size,
+ .get_max_buffer_size = virtio_transport_get_max_buffer_size,
+ },
+
+ .send_pkt = vhost_transport_send_pkt,
+};
+
static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
{
struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
@@ -438,7 +484,7 @@ static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
/* Only accept correctly addressed packets */
if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
- virtio_transport_recv_pkt(pkt);
+ virtio_transport_recv_pkt(&vhost_transport, pkt);
else
virtio_transport_free_pkt(pkt);
@@ -786,52 +832,6 @@ static struct miscdevice vhost_vsock_misc = {
.fops = &vhost_vsock_fops,
};
-static struct virtio_transport vhost_transport = {
- .transport = {
- .get_local_cid = vhost_transport_get_local_cid,
-
- .init = virtio_transport_do_socket_init,
- .destruct = virtio_transport_destruct,
- .release = virtio_transport_release,
- .connect = virtio_transport_connect,
- .shutdown = virtio_transport_shutdown,
- .cancel_pkt = vhost_transport_cancel_pkt,
-
- .dgram_enqueue = virtio_transport_dgram_enqueue,
- .dgram_dequeue = virtio_transport_dgram_dequeue,
- .dgram_bind = virtio_transport_dgram_bind,
- .dgram_allow = virtio_transport_dgram_allow,
-
- .stream_enqueue = virtio_transport_stream_enqueue,
- .stream_dequeue = virtio_transport_stream_dequeue,
- .stream_has_data = virtio_transport_stream_has_data,
- .stream_has_space = virtio_transport_stream_has_space,
- .stream_rcvhiwat = virtio_transport_stream_rcvhiwat,
- .stream_is_active = virtio_transport_stream_is_active,
- .stream_allow = virtio_transport_stream_allow,
-
- .notify_poll_in = virtio_transport_notify_poll_in,
- .notify_poll_out = virtio_transport_notify_poll_out,
- .notify_recv_init = virtio_transport_notify_recv_init,
- .notify_recv_pre_block = virtio_transport_notify_recv_pre_block,
- .notify_recv_pre_dequeue = virtio_transport_notify_recv_pre_dequeue,
- .notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
- .notify_send_init = virtio_transport_notify_send_init,
- .notify_send_pre_block = virtio_transport_notify_send_pre_block,
- .notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
- .notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
-
- .set_buffer_size = virtio_transport_set_buffer_size,
- .set_min_buffer_size = virtio_transport_set_min_buffer_size,
- .set_max_buffer_size = virtio_transport_set_max_buffer_size,
- .get_buffer_size = virtio_transport_get_buffer_size,
- .get_min_buffer_size = virtio_transport_get_min_buffer_size,
- .get_max_buffer_size = virtio_transport_get_max_buffer_size,
- },
-
- .send_pkt = vhost_transport_send_pkt,
-};
-
static int __init vhost_vsock_init(void)
{
int ret;
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 4c7781f4b29b..96d8132acbd7 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -151,7 +151,8 @@ virtio_transport_dgram_enqueue(struct vsock_sock *vsk,
void virtio_transport_destruct(struct vsock_sock *vsk);
-void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt);
+void virtio_transport_recv_pkt(struct virtio_transport *t,
+ struct virtio_vsock_pkt *pkt);
void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt);
void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt);
u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 082a30936690..3756f0857946 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -86,33 +86,6 @@ static u32 virtio_transport_get_local_cid(void)
return ret;
}
-static void virtio_transport_loopback_work(struct work_struct *work)
-{
- struct virtio_vsock *vsock =
- container_of(work, struct virtio_vsock, loopback_work);
- LIST_HEAD(pkts);
-
- spin_lock_bh(&vsock->loopback_list_lock);
- list_splice_init(&vsock->loopback_list, &pkts);
- spin_unlock_bh(&vsock->loopback_list_lock);
-
- mutex_lock(&vsock->rx_lock);
-
- if (!vsock->rx_run)
- goto out;
-
- while (!list_empty(&pkts)) {
- struct virtio_vsock_pkt *pkt;
-
- pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list);
- list_del_init(&pkt->list);
-
- virtio_transport_recv_pkt(pkt);
- }
-out:
- mutex_unlock(&vsock->rx_lock);
-}
-
static int virtio_transport_send_pkt_loopback(struct virtio_vsock *vsock,
struct virtio_vsock_pkt *pkt)
{
@@ -370,59 +343,6 @@ static bool virtio_transport_more_replies(struct virtio_vsock *vsock)
return val < virtqueue_get_vring_size(vq);
}
-static void virtio_transport_rx_work(struct work_struct *work)
-{
- struct virtio_vsock *vsock =
- container_of(work, struct virtio_vsock, rx_work);
- struct virtqueue *vq;
-
- vq = vsock->vqs[VSOCK_VQ_RX];
-
- mutex_lock(&vsock->rx_lock);
-
- if (!vsock->rx_run)
- goto out;
-
- do {
- virtqueue_disable_cb(vq);
- for (;;) {
- struct virtio_vsock_pkt *pkt;
- unsigned int len;
-
- if (!virtio_transport_more_replies(vsock)) {
- /* Stop rx until the device processes already
- * pending replies. Leave rx virtqueue
- * callbacks disabled.
- */
- goto out;
- }
-
- pkt = virtqueue_get_buf(vq, &len);
- if (!pkt) {
- break;
- }
-
- vsock->rx_buf_nr--;
-
- /* Drop short/long packets */
- if (unlikely(len < sizeof(pkt->hdr) ||
- len > sizeof(pkt->hdr) + pkt->len)) {
- virtio_transport_free_pkt(pkt);
- continue;
- }
-
- pkt->len = len - sizeof(pkt->hdr);
- virtio_transport_deliver_tap_pkt(pkt);
- virtio_transport_recv_pkt(pkt);
- }
- } while (!virtqueue_enable_cb(vq));
-
-out:
- if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2)
- virtio_vsock_rx_fill(vsock);
- mutex_unlock(&vsock->rx_lock);
-}
-
/* event_lock must be held */
static int virtio_vsock_event_fill_one(struct virtio_vsock *vsock,
struct virtio_vsock_event *event)
@@ -586,6 +506,86 @@ static struct virtio_transport virtio_transport = {
.send_pkt = virtio_transport_send_pkt,
};
+static void virtio_transport_loopback_work(struct work_struct *work)
+{
+ struct virtio_vsock *vsock =
+ container_of(work, struct virtio_vsock, loopback_work);
+ LIST_HEAD(pkts);
+
+ spin_lock_bh(&vsock->loopback_list_lock);
+ list_splice_init(&vsock->loopback_list, &pkts);
+ spin_unlock_bh(&vsock->loopback_list_lock);
+
+ mutex_lock(&vsock->rx_lock);
+
+ if (!vsock->rx_run)
+ goto out;
+
+ while (!list_empty(&pkts)) {
+ struct virtio_vsock_pkt *pkt;
+
+ pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list);
+ list_del_init(&pkt->list);
+
+ virtio_transport_recv_pkt(&virtio_transport, pkt);
+ }
+out:
+ mutex_unlock(&vsock->rx_lock);
+}
+
+static void virtio_transport_rx_work(struct work_struct *work)
+{
+ struct virtio_vsock *vsock =
+ container_of(work, struct virtio_vsock, rx_work);
+ struct virtqueue *vq;
+
+ vq = vsock->vqs[VSOCK_VQ_RX];
+
+ mutex_lock(&vsock->rx_lock);
+
+ if (!vsock->rx_run)
+ goto out;
+
+ do {
+ virtqueue_disable_cb(vq);
+ for (;;) {
+ struct virtio_vsock_pkt *pkt;
+ unsigned int len;
+
+ if (!virtio_transport_more_replies(vsock)) {
+ /* Stop rx until the device processes already
+ * pending replies. Leave rx virtqueue
+ * callbacks disabled.
+ */
+ goto out;
+ }
+
+ pkt = virtqueue_get_buf(vq, &len);
+ if (!pkt) {
+ break;
+ }
+
+ vsock->rx_buf_nr--;
+
+ /* Drop short/long packets */
+ if (unlikely(len < sizeof(pkt->hdr) ||
+ len > sizeof(pkt->hdr) + pkt->len)) {
+ virtio_transport_free_pkt(pkt);
+ continue;
+ }
+
+ pkt->len = len - sizeof(pkt->hdr);
+ virtio_transport_deliver_tap_pkt(pkt);
+ virtio_transport_recv_pkt(&virtio_transport, pkt);
+ }
+ } while (!virtqueue_enable_cb(vq));
+
+out:
+ if (vsock->rx_buf_nr < vsock->rx_buf_max_nr / 2)
+ virtio_vsock_rx_fill(vsock);
+ mutex_unlock(&vsock->rx_lock);
+}
+
static int virtio_vsock_probe(struct virtio_device *vdev)
{
vq_callback_t *callbacks[] = {
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index ed1ad5289164..382536b69029 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -689,9 +689,9 @@ static int virtio_transport_reset(struct vsock_sock *vsk,
/* Normally packets are associated with a socket. There may be no socket if an
* attempt was made to connect to a socket that does not exist.
*/
-static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt)
+static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
+ struct virtio_vsock_pkt *pkt)
{
- const struct virtio_transport *t;
struct virtio_vsock_pkt *reply;
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_RST,
@@ -711,7 +711,6 @@ static int virtio_transport_reset_no_sock(struct virtio_vsock_pkt *pkt)
if (!reply)
return -ENOMEM;
- t = virtio_transport_get_ops();
if (!t) {
virtio_transport_free_pkt(reply);
return -ENOTCONN;
@@ -1047,7 +1046,8 @@ static bool virtio_transport_space_update(struct sock *sk,
/* We are under the virtio-vsock's vsock->rx_lock or vhost-vsock's vq->mutex
* lock.
*/
-void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)
+void virtio_transport_recv_pkt(struct virtio_transport *t,
+ struct virtio_vsock_pkt *pkt)
{
struct sockaddr_vm src, dst;
struct vsock_sock *vsk;
@@ -1069,7 +1069,7 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)
le32_to_cpu(pkt->hdr.fwd_cnt));
if (le16_to_cpu(pkt->hdr.type) != VIRTIO_VSOCK_TYPE_STREAM) {
- (void)virtio_transport_reset_no_sock(pkt);
+ (void)virtio_transport_reset_no_sock(t, pkt);
goto free_pkt;
}
@@ -1080,7 +1080,7 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)
if (!sk) {
sk = vsock_find_bound_socket(&dst);
if (!sk) {
- (void)virtio_transport_reset_no_sock(pkt);
+ (void)virtio_transport_reset_no_sock(t, pkt);
goto free_pkt;
}
}
--
2.21.0
^ permalink raw reply related
* [RFC PATCH 06/13] vsock: add 'struct vsock_sock *' param to vsock_core_get_transport()
From: Stefano Garzarella @ 2019-09-27 11:26 UTC (permalink / raw)
To: netdev
Cc: linux-hyperv, K. Y. Srinivasan, Stefan Hajnoczi, Sasha Levin,
linux-kernel, kvm, David S. Miller, virtualization,
Stephen Hemminger, Jason Wang, Michael S. Tsirkin, Haiyang Zhang,
Dexuan Cui, Jorgen Hansen
In-Reply-To: <20190927112703.17745-1-sgarzare@redhat.com>
Since the 'struct vsock_sock' object now contains a pointer to
the transport, this patch adds a parameter to
vsock_core_get_transport() so that it returns the transport
assigned to the socket.
This patch also modifies virtio_transport_get_ops(), which
uses vsock_core_get_transport(), adding the same
'struct vsock_sock *' parameter.
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
include/net/af_vsock.h | 2 +-
net/vmw_vsock/af_vsock.c | 4 ++--
net/vmw_vsock/virtio_transport_common.c | 9 +++++----
3 files changed, 8 insertions(+), 7 deletions(-)
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index a5e1e134261d..2ca67d048de4 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -166,7 +166,7 @@ static inline int vsock_core_init(const struct vsock_transport *t)
void vsock_core_exit(void);
/* The transport may downcast this to access transport-specific functions */
-const struct vsock_transport *vsock_core_get_transport(void);
+const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk);
/**** UTILS ****/
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 81ee2561c76f..f7540a3ac64e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1993,12 +1993,12 @@ void vsock_core_exit(void)
}
EXPORT_SYMBOL_GPL(vsock_core_exit);
-const struct vsock_transport *vsock_core_get_transport(void)
+const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk)
{
/* vsock_register_mutex not taken since only the transport uses this
* function and only while registered.
*/
- return transport_single;
+ return vsk->transport;
}
EXPORT_SYMBOL_GPL(vsock_core_get_transport);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 382536b69029..fc046c071178 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -29,9 +29,10 @@
/* Threshold for detecting small packets to copy */
#define GOOD_COPY_LEN 128
-static const struct virtio_transport *virtio_transport_get_ops(void)
+static const struct virtio_transport *
+virtio_transport_get_ops(struct vsock_sock *vsk)
{
- const struct vsock_transport *t = vsock_core_get_transport();
+ const struct vsock_transport *t = vsock_core_get_transport(vsk);
return container_of(t, struct virtio_transport, transport);
}
@@ -168,7 +169,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
struct virtio_vsock_pkt *pkt;
u32 pkt_len = info->pkt_len;
- src_cid = virtio_transport_get_ops()->transport.get_local_cid();
+ src_cid = virtio_transport_get_ops(vsk)->transport.get_local_cid();
src_port = vsk->local_addr.svm_port;
if (!info->remote_cid) {
dst_cid = vsk->remote_addr.svm_cid;
@@ -201,7 +202,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
virtio_transport_inc_tx_pkt(vvs, pkt);
- return virtio_transport_get_ops()->send_pkt(pkt);
+ return virtio_transport_get_ops(vsk)->send_pkt(pkt);
}
static void virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
--
2.21.0
^ permalink raw reply related
* [RFC PATCH 07/13] vsock: handle buffer_size sockopts in the core
From: Stefano Garzarella @ 2019-09-27 11:26 UTC (permalink / raw)
To: netdev
Cc: linux-hyperv, K. Y. Srinivasan, Stefan Hajnoczi, Sasha Levin,
linux-kernel, kvm, David S. Miller, virtualization,
Stephen Hemminger, Jason Wang, Michael S. Tsirkin, Haiyang Zhang,
Dexuan Cui, Jorgen Hansen
In-Reply-To: <20190927112703.17745-1-sgarzare@redhat.com>
virtio_transport and vmci_transport handle the buffer_size
sockopts in a very similar way.
In order to support multiple transports, this patch moves this
handling into the core, allowing the user to change the options
even if the socket is not yet assigned to any transport.
This patch also adds the '.notify_buffer_size' callback to
'struct vsock_transport' in order to inform the transport
when the buffer_size is changed by the user. The callback also lets
a transport limit the requested 'buffer_size' (e.g. virtio transports).
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
drivers/vhost/vsock.c | 7 +-
include/linux/virtio_vsock.h | 15 +----
include/net/af_vsock.h | 14 ++--
net/vmw_vsock/af_vsock.c | 43 ++++++++++---
net/vmw_vsock/hyperv_transport.c | 36 -----------
net/vmw_vsock/virtio_transport.c | 8 +--
net/vmw_vsock/virtio_transport_common.c | 78 ++++------------------
net/vmw_vsock/vmci_transport.c | 86 +++----------------------
net/vmw_vsock/vmci_transport.h | 3 -
9 files changed, 64 insertions(+), 226 deletions(-)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 92ab3852c954..6d7e4f022748 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -418,13 +418,8 @@ static struct virtio_transport vhost_transport = {
.notify_send_pre_block = virtio_transport_notify_send_pre_block,
.notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
+ .notify_buffer_size = virtio_transport_notify_buffer_size,
- .set_buffer_size = virtio_transport_set_buffer_size,
- .set_min_buffer_size = virtio_transport_set_min_buffer_size,
- .set_max_buffer_size = virtio_transport_set_max_buffer_size,
- .get_buffer_size = virtio_transport_get_buffer_size,
- .get_min_buffer_size = virtio_transport_get_min_buffer_size,
- .get_max_buffer_size = virtio_transport_get_max_buffer_size,
},
.send_pkt = vhost_transport_send_pkt,
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 96d8132acbd7..ab02d119fe79 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -7,9 +7,6 @@
#include <net/sock.h>
#include <net/af_vsock.h>
-#define VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE 128
-#define VIRTIO_VSOCK_DEFAULT_BUF_SIZE (1024 * 256)
-#define VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE (1024 * 256)
#define VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE (1024 * 4)
#define VIRTIO_VSOCK_MAX_BUF_SIZE 0xFFFFFFFFUL
#define VIRTIO_VSOCK_MAX_PKT_BUF_SIZE (1024 * 64)
@@ -25,11 +22,6 @@ enum {
struct virtio_vsock_sock {
struct vsock_sock *vsk;
- /* Protected by lock_sock(sk_vsock(trans->vsk)) */
- u32 buf_size;
- u32 buf_size_min;
- u32 buf_size_max;
-
spinlock_t tx_lock;
spinlock_t rx_lock;
@@ -93,12 +85,6 @@ s64 virtio_transport_stream_has_space(struct vsock_sock *vsk);
int virtio_transport_do_socket_init(struct vsock_sock *vsk,
struct vsock_sock *psk);
-u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk);
-u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk);
-u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk);
-void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val);
-void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val);
-void virtio_transport_set_max_buffer_size(struct vsock_sock *vs, u64 val);
int
virtio_transport_notify_poll_in(struct vsock_sock *vsk,
size_t target,
@@ -125,6 +111,7 @@ int virtio_transport_notify_send_pre_enqueue(struct vsock_sock *vsk,
struct vsock_transport_send_notify_data *data);
int virtio_transport_notify_send_post_enqueue(struct vsock_sock *vsk,
ssize_t written, struct vsock_transport_send_notify_data *data);
+int virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val);
u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk);
bool virtio_transport_stream_is_active(struct vsock_sock *vsk);
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 2ca67d048de4..86f8f463e01a 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -65,6 +65,11 @@ struct vsock_sock {
bool sent_request;
bool ignore_connecting_rst;
+ /* Protected by lock_sock(sk) */
+ u64 buffer_size;
+ u64 buffer_min_size;
+ u64 buffer_max_size;
+
/* Private to transport. */
void *trans;
};
@@ -140,18 +145,11 @@ struct vsock_transport {
struct vsock_transport_send_notify_data *);
int (*notify_send_post_enqueue)(struct vsock_sock *, ssize_t,
struct vsock_transport_send_notify_data *);
+ int (*notify_buffer_size)(struct vsock_sock *, u64 *);
/* Shutdown. */
int (*shutdown)(struct vsock_sock *, int);
- /* Buffer sizes. */
- void (*set_buffer_size)(struct vsock_sock *, u64);
- void (*set_min_buffer_size)(struct vsock_sock *, u64);
- void (*set_max_buffer_size)(struct vsock_sock *, u64);
- u64 (*get_buffer_size)(struct vsock_sock *);
- u64 (*get_min_buffer_size)(struct vsock_sock *);
- u64 (*get_max_buffer_size)(struct vsock_sock *);
-
/* Addressing. */
u32 (*get_local_cid)(void);
};
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index f7540a3ac64e..dee69d7ee148 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -126,6 +126,10 @@ static struct proto vsock_proto = {
*/
#define VSOCK_DEFAULT_CONNECT_TIMEOUT (2 * HZ)
+#define VSOCK_DEFAULT_BUFFER_SIZE (1024 * 256)
+#define VSOCK_DEFAULT_BUFFER_MAX_SIZE (1024 * 256)
+#define VSOCK_DEFAULT_BUFFER_MIN_SIZE 128
+
static const struct vsock_transport *transport_single;
static DEFINE_MUTEX(vsock_register_mutex);
@@ -613,10 +617,16 @@ struct sock *__vsock_create(struct net *net,
vsk->trusted = psk->trusted;
vsk->owner = get_cred(psk->owner);
vsk->connect_timeout = psk->connect_timeout;
+ vsk->buffer_size = psk->buffer_size;
+ vsk->buffer_min_size = psk->buffer_min_size;
+ vsk->buffer_max_size = psk->buffer_max_size;
} else {
vsk->trusted = capable(CAP_NET_ADMIN);
vsk->owner = get_current_cred();
vsk->connect_timeout = VSOCK_DEFAULT_CONNECT_TIMEOUT;
+ vsk->buffer_size = VSOCK_DEFAULT_BUFFER_SIZE;
+ vsk->buffer_min_size = VSOCK_DEFAULT_BUFFER_MIN_SIZE;
+ vsk->buffer_max_size = VSOCK_DEFAULT_BUFFER_MAX_SIZE;
}
if (vsk->transport->init(vsk, psk) < 0) {
@@ -1360,6 +1370,23 @@ static int vsock_listen(struct socket *sock, int backlog)
return err;
}
+static void vsock_update_buffer_size(struct vsock_sock *vsk,
+ const struct vsock_transport *transport,
+ u64 val)
+{
+ if (val > vsk->buffer_max_size)
+ val = vsk->buffer_max_size;
+
+ if (val < vsk->buffer_min_size)
+ val = vsk->buffer_min_size;
+
+ if (val != vsk->buffer_size &&
+ transport && transport->notify_buffer_size)
+ transport->notify_buffer_size(vsk, &val);
+
+ vsk->buffer_size = val;
+}
+
static int vsock_stream_setsockopt(struct socket *sock,
int level,
int optname,
@@ -1397,17 +1424,19 @@ static int vsock_stream_setsockopt(struct socket *sock,
switch (optname) {
case SO_VM_SOCKETS_BUFFER_SIZE:
COPY_IN(val);
- transport->set_buffer_size(vsk, val);
+ vsock_update_buffer_size(vsk, transport, val);
break;
case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
COPY_IN(val);
- transport->set_max_buffer_size(vsk, val);
+ vsk->buffer_max_size = val;
+ vsock_update_buffer_size(vsk, transport, vsk->buffer_size);
break;
case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
COPY_IN(val);
- transport->set_min_buffer_size(vsk, val);
+ vsk->buffer_min_size = val;
+ vsock_update_buffer_size(vsk, transport, vsk->buffer_size);
break;
case SO_VM_SOCKETS_CONNECT_TIMEOUT: {
@@ -1448,7 +1477,6 @@ static int vsock_stream_getsockopt(struct socket *sock,
int len;
struct sock *sk;
struct vsock_sock *vsk;
- const struct vsock_transport *transport;
u64 val;
if (level != AF_VSOCK)
@@ -1472,21 +1500,20 @@ static int vsock_stream_getsockopt(struct socket *sock,
err = 0;
sk = sock->sk;
vsk = vsock_sk(sk);
- transport = vsk->transport;
switch (optname) {
case SO_VM_SOCKETS_BUFFER_SIZE:
- val = transport->get_buffer_size(vsk);
+ val = vsk->buffer_size;
COPY_OUT(val);
break;
case SO_VM_SOCKETS_BUFFER_MAX_SIZE:
- val = transport->get_max_buffer_size(vsk);
+ val = vsk->buffer_max_size;
COPY_OUT(val);
break;
case SO_VM_SOCKETS_BUFFER_MIN_SIZE:
- val = transport->get_min_buffer_size(vsk);
+ val = vsk->buffer_min_size;
COPY_OUT(val);
break;
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 261521d286d6..4f47af2054dd 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -843,36 +843,6 @@ int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
return 0;
}
-static void hvs_set_buffer_size(struct vsock_sock *vsk, u64 val)
-{
- /* Ignored. */
-}
-
-static void hvs_set_min_buffer_size(struct vsock_sock *vsk, u64 val)
-{
- /* Ignored. */
-}
-
-static void hvs_set_max_buffer_size(struct vsock_sock *vsk, u64 val)
-{
- /* Ignored. */
-}
-
-static u64 hvs_get_buffer_size(struct vsock_sock *vsk)
-{
- return -ENOPROTOOPT;
-}
-
-static u64 hvs_get_min_buffer_size(struct vsock_sock *vsk)
-{
- return -ENOPROTOOPT;
-}
-
-static u64 hvs_get_max_buffer_size(struct vsock_sock *vsk)
-{
- return -ENOPROTOOPT;
-}
-
static struct vsock_transport hvs_transport = {
.get_local_cid = hvs_get_local_cid,
@@ -906,12 +876,6 @@ static struct vsock_transport hvs_transport = {
.notify_send_pre_enqueue = hvs_notify_send_pre_enqueue,
.notify_send_post_enqueue = hvs_notify_send_post_enqueue,
- .set_buffer_size = hvs_set_buffer_size,
- .set_min_buffer_size = hvs_set_min_buffer_size,
- .set_max_buffer_size = hvs_set_max_buffer_size,
- .get_buffer_size = hvs_get_buffer_size,
- .get_min_buffer_size = hvs_get_min_buffer_size,
- .get_max_buffer_size = hvs_get_max_buffer_size,
};
static int hvs_probe(struct hv_device *hdev,
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 3756f0857946..fb1fc7760e8c 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -494,13 +494,7 @@ static struct virtio_transport virtio_transport = {
.notify_send_pre_block = virtio_transport_notify_send_pre_block,
.notify_send_pre_enqueue = virtio_transport_notify_send_pre_enqueue,
.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,
-
- .set_buffer_size = virtio_transport_set_buffer_size,
- .set_min_buffer_size = virtio_transport_set_min_buffer_size,
- .set_max_buffer_size = virtio_transport_set_max_buffer_size,
- .get_buffer_size = virtio_transport_get_buffer_size,
- .get_min_buffer_size = virtio_transport_get_min_buffer_size,
- .get_max_buffer_size = virtio_transport_get_max_buffer_size,
+ .notify_buffer_size = virtio_transport_notify_buffer_size,
},
.send_pkt = virtio_transport_send_pkt,
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index fc046c071178..bac9e7430a2e 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -403,17 +403,13 @@ int virtio_transport_do_socket_init(struct vsock_sock *vsk,
if (psk) {
struct virtio_vsock_sock *ptrans = psk->trans;
- vvs->buf_size = ptrans->buf_size;
- vvs->buf_size_min = ptrans->buf_size_min;
- vvs->buf_size_max = ptrans->buf_size_max;
vvs->peer_buf_alloc = ptrans->peer_buf_alloc;
- } else {
- vvs->buf_size = VIRTIO_VSOCK_DEFAULT_BUF_SIZE;
- vvs->buf_size_min = VIRTIO_VSOCK_DEFAULT_MIN_BUF_SIZE;
- vvs->buf_size_max = VIRTIO_VSOCK_DEFAULT_MAX_BUF_SIZE;
}
- vvs->buf_alloc = vvs->buf_size;
+ if (vsk->buffer_size > VIRTIO_VSOCK_MAX_BUF_SIZE)
+ vsk->buffer_size = VIRTIO_VSOCK_MAX_BUF_SIZE;
+
+ vvs->buf_alloc = vsk->buffer_size;
spin_lock_init(&vvs->rx_lock);
spin_lock_init(&vvs->tx_lock);
@@ -423,68 +419,18 @@ int virtio_transport_do_socket_init(struct vsock_sock *vsk,
}
EXPORT_SYMBOL_GPL(virtio_transport_do_socket_init);
-u64 virtio_transport_get_buffer_size(struct vsock_sock *vsk)
-{
- struct virtio_vsock_sock *vvs = vsk->trans;
-
- return vvs->buf_size;
-}
-EXPORT_SYMBOL_GPL(virtio_transport_get_buffer_size);
-
-u64 virtio_transport_get_min_buffer_size(struct vsock_sock *vsk)
-{
- struct virtio_vsock_sock *vvs = vsk->trans;
-
- return vvs->buf_size_min;
-}
-EXPORT_SYMBOL_GPL(virtio_transport_get_min_buffer_size);
-
-u64 virtio_transport_get_max_buffer_size(struct vsock_sock *vsk)
-{
- struct virtio_vsock_sock *vvs = vsk->trans;
-
- return vvs->buf_size_max;
-}
-EXPORT_SYMBOL_GPL(virtio_transport_get_max_buffer_size);
-
-void virtio_transport_set_buffer_size(struct vsock_sock *vsk, u64 val)
-{
- struct virtio_vsock_sock *vvs = vsk->trans;
-
- if (val > VIRTIO_VSOCK_MAX_BUF_SIZE)
- val = VIRTIO_VSOCK_MAX_BUF_SIZE;
- if (val < vvs->buf_size_min)
- vvs->buf_size_min = val;
- if (val > vvs->buf_size_max)
- vvs->buf_size_max = val;
- vvs->buf_size = val;
- vvs->buf_alloc = val;
-}
-EXPORT_SYMBOL_GPL(virtio_transport_set_buffer_size);
-
-void virtio_transport_set_min_buffer_size(struct vsock_sock *vsk, u64 val)
+int virtio_transport_notify_buffer_size(struct vsock_sock *vsk, u64 *val)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- if (val > VIRTIO_VSOCK_MAX_BUF_SIZE)
- val = VIRTIO_VSOCK_MAX_BUF_SIZE;
- if (val > vvs->buf_size)
- vvs->buf_size = val;
- vvs->buf_size_min = val;
-}
-EXPORT_SYMBOL_GPL(virtio_transport_set_min_buffer_size);
+ if (*val > VIRTIO_VSOCK_MAX_BUF_SIZE)
+ *val = VIRTIO_VSOCK_MAX_BUF_SIZE;
-void virtio_transport_set_max_buffer_size(struct vsock_sock *vsk, u64 val)
-{
- struct virtio_vsock_sock *vvs = vsk->trans;
+ vvs->buf_alloc = *val;
- if (val > VIRTIO_VSOCK_MAX_BUF_SIZE)
- val = VIRTIO_VSOCK_MAX_BUF_SIZE;
- if (val < vvs->buf_size)
- vvs->buf_size = val;
- vvs->buf_size_max = val;
+ return 0;
}
-EXPORT_SYMBOL_GPL(virtio_transport_set_max_buffer_size);
+EXPORT_SYMBOL_GPL(virtio_transport_notify_buffer_size);
int
virtio_transport_notify_poll_in(struct vsock_sock *vsk,
@@ -576,9 +522,7 @@ EXPORT_SYMBOL_GPL(virtio_transport_notify_send_post_enqueue);
u64 virtio_transport_stream_rcvhiwat(struct vsock_sock *vsk)
{
- struct virtio_vsock_sock *vvs = vsk->trans;
-
- return vvs->buf_size;
+ return vsk->buffer_size;
}
EXPORT_SYMBOL_GPL(virtio_transport_stream_rcvhiwat);
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index f8e3131ac480..8290d37b6587 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -74,10 +74,6 @@ static u32 vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
static int PROTOCOL_OVERRIDE = -1;
-#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN 128
-#define VMCI_TRANSPORT_DEFAULT_QP_SIZE 262144
-#define VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX 262144
-
/* Helper function to convert from a VMCI error code to a VSock error code. */
static s32 vmci_transport_error_to_vsock_error(s32 vmci_error)
@@ -1025,11 +1021,11 @@ static int vmci_transport_recv_listen(struct sock *sk,
/* If the proposed size fits within our min/max, accept it. Otherwise
* propose our own size.
*/
- if (pkt->u.size >= vmci_trans(vpending)->queue_pair_min_size &&
- pkt->u.size <= vmci_trans(vpending)->queue_pair_max_size) {
+ if (pkt->u.size >= vpending->buffer_min_size &&
+ pkt->u.size <= vpending->buffer_max_size) {
qp_size = pkt->u.size;
} else {
- qp_size = vmci_trans(vpending)->queue_pair_size;
+ qp_size = vpending->buffer_size;
}
/* Figure out if we are using old or new requests based on the
@@ -1098,7 +1094,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
pending->sk_state = TCP_SYN_SENT;
vmci_trans(vpending)->produce_size =
vmci_trans(vpending)->consume_size = qp_size;
- vmci_trans(vpending)->queue_pair_size = qp_size;
+ vpending->buffer_size = qp_size;
vmci_trans(vpending)->notify_ops->process_request(pending);
@@ -1392,8 +1388,8 @@ static int vmci_transport_recv_connecting_client_negotiate(
vsk->ignore_connecting_rst = false;
/* Verify that we're OK with the proposed queue pair size */
- if (pkt->u.size < vmci_trans(vsk)->queue_pair_min_size ||
- pkt->u.size > vmci_trans(vsk)->queue_pair_max_size) {
+ if (pkt->u.size < vsk->buffer_min_size ||
+ pkt->u.size > vsk->buffer_max_size) {
err = -EINVAL;
goto destroy;
}
@@ -1498,8 +1494,7 @@ vmci_transport_recv_connecting_client_invalid(struct sock *sk,
vsk->sent_request = false;
vsk->ignore_connecting_rst = true;
- err = vmci_transport_send_conn_request(
- sk, vmci_trans(vsk)->queue_pair_size);
+ err = vmci_transport_send_conn_request(sk, vsk->buffer_size);
if (err < 0)
err = vmci_transport_error_to_vsock_error(err);
else
@@ -1583,21 +1578,6 @@ static int vmci_transport_socket_init(struct vsock_sock *vsk,
INIT_LIST_HEAD(&vmci_trans(vsk)->elem);
vmci_trans(vsk)->sk = &vsk->sk;
spin_lock_init(&vmci_trans(vsk)->lock);
- if (psk) {
- vmci_trans(vsk)->queue_pair_size =
- vmci_trans(psk)->queue_pair_size;
- vmci_trans(vsk)->queue_pair_min_size =
- vmci_trans(psk)->queue_pair_min_size;
- vmci_trans(vsk)->queue_pair_max_size =
- vmci_trans(psk)->queue_pair_max_size;
- } else {
- vmci_trans(vsk)->queue_pair_size =
- VMCI_TRANSPORT_DEFAULT_QP_SIZE;
- vmci_trans(vsk)->queue_pair_min_size =
- VMCI_TRANSPORT_DEFAULT_QP_SIZE_MIN;
- vmci_trans(vsk)->queue_pair_max_size =
- VMCI_TRANSPORT_DEFAULT_QP_SIZE_MAX;
- }
return 0;
}
@@ -1813,8 +1793,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk)
if (vmci_transport_old_proto_override(&old_pkt_proto) &&
old_pkt_proto) {
- err = vmci_transport_send_conn_request(
- sk, vmci_trans(vsk)->queue_pair_size);
+ err = vmci_transport_send_conn_request(sk, vsk->buffer_size);
if (err < 0) {
sk->sk_state = TCP_CLOSE;
return err;
@@ -1822,8 +1801,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk)
} else {
int supported_proto_versions =
vmci_transport_new_proto_supported_versions();
- err = vmci_transport_send_conn_request2(
- sk, vmci_trans(vsk)->queue_pair_size,
+ err = vmci_transport_send_conn_request2(sk, vsk->buffer_size,
supported_proto_versions);
if (err < 0) {
sk->sk_state = TCP_CLOSE;
@@ -1876,46 +1854,6 @@ static bool vmci_transport_stream_is_active(struct vsock_sock *vsk)
return !vmci_handle_is_invalid(vmci_trans(vsk)->qp_handle);
}
-static u64 vmci_transport_get_buffer_size(struct vsock_sock *vsk)
-{
- return vmci_trans(vsk)->queue_pair_size;
-}
-
-static u64 vmci_transport_get_min_buffer_size(struct vsock_sock *vsk)
-{
- return vmci_trans(vsk)->queue_pair_min_size;
-}
-
-static u64 vmci_transport_get_max_buffer_size(struct vsock_sock *vsk)
-{
- return vmci_trans(vsk)->queue_pair_max_size;
-}
-
-static void vmci_transport_set_buffer_size(struct vsock_sock *vsk, u64 val)
-{
- if (val < vmci_trans(vsk)->queue_pair_min_size)
- vmci_trans(vsk)->queue_pair_min_size = val;
- if (val > vmci_trans(vsk)->queue_pair_max_size)
- vmci_trans(vsk)->queue_pair_max_size = val;
- vmci_trans(vsk)->queue_pair_size = val;
-}
-
-static void vmci_transport_set_min_buffer_size(struct vsock_sock *vsk,
- u64 val)
-{
- if (val > vmci_trans(vsk)->queue_pair_size)
- vmci_trans(vsk)->queue_pair_size = val;
- vmci_trans(vsk)->queue_pair_min_size = val;
-}
-
-static void vmci_transport_set_max_buffer_size(struct vsock_sock *vsk,
- u64 val)
-{
- if (val < vmci_trans(vsk)->queue_pair_size)
- vmci_trans(vsk)->queue_pair_size = val;
- vmci_trans(vsk)->queue_pair_max_size = val;
-}
-
static int vmci_transport_notify_poll_in(
struct vsock_sock *vsk,
size_t target,
@@ -2098,12 +2036,6 @@ static const struct vsock_transport vmci_transport = {
.notify_send_pre_enqueue = vmci_transport_notify_send_pre_enqueue,
.notify_send_post_enqueue = vmci_transport_notify_send_post_enqueue,
.shutdown = vmci_transport_shutdown,
- .set_buffer_size = vmci_transport_set_buffer_size,
- .set_min_buffer_size = vmci_transport_set_min_buffer_size,
- .set_max_buffer_size = vmci_transport_set_max_buffer_size,
- .get_buffer_size = vmci_transport_get_buffer_size,
- .get_min_buffer_size = vmci_transport_get_min_buffer_size,
- .get_max_buffer_size = vmci_transport_get_max_buffer_size,
.get_local_cid = vmci_transport_get_local_cid,
};
diff --git a/net/vmw_vsock/vmci_transport.h b/net/vmw_vsock/vmci_transport.h
index 1ca1e8640b31..b7b072194282 100644
--- a/net/vmw_vsock/vmci_transport.h
+++ b/net/vmw_vsock/vmci_transport.h
@@ -108,9 +108,6 @@ struct vmci_transport {
struct vmci_qp *qpair;
u64 produce_size;
u64 consume_size;
- u64 queue_pair_size;
- u64 queue_pair_min_size;
- u64 queue_pair_max_size;
u32 detach_sub_id;
union vmci_transport_notify notify;
const struct vmci_transport_notify_ops *notify_ops;
--
2.21.0
^ permalink raw reply related
* [RFC PATCH 08/13] vsock: move vsock_insert_unbound() in the vsock_create()
From: Stefano Garzarella @ 2019-09-27 11:26 UTC (permalink / raw)
To: netdev
Cc: linux-hyperv, K. Y. Srinivasan, Stefan Hajnoczi, Sasha Levin,
linux-kernel, kvm, David S. Miller, virtualization,
Stephen Hemminger, Jason Wang, Michael S. Tsirkin, Haiyang Zhang,
Dexuan Cui, Jorgen Hansen
In-Reply-To: <20190927112703.17745-1-sgarzare@redhat.com>
vsock_insert_unbound() was called only when the 'sock' parameter of
__vsock_create() was not null. This only happened when
__vsock_create() was called by vsock_create().
In order to simplify the multi-transports support, this patch
moves vsock_insert_unbound() to the end of vsock_create().
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
net/vmw_vsock/af_vsock.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index dee69d7ee148..95e6db21e7e1 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -634,9 +634,6 @@ struct sock *__vsock_create(struct net *net,
return NULL;
}
- if (sock)
- vsock_insert_unbound(vsk);
-
return sk;
}
EXPORT_SYMBOL_GPL(__vsock_create);
@@ -1875,6 +1872,8 @@ static const struct proto_ops vsock_stream_ops = {
static int vsock_create(struct net *net, struct socket *sock,
int protocol, int kern)
{
+ struct sock *sk;
+
if (!sock)
return -EINVAL;
@@ -1894,7 +1893,13 @@ static int vsock_create(struct net *net, struct socket *sock,
sock->state = SS_UNCONNECTED;
- return __vsock_create(net, sock, NULL, GFP_KERNEL, 0, kern) ? 0 : -ENOMEM;
+ sk = __vsock_create(net, sock, NULL, GFP_KERNEL, 0, kern);
+ if (!sk)
+ return -ENOMEM;
+
+ vsock_insert_unbound(vsock_sk(sk));
+
+ return 0;
}
static const struct net_proto_family vsock_family_ops = {
--
2.21.0
^ permalink raw reply related
* [RFC PATCH 10/13] vsock: add multi-transports support
From: Stefano Garzarella @ 2019-09-27 11:27 UTC (permalink / raw)
To: netdev
Cc: linux-hyperv, K. Y. Srinivasan, Stefan Hajnoczi, Sasha Levin,
linux-kernel, kvm, David S. Miller, virtualization,
Stephen Hemminger, Jason Wang, Michael S. Tsirkin, Haiyang Zhang,
Dexuan Cui, Jorgen Hansen
In-Reply-To: <20190927112703.17745-1-sgarzare@redhat.com>
This patch adds the support of multiple transports in the
VSOCK core.
With the multi-transports support, we can use vsock with nested VMs
(using also different hypervisors) loading both guest->host and
host->guest transports at the same time.
Major changes:
- vsock core module can be loaded independently of the transports
- each 'struct vsock_transport' has a new 'features' field
(H2G, G2H, DGRAM) to identify which directions the transport can
handle and whether it supports DGRAM (only vmci)
- vsock_core_init() and vsock_core_exit() are renamed to
vsock_core_register() and vsock_core_unregister()
- each stream socket is assigned to a transport when the remote CID
is set (during the connect() or when we receive a connection request
on a listener socket).
The remote CID is used to decide which transport to use:
- remote CID > VMADDR_CID_HOST will use host->guest transport
- remote CID <= VMADDR_CID_HOST will use guest->host transport
- listener sockets are not bound to any transport since no transport
operations are done on them. In this way we can create a listener
socket even if the transports are not loaded, or with VMADDR_CID_ANY
to listen on all transports.
- DGRAM sockets are handled as before, since only the vmci_transport
provides this feature.
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
RFC:
- I'd like to move MODULE_ALIAS_NETPROTO(PF_VSOCK) to af_vsock.c.
@Jorgen could this break the VMware products?
- DGRAM sockets are handled as before, I don't know if it makes sense
to work on it now, or when another transport will support DGRAM. The
big issue here is that we cannot link a socket 1-1 to a transport as
we do for stream sockets, since DGRAM is not connection-oriented.
---
drivers/vhost/vsock.c | 6 +-
include/net/af_vsock.h | 15 +-
net/vmw_vsock/af_vsock.c | 240 ++++++++++++++++++------
net/vmw_vsock/hyperv_transport.c | 28 ++-
net/vmw_vsock/virtio_transport.c | 8 +-
net/vmw_vsock/virtio_transport_common.c | 28 ++-
net/vmw_vsock/vmci_transport.c | 31 ++-
7 files changed, 275 insertions(+), 81 deletions(-)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 6d7e4f022748..375af01a5b64 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -386,6 +386,8 @@ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
static struct virtio_transport vhost_transport = {
.transport = {
+ .features = VSOCK_TRANSPORT_F_H2G,
+
.get_local_cid = vhost_transport_get_local_cid,
.init = virtio_transport_do_socket_init,
@@ -831,7 +833,7 @@ static int __init vhost_vsock_init(void)
{
int ret;
- ret = vsock_core_init(&vhost_transport.transport);
+ ret = vsock_core_register(&vhost_transport.transport);
if (ret < 0)
return ret;
return misc_register(&vhost_vsock_misc);
@@ -840,7 +842,7 @@ static int __init vhost_vsock_init(void)
static void __exit vhost_vsock_exit(void)
{
misc_deregister(&vhost_vsock_misc);
- vsock_core_exit();
+ vsock_core_unregister(&vhost_transport.transport);
};
module_init(vhost_vsock_init);
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 86f8f463e01a..2a081d19e20d 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -94,7 +94,13 @@ struct vsock_transport_send_notify_data {
u64 data2; /* Transport-defined. */
};
+#define VSOCK_TRANSPORT_F_H2G 0x00000001
+#define VSOCK_TRANSPORT_F_G2H 0x00000002
+#define VSOCK_TRANSPORT_F_DGRAM 0x00000004
+
struct vsock_transport {
+ uint64_t features;
+
/* Initialize/tear-down socket. */
int (*init)(struct vsock_sock *, struct vsock_sock *);
void (*destruct)(struct vsock_sock *);
@@ -156,12 +162,8 @@ struct vsock_transport {
/**** CORE ****/
-int __vsock_core_init(const struct vsock_transport *t, struct module *owner);
-static inline int vsock_core_init(const struct vsock_transport *t)
-{
- return __vsock_core_init(t, THIS_MODULE);
-}
-void vsock_core_exit(void);
+int vsock_core_register(const struct vsock_transport *t);
+void vsock_core_unregister(const struct vsock_transport *t);
/* The transport may downcast this to access transport-specific functions */
const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk);
@@ -192,6 +194,7 @@ struct sock *vsock_find_connected_socket(struct sockaddr_vm *src,
struct sockaddr_vm *dst);
void vsock_remove_sock(struct vsock_sock *vsk);
void vsock_for_each_connected_socket(void (*fn)(struct sock *sk));
+int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk);
/**** TAP ****/
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 95e6db21e7e1..c52203fe52c4 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -130,7 +130,12 @@ static struct proto vsock_proto = {
#define VSOCK_DEFAULT_BUFFER_MAX_SIZE (1024 * 256)
#define VSOCK_DEFAULT_BUFFER_MIN_SIZE 128
-static const struct vsock_transport *transport_single;
+/* Transport used for host->guest communication */
+static const struct vsock_transport *transport_h2g;
+/* Transport used for guest->host communication */
+static const struct vsock_transport *transport_g2h;
+/* Transport used for DGRAM communication */
+static const struct vsock_transport *transport_dgram;
static DEFINE_MUTEX(vsock_register_mutex);
/**** UTILS ****/
@@ -182,7 +187,7 @@ static int vsock_auto_bind(struct vsock_sock *vsk)
return __vsock_bind(sk, &local_addr);
}
-static int __init vsock_init_tables(void)
+static void vsock_init_tables(void)
{
int i;
@@ -191,7 +196,6 @@ static int __init vsock_init_tables(void)
for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++)
INIT_LIST_HEAD(&vsock_connected_table[i]);
- return 0;
}
static void __vsock_insert_bound(struct list_head *list,
@@ -376,6 +380,55 @@ void vsock_enqueue_accept(struct sock *listener, struct sock *connected)
}
EXPORT_SYMBOL_GPL(vsock_enqueue_accept);
+/* Assign a transport to a socket and call the .init transport callback.
+ *
+ * Note: for stream socket this must be called when vsk->remote_addr is set
+ * (e.g. during the connect() or when a connection request on a listener
+ * socket is received).
+ * The vsk->remote_addr is used to decide which transport to use:
+ * - remote CID > VMADDR_CID_HOST will use host->guest transport
+ * - remote CID <= VMADDR_CID_HOST will use guest->host transport
+ */
+int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
+{
+ struct sock *sk = sk_vsock(vsk);
+ /* RFC-TODO: should vsk->transport be already assigned?
+ * How to handle?
+ */
+ WARN_ON(vsk->transport);
+
+ switch (sk->sk_type) {
+ case SOCK_DGRAM:
+ vsk->transport = transport_dgram;
+ break;
+ case SOCK_STREAM:
+ if (vsk->remote_addr.svm_cid > VMADDR_CID_HOST)
+ vsk->transport = transport_h2g;
+ else
+ vsk->transport = transport_g2h;
+ break;
+ default:
+ return -ESOCKTNOSUPPORT;
+ }
+
+ if (!vsk->transport)
+ return -ENODEV;
+
+ return vsk->transport->init(vsk, psk);
+}
+EXPORT_SYMBOL_GPL(vsock_assign_transport);
+
+static bool vsock_find_cid(unsigned int cid)
+{
+ if (transport_g2h && cid == transport_g2h->get_local_cid())
+ return true;
+
+ if (transport_h2g && cid == VMADDR_CID_HOST)
+ return true;
+
+ return false;
+}
+
static struct sock *vsock_dequeue_accept(struct sock *listener)
{
struct vsock_sock *vlistener;
@@ -414,6 +467,9 @@ static int vsock_send_shutdown(struct sock *sk, int mode)
{
struct vsock_sock *vsk = vsock_sk(sk);
+ if (!vsk->transport)
+ return -ENODEV;
+
return vsk->transport->shutdown(vsk, mode);
}
@@ -530,7 +586,6 @@ static int __vsock_bind_dgram(struct vsock_sock *vsk,
static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
{
struct vsock_sock *vsk = vsock_sk(sk);
- u32 cid;
int retval;
/* First ensure this socket isn't already bound. */
@@ -540,10 +595,9 @@ static int __vsock_bind(struct sock *sk, struct sockaddr_vm *addr)
/* Now bind to the provided address or select appropriate values if
* none are provided (VMADDR_CID_ANY and VMADDR_PORT_ANY). Note that
* like AF_INET prevents binding to a non-local IP address (in most
- * cases), we only allow binding to the local CID.
+ * cases), we only allow binding to a local CID.
*/
- cid = vsk->transport->get_local_cid();
- if (addr->svm_cid != cid && addr->svm_cid != VMADDR_CID_ANY)
+ if (addr->svm_cid != VMADDR_CID_ANY && !vsock_find_cid(addr->svm_cid))
return -EADDRNOTAVAIL;
switch (sk->sk_socket->type) {
@@ -592,7 +646,6 @@ struct sock *__vsock_create(struct net *net,
sk->sk_type = type;
vsk = vsock_sk(sk);
- vsk->transport = transport_single;
vsock_addr_init(&vsk->local_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
vsock_addr_init(&vsk->remote_addr, VMADDR_CID_ANY, VMADDR_PORT_ANY);
@@ -629,11 +682,6 @@ struct sock *__vsock_create(struct net *net,
vsk->buffer_max_size = VSOCK_DEFAULT_BUFFER_MAX_SIZE;
}
- if (vsk->transport->init(vsk, psk) < 0) {
- sk_free(sk);
- return NULL;
- }
-
return sk;
}
EXPORT_SYMBOL_GPL(__vsock_create);
@@ -648,7 +696,10 @@ static void __vsock_release(struct sock *sk)
vsk = vsock_sk(sk);
pending = NULL; /* Compiler warning. */
- vsk->transport->release(vsk);
+ if (vsk->transport)
+ vsk->transport->release(vsk);
+ else if (sk->sk_type == SOCK_STREAM)
+ vsock_remove_sock(vsk);
lock_sock(sk);
sock_orphan(sk);
@@ -672,7 +723,8 @@ static void vsock_sk_destruct(struct sock *sk)
{
struct vsock_sock *vsk = vsock_sk(sk);
- vsk->transport->destruct(vsk);
+ if (vsk->transport)
+ vsk->transport->destruct(vsk);
/* When clearing these addresses, there's no need to set the family and
* possibly register the address family with the kernel.
@@ -882,7 +934,7 @@ static __poll_t vsock_poll(struct file *file, struct socket *sock,
mask |= EPOLLIN | EPOLLRDNORM;
/* If there is something in the queue then we can read. */
- if (transport->stream_is_active(vsk) &&
+ if (transport && transport->stream_is_active(vsk) &&
!(sk->sk_shutdown & RCV_SHUTDOWN)) {
bool data_ready_now = false;
int ret = transport->notify_poll_in(
@@ -1132,7 +1184,6 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
err = 0;
sk = sock->sk;
vsk = vsock_sk(sk);
- transport = vsk->transport;
lock_sock(sk);
@@ -1160,19 +1211,26 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
goto out;
}
+ /* Set the remote address that we are connecting to. */
+ memcpy(&vsk->remote_addr, remote_addr,
+ sizeof(vsk->remote_addr));
+
+ err = vsock_assign_transport(vsk, NULL);
+ if (err)
+ goto out;
+
+ transport = vsk->transport;
+
/* The hypervisor and well-known contexts do not have socket
* endpoints.
*/
- if (!transport->stream_allow(remote_addr->svm_cid,
+ if (!transport ||
+ !transport->stream_allow(remote_addr->svm_cid,
remote_addr->svm_port)) {
err = -ENETUNREACH;
goto out;
}
- /* Set the remote address that we are connecting to. */
- memcpy(&vsk->remote_addr, remote_addr,
- sizeof(vsk->remote_addr));
-
err = vsock_auto_bind(vsk);
if (err)
goto out;
@@ -1572,7 +1630,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out;
}
- if (sk->sk_state != TCP_ESTABLISHED ||
+ if (!transport || sk->sk_state != TCP_ESTABLISHED ||
!vsock_addr_bound(&vsk->local_addr)) {
err = -ENOTCONN;
goto out;
@@ -1698,7 +1756,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
lock_sock(sk);
- if (sk->sk_state != TCP_ESTABLISHED) {
+ if (!transport || sk->sk_state != TCP_ESTABLISHED) {
/* Recvmsg is supposed to return 0 if a peer performs an
* orderly shutdown. Differentiate between that case and when a
* peer has not connected or a local shutdown occured with the
@@ -1872,7 +1930,9 @@ static const struct proto_ops vsock_stream_ops = {
static int vsock_create(struct net *net, struct socket *sock,
int protocol, int kern)
{
+ struct vsock_sock *vsk;
struct sock *sk;
+ int ret;
if (!sock)
return -EINVAL;
@@ -1897,7 +1957,20 @@ static int vsock_create(struct net *net, struct socket *sock,
if (!sk)
return -ENOMEM;
- vsock_insert_unbound(vsock_sk(sk));
+ vsk = vsock_sk(sk);
+
+ /* RFC-TODO: for dgram we still support only one transport, and
+ * we assign it during the sock creation.
+ */
+ if (sock->type == SOCK_DGRAM) {
+ ret = vsock_assign_transport(vsk, NULL);
+ if (ret < 0) {
+ sock_put(sk);
+ return ret;
+ }
+ }
+
+ vsock_insert_unbound(vsk);
return 0;
}
@@ -1912,11 +1985,20 @@ static long vsock_dev_do_ioctl(struct file *filp,
unsigned int cmd, void __user *ptr)
{
u32 __user *p = ptr;
+ u32 cid = VMADDR_CID_ANY;
int retval = 0;
switch (cmd) {
case IOCTL_VM_SOCKETS_GET_LOCAL_CID:
- if (put_user(transport_single->get_local_cid(), p) != 0)
+ /* To be compatible with the VMCI behavior, we prioritize the
+ * guest CID instead of the well-known host CID (VMADDR_CID_HOST).
+ */
+ if (transport_g2h)
+ cid = transport_g2h->get_local_cid();
+ else if (transport_h2g)
+ cid = transport_h2g->get_local_cid();
+
+ if (put_user(cid, p) != 0)
retval = -EFAULT;
break;
@@ -1956,24 +2038,13 @@ static struct miscdevice vsock_device = {
.fops = &vsock_device_ops,
};
-int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
+static int __init vsock_init(void)
{
- int err = mutex_lock_interruptible(&vsock_register_mutex);
+ int err = 0;
- if (err)
- return err;
-
- if (transport_single) {
- err = -EBUSY;
- goto err_busy;
- }
-
- /* Transport must be the owner of the protocol so that it can't
- * unload while there are open sockets.
- */
- vsock_proto.owner = owner;
- transport_single = t;
+ vsock_init_tables();
+ vsock_proto.owner = THIS_MODULE;
vsock_device.minor = MISC_DYNAMIC_MINOR;
err = misc_register(&vsock_device);
if (err) {
@@ -1994,7 +2065,6 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
goto err_unregister_proto;
}
- mutex_unlock(&vsock_register_mutex);
return 0;
err_unregister_proto:
@@ -2002,28 +2072,15 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner)
err_deregister_misc:
misc_deregister(&vsock_device);
err_reset_transport:
- transport_single = NULL;
-err_busy:
- mutex_unlock(&vsock_register_mutex);
return err;
}
-EXPORT_SYMBOL_GPL(__vsock_core_init);
-void vsock_core_exit(void)
+static void __exit vsock_exit(void)
{
- mutex_lock(&vsock_register_mutex);
-
misc_deregister(&vsock_device);
sock_unregister(AF_VSOCK);
proto_unregister(&vsock_proto);
-
- /* We do not want the assignment below re-ordered. */
- mb();
- transport_single = NULL;
-
- mutex_unlock(&vsock_register_mutex);
}
-EXPORT_SYMBOL_GPL(vsock_core_exit);
const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk)
{
@@ -2034,12 +2091,77 @@ const struct vsock_transport *vsock_core_get_transport(struct vsock_sock *vsk)
}
EXPORT_SYMBOL_GPL(vsock_core_get_transport);
-static void __exit vsock_exit(void)
+int vsock_core_register(const struct vsock_transport *t)
{
- /* Do nothing. This function makes this module removable. */
+ const struct vsock_transport *t_h2g, *t_g2h, *t_dgram;
+ int err = mutex_lock_interruptible(&vsock_register_mutex);
+
+ if (err)
+ return err;
+
+ t_h2g = transport_h2g;
+ t_g2h = transport_g2h;
+ t_dgram = transport_dgram;
+
+ /* RFC-TODO: vmci transport offers both H2G and G2H features in the
+ * same transport. We are able to set the G2H feature only if we are
+ * in a VMware guest, but we are not able to do the same for the host.
+ */
+ if (t->features & VSOCK_TRANSPORT_F_H2G) {
+ if (t_h2g) {
+ err = -EBUSY;
+ goto err_busy;
+ }
+ t_h2g = t;
+ }
+
+ if (t->features & VSOCK_TRANSPORT_F_G2H) {
+ if (t_g2h) {
+ err = -EBUSY;
+ goto err_busy;
+ }
+ t_g2h = t;
+ }
+
+ if (t->features & VSOCK_TRANSPORT_F_DGRAM) {
+ if (t_dgram) {
+ err = -EBUSY;
+ goto err_busy;
+ }
+ t_dgram = t;
+ }
+
+ transport_h2g = t_h2g;
+ transport_g2h = t_g2h;
+ transport_dgram = t_dgram;
+
+err_busy:
+ mutex_unlock(&vsock_register_mutex);
+ return err;
+}
+EXPORT_SYMBOL_GPL(vsock_core_register);
+
+void vsock_core_unregister(const struct vsock_transport *t)
+{
+ mutex_lock(&vsock_register_mutex);
+
+ /* RFC-TODO: maybe we should check if there are open sockets
+ * assigned to that transport and avoid the unregistration
+ */
+ if (transport_h2g == t)
+ transport_h2g = NULL;
+
+ if (transport_g2h == t)
+ transport_g2h = NULL;
+
+ if (transport_dgram == t)
+ transport_dgram = NULL;
+
+ mutex_unlock(&vsock_register_mutex);
}
+EXPORT_SYMBOL_GPL(vsock_core_unregister);
-module_init(vsock_init_tables);
+module_init(vsock_init);
module_exit(vsock_exit);
MODULE_AUTHOR("VMware, Inc.");
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 306310794522..94e6fc905a77 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -163,6 +163,8 @@ static const guid_t srv_id_template =
GUID_INIT(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58,
0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3);
+static bool hvs_check_transport(struct vsock_sock *vsk);
+
static bool is_valid_srv_id(const guid_t *id)
{
return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(guid_t) - 4);
@@ -366,6 +368,18 @@ static void hvs_open_connection(struct vmbus_channel *chan)
new->sk_state = TCP_SYN_SENT;
vnew = vsock_sk(new);
+
+ hvs_addr_init(&vnew->local_addr, if_type);
+ hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr);
+
+ ret = vsock_assign_transport(vnew, vsock_sk(sk));
+ /* Transport assigned (looking at remote_addr) must be the
+ * same where we received the request.
+ */
+ if (ret || !hvs_check_transport(vnew)) {
+ sock_put(new);
+ goto out;
+ }
hvs_new = vnew->trans;
hvs_new->chan = chan;
} else {
@@ -429,9 +443,6 @@ static void hvs_open_connection(struct vmbus_channel *chan)
new->sk_state = TCP_ESTABLISHED;
sk->sk_ack_backlog++;
- hvs_addr_init(&vnew->local_addr, if_type);
- hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr);
-
hvs_new->vm_srv_id = *if_type;
hvs_new->host_srv_id = *if_instance;
@@ -845,6 +856,8 @@ int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
}
static struct vsock_transport hvs_transport = {
+ .features = VSOCK_TRANSPORT_F_G2H,
+
.get_local_cid = hvs_get_local_cid,
.init = hvs_sock_init,
@@ -879,6 +892,11 @@ static struct vsock_transport hvs_transport = {
};
+static bool hvs_check_transport(struct vsock_sock *vsk)
+{
+ return vsk->transport == &hvs_transport;
+}
+
static int hvs_probe(struct hv_device *hdev,
const struct hv_vmbus_device_id *dev_id)
{
@@ -927,7 +945,7 @@ static int __init hvs_init(void)
if (ret != 0)
return ret;
- ret = vsock_core_init(&hvs_transport);
+ ret = vsock_core_register(&hvs_transport);
if (ret) {
vmbus_driver_unregister(&hvs_drv);
return ret;
@@ -938,7 +956,7 @@ static int __init hvs_init(void)
static void __exit hvs_exit(void)
{
- vsock_core_exit();
+ vsock_core_unregister(&hvs_transport);
vmbus_driver_unregister(&hvs_drv);
}
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index fb1fc7760e8c..0ff037ef7f8e 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -462,6 +462,8 @@ static void virtio_vsock_rx_done(struct virtqueue *vq)
static struct virtio_transport virtio_transport = {
.transport = {
+ .features = VSOCK_TRANSPORT_F_G2H,
+
.get_local_cid = virtio_transport_get_local_cid,
.init = virtio_transport_do_socket_init,
@@ -770,7 +772,7 @@ static int __init virtio_vsock_init(void)
if (!virtio_vsock_workqueue)
return -ENOMEM;
- ret = vsock_core_init(&virtio_transport.transport);
+ ret = vsock_core_register(&virtio_transport.transport);
if (ret)
goto out_wq;
@@ -781,7 +783,7 @@ static int __init virtio_vsock_init(void)
return 0;
out_vci:
- vsock_core_exit();
+ vsock_core_unregister(&virtio_transport.transport);
out_wq:
destroy_workqueue(virtio_vsock_workqueue);
return ret;
@@ -790,7 +792,7 @@ static int __init virtio_vsock_init(void)
static void __exit virtio_vsock_exit(void)
{
unregister_virtio_driver(&virtio_vsock_driver);
- vsock_core_exit();
+ vsock_core_unregister(&virtio_transport.transport);
destroy_workqueue(virtio_vsock_workqueue);
}
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index bac9e7430a2e..ebb4701310a4 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -400,7 +400,7 @@ int virtio_transport_do_socket_init(struct vsock_sock *vsk,
vsk->trans = vvs;
vvs->vsk = vsk;
- if (psk) {
+ if (psk && psk->trans) {
struct virtio_vsock_sock *ptrans = psk->trans;
vvs->peer_buf_alloc = ptrans->peer_buf_alloc;
@@ -927,11 +927,13 @@ virtio_transport_send_response(struct vsock_sock *vsk,
/* Handle server socket */
static int
-virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
+virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
+ struct virtio_transport *t)
{
struct vsock_sock *vsk = vsock_sk(sk);
struct vsock_sock *vchild;
struct sock *child;
+ int ret;
if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) {
virtio_transport_reset(vsk, pkt);
@@ -962,6 +964,17 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid),
le32_to_cpu(pkt->hdr.src_port));
+ ret = vsock_assign_transport(vchild, vsk);
+ /* Transport assigned (looking at remote_addr) must be the same
+ * where we received the request.
+ */
+ if (ret || vchild->transport != &t->transport) {
+ release_sock(child);
+ virtio_transport_reset(vsk, pkt);
+ sock_put(child);
+ return ret;
+ }
+
vsock_insert_connected(vchild);
vsock_enqueue_accept(sk, child);
virtio_transport_send_response(vchild, pkt);
@@ -979,6 +992,14 @@ static bool virtio_transport_space_update(struct sock *sk,
struct virtio_vsock_sock *vvs = vsk->trans;
bool space_available;
+ /* Listener sockets are not associated with any transport, so we are
+ * not able to take the state to see if there is space available in the
+ * remote peer, but since they are only used to receive requests, we
+ * can assume that there is always space available in the other peer.
+ */
+ if (!vvs)
+ return true;
+
/* buf_alloc and fwd_cnt is always included in the hdr */
spin_lock_bh(&vvs->tx_lock);
vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc);
@@ -1044,7 +1065,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
switch (sk->sk_state) {
case TCP_LISTEN:
- virtio_transport_recv_listen(sk, pkt);
+ virtio_transport_recv_listen(sk, pkt, t);
virtio_transport_free_pkt(pkt);
break;
case TCP_SYN_SENT:
@@ -1062,6 +1083,7 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
virtio_transport_free_pkt(pkt);
break;
}
+
release_sock(sk);
/* Release refcnt obtained when we fetched this socket out of the
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 8290d37b6587..52e63952d0d4 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -57,6 +57,7 @@ static bool vmci_transport_old_proto_override(bool *old_pkt_proto);
static u16 vmci_transport_new_proto_supported_versions(void);
static bool vmci_transport_proto_to_notify_struct(struct sock *sk, u16 *proto,
bool old_pkt_proto);
+static bool vmci_check_transport(struct vsock_sock *vsk);
struct vmci_transport_recv_pkt_info {
struct work_struct work;
@@ -1018,6 +1019,15 @@ static int vmci_transport_recv_listen(struct sock *sk,
vsock_addr_init(&vpending->remote_addr, pkt->dg.src.context,
pkt->src_port);
+ err = vsock_assign_transport(vpending, vsock_sk(sk));
+ /* Transport assigned (looking at remote_addr) must be the same
+ * where we received the request.
+ */
+ if (err || !vmci_check_transport(vpending)) {
+ sock_put(pending);
+ return err;
+ }
+
/* If the proposed size fits within our min/max, accept it. Otherwise
* propose our own size.
*/
@@ -2009,7 +2019,8 @@ static u32 vmci_transport_get_local_cid(void)
return vmci_get_context_id();
}
-static const struct vsock_transport vmci_transport = {
+static struct vsock_transport vmci_transport = {
+ .features = VSOCK_TRANSPORT_F_DGRAM | VSOCK_TRANSPORT_F_H2G,
.init = vmci_transport_socket_init,
.destruct = vmci_transport_destruct,
.release = vmci_transport_release,
@@ -2039,10 +2050,24 @@ static const struct vsock_transport vmci_transport = {
.get_local_cid = vmci_transport_get_local_cid,
};
+static bool vmci_check_transport(struct vsock_sock *vsk)
+{
+ return vsk->transport == &vmci_transport;
+}
+
static int __init vmci_transport_init(void)
{
+ int cid;
int err;
+ cid = vmci_get_context_id();
+
+ if (cid == VMCI_INVALID_ID)
+ return -EINVAL;
+
+ if (cid != VMCI_HOST_CONTEXT_ID)
+ vmci_transport.features |= VSOCK_TRANSPORT_F_G2H;
+
/* Create the datagram handle that we will use to send and receive all
* VSocket control messages for this context.
*/
@@ -2066,7 +2091,7 @@ static int __init vmci_transport_init(void)
goto err_destroy_stream_handle;
}
- err = vsock_core_init(&vmci_transport);
+ err = vsock_core_register(&vmci_transport);
if (err < 0)
goto err_unsubscribe;
@@ -2097,7 +2122,7 @@ static void __exit vmci_transport_exit(void)
vmci_transport_qp_resumed_sub_id = VMCI_INVALID_ID;
}
- vsock_core_exit();
+ vsock_core_unregister(&vmci_transport);
}
module_exit(vmci_transport_exit);
--
2.21.0
^ permalink raw reply related
* [RFC PATCH 11/13] vsock: add 'transport_hg' to handle g2h\h2g transports
From: Stefano Garzarella @ 2019-09-27 11:27 UTC (permalink / raw)
To: netdev
Cc: linux-hyperv, K. Y. Srinivasan, Stefan Hajnoczi, Sasha Levin,
linux-kernel, kvm, David S. Miller, virtualization,
Stephen Hemminger, Jason Wang, Michael S. Tsirkin, Haiyang Zhang,
Dexuan Cui, Jorgen Hansen
In-Reply-To: <20190927112703.17745-1-sgarzare@redhat.com>
VMCI transport provides both g2h and h2g behaviors in a single
transport.
We are able to set (or not) the g2h behavior, detecting if we
are in a VMware guest (or not), but the h2g feature is always set.
This prevents other h2g transports from being loaded while we are in a
VMware guest.
This patch adds a new 'transport_hg' to handle this case, reducing
the priority of transports that provide both g2h and h2g
behaviors. A transport that has both g2h and h2g features can be
bypassed by a transport that has only the h2g feature.
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
net/vmw_vsock/af_vsock.c | 29 ++++++++++++++++++++++++-----
1 file changed, 24 insertions(+), 5 deletions(-)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index c52203fe52c4..c5f46b8242ce 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -134,6 +134,8 @@ static struct proto vsock_proto = {
static const struct vsock_transport *transport_h2g;
/* Transport used for guest->host communication */
static const struct vsock_transport *transport_g2h;
+/* Transport used for both host->guest and guest->host communication */
+static const struct vsock_transport *transport_hg;
/* Transport used for DGRAM communication */
static const struct vsock_transport *transport_dgram;
static DEFINE_MUTEX(vsock_register_mutex);
@@ -402,10 +404,13 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
vsk->transport = transport_dgram;
break;
case SOCK_STREAM:
- if (vsk->remote_addr.svm_cid > VMADDR_CID_HOST)
+ if (vsk->remote_addr.svm_cid > VMADDR_CID_HOST) {
vsk->transport = transport_h2g;
- else
+ if (!vsk->transport)
+ vsk->transport = transport_hg;
+ } else {
vsk->transport = transport_g2h;
+ }
break;
default:
return -ESOCKTNOSUPPORT;
@@ -423,7 +428,7 @@ static bool vsock_find_cid(unsigned int cid)
if (transport_g2h && cid == transport_g2h->get_local_cid())
return true;
- if (transport_h2g && cid == VMADDR_CID_HOST)
+ if ((transport_h2g || transport_hg) && cid == VMADDR_CID_HOST)
return true;
return false;
@@ -1997,6 +2002,8 @@ static long vsock_dev_do_ioctl(struct file *filp,
cid = transport_g2h->get_local_cid();
else if (transport_h2g)
cid = transport_h2g->get_local_cid();
+ else if (transport_hg)
+ cid = transport_hg->get_local_cid();
if (put_user(cid, p) != 0)
retval = -EFAULT;
@@ -2093,13 +2100,14 @@ EXPORT_SYMBOL_GPL(vsock_core_get_transport);
int vsock_core_register(const struct vsock_transport *t)
{
- const struct vsock_transport *t_h2g, *t_g2h, *t_dgram;
+ const struct vsock_transport *t_h2g, *t_hg, *t_g2h, *t_dgram;
int err = mutex_lock_interruptible(&vsock_register_mutex);
if (err)
return err;
t_h2g = transport_h2g;
+ t_hg = transport_hg;
t_g2h = transport_g2h;
t_dgram = transport_dgram;
@@ -2107,7 +2115,14 @@ int vsock_core_register(const struct vsock_transport *t)
* same transport. We are able to set the G2H feature only if we are
* in a VMware guest, but we are not able to do the same for the host.
*/
- if (t->features & VSOCK_TRANSPORT_F_H2G) {
+ if ((t->features & VSOCK_TRANSPORT_F_H2G) &&
+ (t->features & VSOCK_TRANSPORT_F_G2H)) {
+ if (t_hg) {
+ err = -EBUSY;
+ goto err_busy;
+ }
+ t_hg = t;
+ } else if (t->features & VSOCK_TRANSPORT_F_H2G) {
if (t_h2g) {
err = -EBUSY;
goto err_busy;
@@ -2132,6 +2147,7 @@ int vsock_core_register(const struct vsock_transport *t)
}
transport_h2g = t_h2g;
+ transport_hg = t_hg;
transport_g2h = t_g2h;
transport_dgram = t_dgram;
@@ -2151,6 +2167,9 @@ void vsock_core_unregister(const struct vsock_transport *t)
if (transport_h2g == t)
transport_h2g = NULL;
+ if (transport_hg == t)
+ transport_hg = NULL;
+
if (transport_g2h == t)
transport_g2h = NULL;
--
2.21.0
^ permalink raw reply related
* [RFC PATCH 12/13] vsock: prevent transport modules unloading
From: Stefano Garzarella @ 2019-09-27 11:27 UTC (permalink / raw)
To: netdev
Cc: linux-hyperv, K. Y. Srinivasan, Stefan Hajnoczi, Sasha Levin,
linux-kernel, kvm, David S. Miller, virtualization,
Stephen Hemminger, Jason Wang, Michael S. Tsirkin, Haiyang Zhang,
Dexuan Cui, Jorgen Hansen
In-Reply-To: <20190927112703.17745-1-sgarzare@redhat.com>
This patch adds a 'module' member to 'struct vsock_transport'
in order to get/put the transport module. This prevents the
module from being unloaded while sockets are assigned to it.
We increase the module refcnt when a socket is assigned to a
transport, and we decrease the module refcnt when the socket
is destructed.
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
drivers/vhost/vsock.c | 1 +
include/net/af_vsock.h | 1 +
net/vmw_vsock/af_vsock.c | 23 +++++++++++++++++------
net/vmw_vsock/hyperv_transport.c | 1 +
net/vmw_vsock/virtio_transport.c | 1 +
net/vmw_vsock/vmci_transport.c | 1 +
6 files changed, 22 insertions(+), 6 deletions(-)
diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 375af01a5b64..6d7a8fc9eb63 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -387,6 +387,7 @@ static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
static struct virtio_transport vhost_transport = {
.transport = {
.features = VSOCK_TRANSPORT_F_H2G,
+ .module = THIS_MODULE,
.get_local_cid = vhost_transport_get_local_cid,
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 2a081d19e20d..f10fa918bf23 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -100,6 +100,7 @@ struct vsock_transport_send_notify_data {
struct vsock_transport {
uint64_t features;
+ struct module *module;
/* Initialize/tear-down socket. */
int (*init)(struct vsock_sock *, struct vsock_sock *);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index c5f46b8242ce..750b62711b01 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -416,13 +416,28 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
return -ESOCKTNOSUPPORT;
}
- if (!vsk->transport)
+ /* We increase the module refcnt to prevent the transport unloading
+ * while there are open sockets assigned to it.
+ */
+ if (!vsk->transport || !try_module_get(vsk->transport->module)) {
+ vsk->transport = NULL;
return -ENODEV;
+ }
return vsk->transport->init(vsk, psk);
}
EXPORT_SYMBOL_GPL(vsock_assign_transport);
+static void vsock_deassign_transport(struct vsock_sock *vsk)
+{
+ if (!vsk->transport)
+ return;
+
+ vsk->transport->destruct(vsk);
+ module_put(vsk->transport->module);
+ vsk->transport = NULL;
+}
+
static bool vsock_find_cid(unsigned int cid)
{
if (transport_g2h && cid == transport_g2h->get_local_cid())
@@ -728,8 +743,7 @@ static void vsock_sk_destruct(struct sock *sk)
{
struct vsock_sock *vsk = vsock_sk(sk);
- if (vsk->transport)
- vsk->transport->destruct(vsk);
+ vsock_deassign_transport(vsk);
/* When clearing these addresses, there's no need to set the family and
* possibly register the address family with the kernel.
@@ -2161,9 +2175,6 @@ void vsock_core_unregister(const struct vsock_transport *t)
{
mutex_lock(&vsock_register_mutex);
- /* RFC-TODO: maybe we should check if there are open sockets
- * assigned to that transport and avoid the unregistration
- */
if (transport_h2g == t)
transport_h2g = NULL;
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 94e6fc905a77..bd4f3c222904 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -857,6 +857,7 @@ int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written,
static struct vsock_transport hvs_transport = {
.features = VSOCK_TRANSPORT_F_G2H,
+ .module = THIS_MODULE,
.get_local_cid = hvs_get_local_cid,
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 0ff037ef7f8e..439fe01e6691 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -463,6 +463,7 @@ static void virtio_vsock_rx_done(struct virtqueue *vq)
static struct virtio_transport virtio_transport = {
.transport = {
.features = VSOCK_TRANSPORT_F_G2H,
+ .module = THIS_MODULE,
.get_local_cid = virtio_transport_get_local_cid,
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 52e63952d0d4..900392686c03 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -2021,6 +2021,7 @@ static u32 vmci_transport_get_local_cid(void)
static struct vsock_transport vmci_transport = {
.features = VSOCK_TRANSPORT_F_DGRAM | VSOCK_TRANSPORT_F_H2G,
+ .module = THIS_MODULE,
.init = vmci_transport_socket_init,
.destruct = vmci_transport_destruct,
.release = vmci_transport_release,
--
2.21.0
^ permalink raw reply related
* [RFC PATCH 13/13] vsock: fix bind() behaviour taking care of CID
From: Stefano Garzarella @ 2019-09-27 11:27 UTC (permalink / raw)
To: netdev
Cc: linux-hyperv, K. Y. Srinivasan, Stefan Hajnoczi, Sasha Levin,
linux-kernel, kvm, David S. Miller, virtualization,
Stephen Hemminger, Jason Wang, Michael S. Tsirkin, Haiyang Zhang,
Dexuan Cui, Jorgen Hansen
In-Reply-To: <20190927112703.17745-1-sgarzare@redhat.com>
When we are looking for a socket bound to a specific address,
we also have to take into account the CID.
This patch is useful with multi-transport support because it
allows the same port to be bound with different CIDs, and
it prevents a connection to the wrong socket bound to the same
port, but with a different CID.
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
net/vmw_vsock/af_vsock.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 750b62711b01..bffa6fa7b8e5 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -230,10 +230,16 @@ static struct sock *__vsock_find_bound_socket(struct sockaddr_vm *addr)
{
struct vsock_sock *vsk;
- list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table)
- if (addr->svm_port == vsk->local_addr.svm_port)
+ list_for_each_entry(vsk, vsock_bound_sockets(addr), bound_table) {
+ if (vsock_addr_equals_addr(addr, &vsk->local_addr))
return sk_vsock(vsk);
+ if (addr->svm_port == vsk->local_addr.svm_port &&
+ (vsk->local_addr.svm_cid == VMADDR_CID_ANY ||
+ addr->svm_cid == VMADDR_CID_ANY))
+ return sk_vsock(vsk);
+ }
+
return NULL;
}
--
2.21.0
^ permalink raw reply related
* [RFC PATCH 09/13] hv_sock: set VMADDR_CID_HOST in the hvs_remote_addr_init()
From: Stefano Garzarella @ 2019-09-27 11:26 UTC (permalink / raw)
To: netdev
Cc: linux-hyperv, K. Y. Srinivasan, Stefan Hajnoczi, Sasha Levin,
linux-kernel, kvm, David S. Miller, virtualization,
Stephen Hemminger, Jason Wang, Michael S. Tsirkin, Haiyang Zhang,
Dexuan Cui, Jorgen Hansen
In-Reply-To: <20190927112703.17745-1-sgarzare@redhat.com>
Remote peer is always the host, so we set VMADDR_CID_HOST as
remote CID instead of VMADDR_CID_ANY.
Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
---
net/vmw_vsock/hyperv_transport.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 4f47af2054dd..306310794522 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -186,7 +186,8 @@ static void hvs_remote_addr_init(struct sockaddr_vm *remote,
static u32 host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT;
struct sock *sk;
- vsock_addr_init(remote, VMADDR_CID_ANY, VMADDR_PORT_ANY);
+ /* Remote peer is always the host */
+ vsock_addr_init(remote, VMADDR_CID_HOST, VMADDR_PORT_ANY);
while (1) {
/* Wrap around ? */
--
2.21.0
^ permalink raw reply related
* Re: [PATCH] HID: hyperv: Add the support of hibernation
From: Sasha Levin @ 2019-09-27 12:05 UTC (permalink / raw)
To: Dexuan Cui
Cc: Jiri Kosina, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
benjamin.tissoires@redhat.com, linux-hyperv@vger.kernel.org,
linux-input@vger.kernel.org, linux-kernel@vger.kernel.org,
Michael Kelley
In-Reply-To: <PU1P153MB016973F30CC1A52E46D15230BF810@PU1P153MB0169.APCP153.PROD.OUTLOOK.COM>
On Fri, Sep 27, 2019 at 05:42:31AM +0000, Dexuan Cui wrote:
>> From: Jiri Kosina <jikos@kernel.org>
>> Sent: Thursday, September 26, 2019 6:23 AM
>> To: Dexuan Cui <decui@microsoft.com>
>>
>> On Thu, 26 Sep 2019, Jiri Kosina wrote:
>>
>> > > > This patch is basically a pure Hyper-V specific change and it has a
>> > > > build dependency on the commit 271b2224d42f ("Drivers: hv: vmbus:
>> > > > Implement
>> > > > suspend/resume for VSC drivers for hibernation"), which is on Sasha
>> Levin's
>> > > > Hyper-V tree's hyperv-next branch [ ... snipped ...]
>> > > >
>> > > > I request this patch should go through Sasha's tree rather than the
>> > > > input subsystem's tree.
>> > > >
>> > > > Hi Jiri, Benjamin, can you please Ack?
>> > >
>> > > Hi Jiri, Benjamin,
>> > > Can you please take a look at the patch?
>> >
>> > Hi Dexuan,
>> >
>> > I am planning to process it once 5.4 merge window is over and thus hid.git
>> > is open again for 5.5 material.
>>
>> Ah, now I see you asked for this go through hyperv tree. For that, feel
>> free to add
>> Acked-by: Jiri Kosina <jkosina@suse.cz>
>> Jiri Kosina
>
>Thanks for the Ack, Jiri!
>
>I have a bunch of patches, including this one, to support Linux VM's hibernation
>when the VM runs on Hyper-V. I just feel it would be better for all of them to
>go through the Hyper-V tree. :-)
Thanks Dexuan, Jiri,
Dexuan, I've been silently ignoring your patches for the past few weeks
for the same reason as Jiri has mentioned. I'll pick them all up once
the 5.4 merge window closes in a few days.
--
Thanks,
Sasha
^ permalink raw reply
* Re: [PATCH v3 02/26] PCI: hv: Use PCI_STD_NUM_BARS
From: Bjorn Helgaas @ 2019-09-27 12:43 UTC (permalink / raw)
To: Denis Efremov
Cc: linux-kernel, linux-pci, Andrew Murray, linux-hyperv,
K. Y. Srinivasan, Haiyang Zhang, Stephen Hemminger, Sasha Levin
In-Reply-To: <20190926220531.GA200826@google.com>
On Thu, Sep 26, 2019 at 05:05:31PM -0500, Bjorn Helgaas wrote:
> On Mon, Sep 16, 2019 at 11:41:34PM +0300, Denis Efremov wrote:
> > Replace the magic constant (6) with define PCI_STD_NUM_BARS representing
> > the number of PCI BARs.
>
> For some reason patches 0 and 1 didn't make it to the list. Can you
> resend them?
(No need to resend the whole series, which might annoy all the other
maintainers. Just send 0 (the cover letter) and 1 (which I assume
adds the PCI_STD_NUM_BARS definition)).
^ permalink raw reply
* [PATCH RESEND v3 00/26] Add definition for the number of standard PCI BARs
From: Denis Efremov @ 2019-09-27 23:40 UTC (permalink / raw)
To: Bjorn Helgaas
Cc: Denis Efremov, linux-kernel, linux-pci, linux-hyperv, x86,
linux-s390, linux-alpha, linux-ia64, linux-arm-kernel, netdev,
linux-fbdev, kvm, linux-scsi, linux-ide, linux-usb, devel,
linux-serial, linux-mmc
In-Reply-To: <20190916204158.6889-3-efremov@linux.com>
Code that iterates over all standard PCI BARs typically uses
PCI_STD_RESOURCE_END, but this is error-prone because it requires
"i <= PCI_STD_RESOURCE_END" rather than something like
"i < PCI_STD_NUM_BARS". We could add such a definition and use it the same
way PCI_SRIOV_NUM_BARS is used. The patchset also replaces the magic constant (6)
with the new define PCI_STD_NUM_BARS where appropriate and removes local
declarations for the number of PCI BARs.
Changes in v3:
- Updated commits description.
- Refactored "< PCI_ROM_RESOURCE" with "< PCI_STD_NUM_BARS" in loops.
- Refactored "<= BAR_5" with "< PCI_STD_NUM_BARS" in loops.
- Removed local define GASKET_NUM_BARS.
- Removed local define PCI_NUM_BAR_RESOURCES.
Changes in v2:
- Reversed checks in pci_iomap_range,pci_iomap_wc_range.
- Refactored loops in vfio_pci to keep PCI_STD_RESOURCES.
- Added 2 new patches to replace the magic constant with new define.
- Split the net patch from v1 into separate stmmac and dwc-xlgmac patches.
Denis Efremov (26):
PCI: Add define for the number of standard PCI BARs
PCI: hv: Use PCI_STD_NUM_BARS
PCI: dwc: Use PCI_STD_NUM_BARS
PCI: endpoint: Use PCI_STD_NUM_BARS
misc: pci_endpoint_test: Use PCI_STD_NUM_BARS
s390/pci: Use PCI_STD_NUM_BARS
x86/PCI: Loop using PCI_STD_NUM_BARS
alpha/PCI: Use PCI_STD_NUM_BARS
ia64: Use PCI_STD_NUM_BARS
stmmac: pci: Loop using PCI_STD_NUM_BARS
net: dwc-xlgmac: Loop using PCI_STD_NUM_BARS
ixgb: use PCI_STD_NUM_BARS
e1000: Use PCI_STD_NUM_BARS
rapidio/tsi721: Loop using PCI_STD_NUM_BARS
efifb: Loop using PCI_STD_NUM_BARS
fbmem: use PCI_STD_NUM_BARS
vfio_pci: Loop using PCI_STD_NUM_BARS
scsi: pm80xx: Use PCI_STD_NUM_BARS
ata: sata_nv: Use PCI_STD_NUM_BARS
staging: gasket: Use PCI_STD_NUM_BARS
serial: 8250_pci: Use PCI_STD_NUM_BARS
pata_atp867x: Use PCI_STD_NUM_BARS
memstick: use PCI_STD_NUM_BARS
USB: core: Use PCI_STD_NUM_BARS
usb: pci-quirks: Use PCI_STD_NUM_BARS
devres: use PCI_STD_NUM_BARS
arch/alpha/kernel/pci-sysfs.c | 8 ++---
arch/ia64/sn/pci/pcibr/pcibr_dma.c | 4 +--
arch/s390/include/asm/pci.h | 5 +--
arch/s390/include/asm/pci_clp.h | 6 ++--
arch/s390/pci/pci.c | 16 +++++-----
arch/s390/pci/pci_clp.c | 6 ++--
arch/x86/pci/common.c | 2 +-
arch/x86/pci/intel_mid_pci.c | 2 +-
drivers/ata/pata_atp867x.c | 2 +-
drivers/ata/sata_nv.c | 2 +-
drivers/memstick/host/jmb38x_ms.c | 2 +-
drivers/misc/pci_endpoint_test.c | 8 ++---
drivers/net/ethernet/intel/e1000/e1000.h | 1 -
drivers/net/ethernet/intel/e1000/e1000_main.c | 2 +-
drivers/net/ethernet/intel/ixgb/ixgb.h | 1 -
drivers/net/ethernet/intel/ixgb/ixgb_main.c | 2 +-
.../net/ethernet/stmicro/stmmac/stmmac_pci.c | 4 +--
.../net/ethernet/synopsys/dwc-xlgmac-pci.c | 2 +-
drivers/pci/controller/dwc/pci-dra7xx.c | 2 +-
.../pci/controller/dwc/pci-layerscape-ep.c | 2 +-
drivers/pci/controller/dwc/pcie-artpec6.c | 2 +-
.../pci/controller/dwc/pcie-designware-plat.c | 2 +-
drivers/pci/controller/dwc/pcie-designware.h | 2 +-
drivers/pci/controller/pci-hyperv.c | 10 +++---
drivers/pci/endpoint/functions/pci-epf-test.c | 10 +++---
drivers/pci/pci-sysfs.c | 4 +--
drivers/pci/pci.c | 13 ++++----
drivers/pci/proc.c | 4 +--
drivers/pci/quirks.c | 4 +--
drivers/rapidio/devices/tsi721.c | 2 +-
drivers/scsi/pm8001/pm8001_hwi.c | 2 +-
drivers/scsi/pm8001/pm8001_init.c | 2 +-
drivers/staging/gasket/gasket_constants.h | 3 --
drivers/staging/gasket/gasket_core.c | 12 +++----
drivers/staging/gasket/gasket_core.h | 4 +--
drivers/tty/serial/8250/8250_pci.c | 8 ++---
drivers/usb/core/hcd-pci.c | 2 +-
drivers/usb/host/pci-quirks.c | 2 +-
drivers/vfio/pci/vfio_pci.c | 11 ++++---
drivers/vfio/pci/vfio_pci_config.c | 32 ++++++++++---------
drivers/vfio/pci/vfio_pci_private.h | 4 +--
drivers/video/fbdev/core/fbmem.c | 4 +--
drivers/video/fbdev/efifb.c | 2 +-
include/linux/pci-epc.h | 2 +-
include/linux/pci.h | 2 +-
include/uapi/linux/pci_regs.h | 1 +
lib/devres.c | 2 +-
47 files changed, 112 insertions(+), 115 deletions(-)
--
2.21.0
^ permalink raw reply
* [PATCH RESEND v3 01/26] PCI: Add define for the number of standard PCI BARs
From: Denis Efremov @ 2019-09-27 23:43 UTC (permalink / raw)
To: Bjorn Helgaas
Cc: Denis Efremov, linux-kernel, linux-pci, linux-hyperv, x86,
linux-s390, linux-alpha, linux-ia64, linux-arm-kernel
In-Reply-To: <20190916204158.6889-3-efremov@linux.com>
Code that iterates over all standard PCI BARs typically uses
PCI_STD_RESOURCE_END. However, it requires the "unusual" loop condition
"i <= PCI_STD_RESOURCE_END" rather than something more standard like
"i < PCI_STD_NUM_BARS".
This patch adds the definition PCI_STD_NUM_BARS which is equivalent to
"PCI_STD_RESOURCE_END + 1". To iterate through all possible BARs, loop
conditions are changed to use the *number* of BARs "i < PCI_STD_NUM_BARS",
instead of the index of the last valid BAR "i <= PCI_STD_RESOURCE_END"
or PCI_ROM_RESOURCE. The magic constant (6) is also replaced with the new
define PCI_STD_NUM_BARS.
Signed-off-by: Denis Efremov <efremov@linux.com>
---
drivers/pci/pci-sysfs.c | 4 ++--
drivers/pci/pci.c | 13 +++++++------
drivers/pci/proc.c | 4 ++--
drivers/pci/quirks.c | 4 ++--
include/linux/pci.h | 2 +-
include/uapi/linux/pci_regs.h | 1 +
6 files changed, 15 insertions(+), 13 deletions(-)
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 965c72104150..3e26b8e03bd5 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -1257,7 +1257,7 @@ static void pci_remove_resource_files(struct pci_dev *pdev)
{
int i;
- for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
struct bin_attribute *res_attr;
res_attr = pdev->res_attr[i];
@@ -1328,7 +1328,7 @@ static int pci_create_resource_files(struct pci_dev *pdev)
int retval;
/* Expose the PCI resources from this device as files */
- for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
/* skip empty resources */
if (!pci_resource_len(pdev, i))
diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 1b27b5af3d55..7d543986026b 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -674,7 +674,7 @@ struct resource *pci_find_resource(struct pci_dev *dev, struct resource *res)
{
int i;
- for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
struct resource *r = &dev->resource[i];
if (r->start && resource_contains(r, res))
@@ -3768,7 +3768,7 @@ void pci_release_selected_regions(struct pci_dev *pdev, int bars)
{
int i;
- for (i = 0; i < 6; i++)
+ for (i = 0; i < PCI_STD_NUM_BARS; i++)
if (bars & (1 << i))
pci_release_region(pdev, i);
}
@@ -3779,7 +3779,7 @@ static int __pci_request_selected_regions(struct pci_dev *pdev, int bars,
{
int i;
- for (i = 0; i < 6; i++)
+ for (i = 0; i < PCI_STD_NUM_BARS; i++)
if (bars & (1 << i))
if (__pci_request_region(pdev, i, res_name, excl))
goto err_out;
@@ -3827,7 +3827,7 @@ EXPORT_SYMBOL(pci_request_selected_regions_exclusive);
void pci_release_regions(struct pci_dev *pdev)
{
- pci_release_selected_regions(pdev, (1 << 6) - 1);
+ pci_release_selected_regions(pdev, (1 << PCI_STD_NUM_BARS) - 1);
}
EXPORT_SYMBOL(pci_release_regions);
@@ -3846,7 +3846,8 @@ EXPORT_SYMBOL(pci_release_regions);
*/
int pci_request_regions(struct pci_dev *pdev, const char *res_name)
{
- return pci_request_selected_regions(pdev, ((1 << 6) - 1), res_name);
+ return pci_request_selected_regions(pdev,
+ ((1 << PCI_STD_NUM_BARS) - 1), res_name);
}
EXPORT_SYMBOL(pci_request_regions);
@@ -3868,7 +3869,7 @@ EXPORT_SYMBOL(pci_request_regions);
int pci_request_regions_exclusive(struct pci_dev *pdev, const char *res_name)
{
return pci_request_selected_regions_exclusive(pdev,
- ((1 << 6) - 1), res_name);
+ ((1 << PCI_STD_NUM_BARS) - 1), res_name);
}
EXPORT_SYMBOL(pci_request_regions_exclusive);
diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c
index fe7fe678965b..cb61ec2c24e8 100644
--- a/drivers/pci/proc.c
+++ b/drivers/pci/proc.c
@@ -248,13 +248,13 @@ static int proc_bus_pci_mmap(struct file *file, struct vm_area_struct *vma)
}
/* Make sure the caller is mapping a real resource for this device */
- for (i = 0; i < PCI_ROM_RESOURCE; i++) {
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
if (dev->resource[i].flags & res_bit &&
pci_mmap_fits(dev, i, vma, PCI_MMAP_PROCFS))
break;
}
- if (i >= PCI_ROM_RESOURCE)
+ if (i >= PCI_STD_NUM_BARS)
return -ENODEV;
if (fpriv->mmap_state == pci_mmap_mem &&
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 44c4ae1abd00..998454b0ae8d 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -475,7 +475,7 @@ static void quirk_extend_bar_to_page(struct pci_dev *dev)
{
int i;
- for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
+ for (i = 0; i < PCI_STD_NUM_BARS; i++) {
struct resource *r = &dev->resource[i];
if (r->flags & IORESOURCE_MEM && resource_size(r) < PAGE_SIZE) {
@@ -1810,7 +1810,7 @@ static void quirk_alder_ioapic(struct pci_dev *pdev)
* The next five BARs all seem to be rubbish, so just clean
* them out.
*/
- for (i = 1; i < 6; i++)
+ for (i = 1; i < PCI_STD_NUM_BARS; i++)
memset(&pdev->resource[i], 0, sizeof(pdev->resource[i]));
}
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_EESSC, quirk_alder_ioapic);
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 82e4cd1b7ac3..cf7d16305243 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -76,7 +76,7 @@ enum pci_mmap_state {
enum {
/* #0-5: standard PCI resources */
PCI_STD_RESOURCES,
- PCI_STD_RESOURCE_END = 5,
+ PCI_STD_RESOURCE_END = PCI_STD_RESOURCES + PCI_STD_NUM_BARS - 1,
/* #6: expansion ROM resource */
PCI_ROM_RESOURCE,
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index f28e562d7ca8..68b571d491eb 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -34,6 +34,7 @@
* of which the first 64 bytes are standardized as follows:
*/
#define PCI_STD_HEADER_SIZEOF 64
+#define PCI_STD_NUM_BARS 6 /* Number of standard BARs */
#define PCI_VENDOR_ID 0x00 /* 16 bits */
#define PCI_DEVICE_ID 0x02 /* 16 bits */
#define PCI_COMMAND 0x04 /* 16 bits */
--
2.21.0
^ permalink raw reply related
* Re: [PATCH] Input: hyperv-keyboard: Add the support of hibernation
From: dmitry.torokhov @ 2019-09-28 0:31 UTC (permalink / raw)
To: Dexuan Cui
Cc: KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
sashal@kernel.org, linux-hyperv@vger.kernel.org,
linux-input@vger.kernel.org, linux-kernel@vger.kernel.org,
Michael Kelley
In-Reply-To: <PU1P153MB016914A7C827CA35D7FEB66ABF8B0@PU1P153MB0169.APCP153.PROD.OUTLOOK.COM>
On Sat, Sep 21, 2019 at 06:56:04AM +0000, Dexuan Cui wrote:
> > From: dmitry.torokhov@gmail.com <dmitry.torokhov@gmail.com>
> > Sent: Thursday, September 19, 2019 9:18 AM
> >
> > Hi Dexuan,
> >
> > On Wed, Sep 11, 2019 at 11:36:20PM +0000, Dexuan Cui wrote:
> > > We need hv_kbd_pm_notify() to make sure the pm_wakeup_hard_event()
> > call
> > > does not prevent the system from entering hibernation: the hibernation
> > > is a relatively long process, which can be aborted by the call
> > > pm_wakeup_hard_event(), which is invoked upon keyboard events.
> > >
> > > diff --git a/drivers/input/serio/hyperv-keyboard.c
> > b/drivers/input/serio/hyperv-keyboard.c
> > > index 88ae7c2..277dc4c 100644
> > > --- a/drivers/input/serio/hyperv-keyboard.c
> > > +++ b/drivers/input/serio/hyperv-keyboard.c
> > > @@ -10,6 +10,7 @@
> > > #include <linux/hyperv.h>
> > > #include <linux/serio.h>
> > > #include <linux/slab.h>
> > > +#include <linux/suspend.h>
> > >
> > > /*
> > > * Current version 1.0
> > > @@ -95,6 +96,9 @@ struct hv_kbd_dev {
> > > struct completion wait_event;
> > > spinlock_t lock; /* protects 'started' field */
> > > bool started;
> > > +
> > > + struct notifier_block pm_nb;
> > > + bool hibernation_in_progress;
> >
> > Why do you use notifier block instead of exposing proper PM methods if
> > you want to support hibernation?
> >
> > Dmitry
>
> Hi,
> In the patch I do implement hv_kbd_suspend() and hv_kbd_resume(), and
> add them into the hv_kbd_drv struct:
>
> @@ -416,6 +472,8 @@ static struct hv_driver hv_kbd_drv = {
> .id_table = id_table,
> .probe = hv_kbd_probe,
> .remove = hv_kbd_remove,
> + .suspend = hv_kbd_suspend,
> + .resume = hv_kbd_resume,
>
> The .suspend and .resume callbacks are introduced by another patch (which
> uses the dev_pm_ops struct):
> 271b2224d42f ("Drivers: hv: vmbus: Implement suspend/resume for VSC drivers for hibernation")
> (which is on the Hyper-V tree's hyperv-next branch:
> https://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git/commit/?h=hyperv-next&id=271b2224d42f88870e6b060924ee374871c131fc )
>
> The only purpose of the notifier is to set the variable
> kbd_dev->hibernation_in_progress to true during the hibernation process.
>
> As I explained in the changelog, the hibernation is a long process (which
> can take 10+ seconds), during which the user may unintentionally touch
> the keyboard, causing key up/down events, which are still handled by
> hv_kbd_on_receive(), which calls pm_wakeup_hard_event(), which
> calls some other functions which increase the global counter
> "pm_abort_suspend", and hence pm_wakeup_pending() becomes true.
>
> pm_wakeup_pending() is tested in a lot of places in the suspend
> process and eventually an unintentional keystroke (or mouse movement,
> when it comes to the Hyper-V mouse driver drivers/hid/hid-hyperv.c)
> causes the whole hibernation process to be aborted. Usually this
> behavior is not expected by the user, I think.
Why not? If a device is configured as a wakeup source, then its activity
should wake up the system, unless you disable it.
>
> So, I use the notifier to set the flag variable and with it the driver can
> know when it should not call pm_wakeup_hard_event().
No, please implement hibernation support properly, as notifier + flag is
a hack. In this particular case you do not want to have your
hv_kbd_resume() to be called in place of pm_ops->thaw() as that is what
reenables the keyboard vmbus channel and causes the undesired wakeup
events. Your vmbus implementation should allow individual drivers to
control the set of PM operations that they wish to use, instead of
forcing everything through suspend/resume.
Thanks.
--
Dmitry
^ permalink raw reply
* [PATCH AUTOSEL 5.2 10/42] PCI: pci-hyperv: Fix build errors on non-SYSFS config
From: Sasha Levin @ 2019-09-29 17:32 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Randy Dunlap, Lorenzo Pieralisi, Haiyang Zhang, Matthew Wilcox,
Jake Oshins, K. Y. Srinivasan, Stephen Hemminger,
Stephen Hemminger, Sasha Levin, Bjorn Helgaas, linux-pci,
linux-hyperv, Dexuan Cui
In-Reply-To: <20190929173244.8918-1-sashal@kernel.org>
From: Randy Dunlap <rdunlap@infradead.org>
[ Upstream commit f58ba5e3f6863ea4486952698898848a6db726c2 ]
Fix build errors when building almost-allmodconfig but with SYSFS
not set (not enabled). Fixes these build errors:
ERROR: "pci_destroy_slot" [drivers/pci/controller/pci-hyperv.ko] undefined!
ERROR: "pci_create_slot" [drivers/pci/controller/pci-hyperv.ko] undefined!
drivers/pci/slot.o is only built when SYSFS is enabled, so
pci-hyperv.o has an implicit dependency on SYSFS.
Make that explicit.
Also, depending on X86 && X86_64 is not needed, so just change that
to depend on X86_64.
Fixes: a15f2c08c708 ("PCI: hv: support reporting serial number as slot information")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Jake Oshins <jakeo@microsoft.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Sasha Levin <sashal@kernel.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: linux-pci@vger.kernel.org
Cc: linux-hyperv@vger.kernel.org
Cc: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/pci/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 2ab92409210af..297bf928d6522 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -181,7 +181,7 @@ config PCI_LABEL
config PCI_HYPERV
tristate "Hyper-V PCI Frontend"
- depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+ depends on X86_64 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && SYSFS
help
The PCI device frontend driver allows the kernel to import arbitrary
PCI devices from a PCI backend to support PCI driver domains.
--
2.20.1
^ permalink raw reply related
* [PATCH AUTOSEL 5.3 11/49] PCI: pci-hyperv: Fix build errors on non-SYSFS config
From: Sasha Levin @ 2019-09-29 17:30 UTC (permalink / raw)
To: linux-kernel, stable
Cc: Randy Dunlap, Lorenzo Pieralisi, Haiyang Zhang, Matthew Wilcox,
Jake Oshins, K. Y. Srinivasan, Stephen Hemminger,
Stephen Hemminger, Sasha Levin, Bjorn Helgaas, linux-pci,
linux-hyperv, Dexuan Cui
In-Reply-To: <20190929173053.8400-1-sashal@kernel.org>
From: Randy Dunlap <rdunlap@infradead.org>
[ Upstream commit f58ba5e3f6863ea4486952698898848a6db726c2 ]
Fix build errors when building almost-allmodconfig but with SYSFS
not set (not enabled). Fixes these build errors:
ERROR: "pci_destroy_slot" [drivers/pci/controller/pci-hyperv.ko] undefined!
ERROR: "pci_create_slot" [drivers/pci/controller/pci-hyperv.ko] undefined!
drivers/pci/slot.o is only built when SYSFS is enabled, so
pci-hyperv.o has an implicit dependency on SYSFS.
Make that explicit.
Also, depending on X86 && X86_64 is not needed, so just change that
to depend on X86_64.
Fixes: a15f2c08c708 ("PCI: hv: support reporting serial number as slot information")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Reviewed-by: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Jake Oshins <jakeo@microsoft.com>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Cc: Sasha Levin <sashal@kernel.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: linux-pci@vger.kernel.org
Cc: linux-hyperv@vger.kernel.org
Cc: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
drivers/pci/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index 2ab92409210af..297bf928d6522 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig
@@ -181,7 +181,7 @@ config PCI_LABEL
config PCI_HYPERV
tristate "Hyper-V PCI Frontend"
- depends on X86 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && X86_64
+ depends on X86_64 && HYPERV && PCI_MSI && PCI_MSI_IRQ_DOMAIN && SYSFS
help
The PCI device frontend driver allows the kernel to import arbitrary
PCI devices from a PCI backend to support PCI driver domains.
--
2.20.1
^ permalink raw reply related
* RE: [PATCH] HID: hyperv: Add the support of hibernation
From: Dexuan Cui @ 2019-09-29 17:44 UTC (permalink / raw)
To: Sasha Levin
Cc: Jiri Kosina, KY Srinivasan, Haiyang Zhang, Stephen Hemminger,
benjamin.tissoires@redhat.com, linux-hyperv@vger.kernel.org,
linux-input@vger.kernel.org, linux-kernel@vger.kernel.org,
Michael Kelley
In-Reply-To: <20190927120513.GM8171@sasha-vm>
> From: Sasha Levin <sashal@kernel.org>
> Sent: Friday, September 27, 2019 5:05 AM
> To: Dexuan Cui <decui@microsoft.com>
> Cc: Jiri Kosina <jikos@kernel.org>; KY Srinivasan <kys@microsoft.com>;
> Haiyang Zhang <haiyangz@microsoft.com>; Stephen Hemminger
> <sthemmin@microsoft.com>; benjamin.tissoires@redhat.com;
> linux-hyperv@vger.kernel.org; linux-input@vger.kernel.org;
> linux-kernel@vger.kernel.org; Michael Kelley <mikelley@microsoft.com>
> Subject: Re: [PATCH] HID: hyperv: Add the support of hibernation
>
> On Fri, Sep 27, 2019 at 05:42:31AM +0000, Dexuan Cui wrote:
> >> From: Jiri Kosina <jikos@kernel.org>
> >> Sent: Thursday, September 26, 2019 6:23 AM
> >> To: Dexuan Cui <decui@microsoft.com>
> >>
> >> On Thu, 26 Sep 2019, Jiri Kosina wrote:
> >>
> >> > > > This patch is basically a pure Hyper-V specific change and it has a
> >> > > > build dependency on the commit 271b2224d42f ("Drivers: hv: vmbus:
> >> > > > Implement
> >> > > > suspend/resume for VSC drivers for hibernation"), which is on Sasha
> >> Levin's
> >> > > > Hyper-V tree's hyperv-next branch [ ... snipped ...]
> >> > > >
> >> > > > I request this patch should go through Sasha's tree rather than the
> >> > > > input subsystem's tree.
> >> > > >
> >> > > > Hi Jiri, Benjamin, can you please Ack?
> >> > >
> >> > > Hi Jiri, Benjamin,
> >> > > Can you please take a look at the patch?
> >> >
> >> > Hi Dexuan,
> >> >
> >> > I am planning to process it once 5.4 merge window is over and thus hid.git
> >> > is open again for 5.5 material.
> >>
> >> Ah, now I see you asked for this go through hyperv tree. For that, feel
> >> free to add
> >> Acked-by: Jiri Kosina <jkosina@suse.cz>
> >> Jiri Kosina
> >
> >Thanks for the Ack, Jiri!
> >
> >I have a bunch of patches, including this one, to support Linux VM's
> hibernation
> >when the VM runs on Hyper-V. I just feel it would be better for all of them to
> >go through the Hyper-V tree. :-)
>
> Thanks Dexuan, Jiri,
>
> Dexuan, I've been silently ignoring your patches for the past few weeks
> for the same reason as Jiri has mentioned. I'll pick them all up once
> the 5.4 merge window closes in a few days.
>
> Thanks,
> Sasha
Thanks, Sasha!
BTW, I'll post a v2 for this patch, as IMO I may be able to get rid of the
mousevsc_pm_notify in this patch by disabling the channel callback
in the suspend function.
Thanks,
-- Dexuan
^ permalink raw reply
* Re: [PATCH net v2] vsock: Fix a lockdep warning in __vsock_release()
From: Stefano Garzarella @ 2019-09-30 13:51 UTC (permalink / raw)
To: Dexuan Cui
Cc: davem@davemloft.net, KY Srinivasan, Haiyang Zhang,
Stephen Hemminger, sashal@kernel.org, stefanha@redhat.com,
gregkh@linuxfoundation.org, arnd@arndb.de, deepa.kernel@gmail.com,
ytht.net@gmail.com, tglx@linutronix.de, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-hyperv@vger.kernel.org,
kvm@vger.kernel.org, virtualization@lists.linux-foundation.org,
Michael Kelley, jhansen@vmware.com
In-Reply-To: <PU1P153MB01698C46C9348B9762D5E122BF810@PU1P153MB0169.APCP153.PROD.OUTLOOK.COM>
On Fri, Sep 27, 2019 at 05:37:20AM +0000, Dexuan Cui wrote:
> > From: linux-hyperv-owner@vger.kernel.org
> > <linux-hyperv-owner@vger.kernel.org> On Behalf Of Stefano Garzarella
> > Sent: Thursday, September 26, 2019 12:48 AM
> >
> > Hi Dexuan,
> >
> > On Thu, Sep 26, 2019 at 01:11:27AM +0000, Dexuan Cui wrote:
> > > ...
> > > NOTE: I only tested the code on Hyper-V. I can not test the code for
> > > virtio socket, as I don't have a KVM host. :-( Sorry.
> > >
> > > @Stefan, @Stefano: please review & test the patch for virtio socket,
> > > and let me know if the patch breaks anything. Thanks!
> >
> > Comment below, I'll test it ASAP!
>
> Stefano, Thank you!
>
> BTW, this is how I tested the patch:
> 1. write a socket server program in the guest. The program calls listen()
> and then calls sleep(10000 seconds). Note: accept() is not called.
>
> 2. create some connections to the server program in the guest.
>
> 3. kill the server program by Ctrl+C, and "dmesg" will show the scary
> call-trace, if the kernel is built with
> CONFIG_LOCKDEP=y
> CONFIG_LOCKDEP_SUPPORT=y
>
> 4. Apply the patch, do the same test and we should no longer see the call-trace.
>
Hi Dexuan,
I tested on virtio socket and it works as expected!
With your patch applied I don't have issues and call-trace. Without
the patch I have a very similar call-trace (as expected):
============================================
WARNING: possible recursive locking detected
5.3.0-vsock #17 Not tainted
--------------------------------------------
python3/872 is trying to acquire lock:
ffff88802b650110 (sk_lock-AF_VSOCK){+.+.}, at: virtio_transport_release+0x34/0x330 [vmw_vsock_virtio_transport_common]
but task is already holding lock:
ffff88803597ce10 (sk_lock-AF_VSOCK){+.+.}, at: __vsock_release+0x3f/0x130 [vsock]
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(sk_lock-AF_VSOCK);
lock(sk_lock-AF_VSOCK);
*** DEADLOCK ***
May be due to missing lock nesting notation
2 locks held by python3/872:
#0: ffff88802c957180 (&sb->s_type->i_mutex_key#8){+.+.}, at: __sock_release+0x2d/0xb0
#1: ffff88803597ce10 (sk_lock-AF_VSOCK){+.+.}, at: __vsock_release+0x3f/0x130 [vsock]
stack backtrace:
CPU: 0 PID: 872 Comm: python3 Not tainted 5.3.0-vsock #17
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-2.fc30 04/01/2014
Call Trace:
dump_stack+0x85/0xc0
__lock_acquire.cold+0xad/0x22b
lock_acquire+0xc4/0x1a0
? virtio_transport_release+0x34/0x330 [vmw_vsock_virtio_transport_common]
lock_sock_nested+0x5d/0x80
? virtio_transport_release+0x34/0x330 [vmw_vsock_virtio_transport_common]
virtio_transport_release+0x34/0x330 [vmw_vsock_virtio_transport_common]
? mark_held_locks+0x49/0x70
? _raw_spin_unlock_irqrestore+0x44/0x60
__vsock_release+0x2d/0x130 [vsock]
__vsock_release+0xb9/0x130 [vsock]
vsock_release+0x12/0x30 [vsock]
__sock_release+0x3d/0xb0
sock_close+0x14/0x20
__fput+0xc1/0x250
task_work_run+0x93/0xb0
exit_to_usermode_loop+0xd3/0xe0
syscall_return_slowpath+0x205/0x310
entry_SYSCALL_64_after_hwframe+0x49/0xbe
Feel free to add:
Tested-by: Stefano Garzarella <sgarzare@redhat.com>
^ permalink raw reply
* [PATCH] x86/hyperv: make vapic support x2apic mode
From: Roman Kagan @ 2019-09-30 17:33 UTC (permalink / raw)
To: Michael Kelley, Lan Tianyu, Joerg Roedel, K. Y. Srinivasan,
Haiyang Zhang, Stephen Hemminger, Sasha Levin, Thomas Gleixner,
Ingo Molnar, Borislav Petkov, H. Peter Anvin, x86@kernel.org,
linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org
Cc: kvm@vger.kernel.org
Now that there's Hyper-V IOMMU driver, Linux can switch to x2apic mode
when supported by the vcpus.
However, the apic access functions for Hyper-V enlightened apic assume
xapic mode only.
As a result, Linux fails to bring up secondary cpus when run as a guest
in QEMU/KVM with both hv_apic and x2apic enabled.
I didn't manage to make my instance of Hyper-V expose x2apic to the
guest; nor does Hyper-V spec document the expected behavior. However,
a Windows guest running in QEMU/KVM with hv_apic and x2apic and a big
number of vcpus (so that it turns on x2apic mode) does use enlightened
apic MSRs passing unshifted 32bit destination id and falls back to the
regular x2apic MSRs for less frequently used apic fields.
So implement the same behavior, by replacing enlightened apic access
functions (only those where it makes a difference) with their
x2apic-aware versions when x2apic is in use.
Fixes: 29217a474683 ("iommu/hyper-v: Add Hyper-V stub IOMMU driver")
Fixes: 6b48cb5f8347 ("X86/Hyper-V: Enlighten APIC access")
Cc: stable@vger.kernel.org
Signed-off-by: Roman Kagan <rkagan@virtuozzo.com>
---
arch/x86/hyperv/hv_apic.c | 48 ++++++++++++++++++++++++++++++++++++---
1 file changed, 45 insertions(+), 3 deletions(-)
diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c
index 5c056b8aebef..9564fec00375 100644
--- a/arch/x86/hyperv/hv_apic.c
+++ b/arch/x86/hyperv/hv_apic.c
@@ -53,6 +53,11 @@ static void hv_apic_icr_write(u32 low, u32 id)
wrmsrl(HV_X64_MSR_ICR, reg_val);
}
+static void hv_x2apic_icr_write(u32 low, u32 id)
+{
+ wrmsr(HV_X64_MSR_ICR, low, id);
+}
+
static u32 hv_apic_read(u32 reg)
{
u32 reg_val, hi;
@@ -70,6 +75,23 @@ static u32 hv_apic_read(u32 reg)
}
}
+static u32 hv_x2apic_read(u32 reg)
+{
+ u32 reg_val, hi;
+
+ switch (reg) {
+ case APIC_EOI:
+ rdmsr(HV_X64_MSR_EOI, reg_val, hi);
+ return reg_val;
+ case APIC_TASKPRI:
+ rdmsr(HV_X64_MSR_TPR, reg_val, hi);
+ return reg_val;
+
+ default:
+ return native_apic_msr_read(reg);
+ }
+}
+
static void hv_apic_write(u32 reg, u32 val)
{
switch (reg) {
@@ -84,6 +106,20 @@ static void hv_apic_write(u32 reg, u32 val)
}
}
+static void hv_x2apic_write(u32 reg, u32 val)
+{
+ switch (reg) {
+ case APIC_EOI:
+ wrmsr(HV_X64_MSR_EOI, val, 0);
+ break;
+ case APIC_TASKPRI:
+ wrmsr(HV_X64_MSR_TPR, val, 0);
+ break;
+ default:
+ native_apic_msr_write(reg, val);
+ }
+}
+
static void hv_apic_eoi_write(u32 reg, u32 val)
{
struct hv_vp_assist_page *hvp = hv_vp_assist_page[smp_processor_id()];
@@ -262,9 +298,15 @@ void __init hv_apic_init(void)
if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) {
pr_info("Hyper-V: Using MSR based APIC access\n");
apic_set_eoi_write(hv_apic_eoi_write);
- apic->read = hv_apic_read;
- apic->write = hv_apic_write;
- apic->icr_write = hv_apic_icr_write;
+ if (x2apic_enabled()) {
+ apic->read = hv_x2apic_read;
+ apic->write = hv_x2apic_write;
+ apic->icr_write = hv_x2apic_icr_write;
+ } else {
+ apic->read = hv_apic_read;
+ apic->write = hv_apic_write;
+ apic->icr_write = hv_apic_icr_write;
+ }
apic->icr_read = hv_apic_icr_read;
}
}
--
2.21.0
^ permalink raw reply related
* RE: [PATCH net v2] vsock: Fix a lockdep warning in __vsock_release()
From: Dexuan Cui @ 2019-09-30 18:33 UTC (permalink / raw)
To: Stefano Garzarella
Cc: davem@davemloft.net, KY Srinivasan, Haiyang Zhang,
Stephen Hemminger, sashal@kernel.org, stefanha@redhat.com,
gregkh@linuxfoundation.org, arnd@arndb.de, deepa.kernel@gmail.com,
ytht.net@gmail.com, tglx@linutronix.de, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-hyperv@vger.kernel.org,
kvm@vger.kernel.org, virtualization@lists.linux-foundation.org,
Michael Kelley, jhansen@vmware.com
In-Reply-To: <20190930135125.prztj336splp74wq@steredhat>
> From: Stefano Garzarella <sgarzare@redhat.com>
> Sent: Monday, September 30, 2019 6:51 AM
> ...
> Feel free to add:
>
> Tested-by: Stefano Garzarella <sgarzare@redhat.com>
Thanks, Stefano!
I'll post a v3 with your suggestion "lock_sock_nested(sk, level);".
It does look better than v2 to me. :-)
Thanks,
-- Dexuan
^ permalink raw reply
* [PATCH net v3] vsock: Fix a lockdep warning in __vsock_release()
From: Dexuan Cui @ 2019-09-30 18:43 UTC (permalink / raw)
To: davem@davemloft.net, KY Srinivasan, Haiyang Zhang,
Stephen Hemminger, sashal@kernel.org, stefanha@redhat.com,
gregkh@linuxfoundation.org, arnd@arndb.de, deepa.kernel@gmail.com,
Dexuan Cui, ytht.net@gmail.com, tglx@linutronix.de,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
linux-hyperv@vger.kernel.org, kvm@vger.kernel.org,
virtualization@lists.linux-foundation.org, Michael Kelley,
sgarzare@redhat.com, jhansen@vmware.com
Lockdep is unhappy if two locks from the same class are held.
Fix the below warning for hyperv and virtio sockets (vmci socket code
doesn't have the issue) by using lock_sock_nested() when __vsock_release()
is called recursively:
============================================
WARNING: possible recursive locking detected
5.3.0+ #1 Not tainted
--------------------------------------------
server/1795 is trying to acquire lock:
ffff8880c5158990 (sk_lock-AF_VSOCK){+.+.}, at: hvs_release+0x10/0x120 [hv_sock]
but task is already holding lock:
ffff8880c5158150 (sk_lock-AF_VSOCK){+.+.}, at: __vsock_release+0x2e/0xf0 [vsock]
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(sk_lock-AF_VSOCK);
lock(sk_lock-AF_VSOCK);
*** DEADLOCK ***
May be due to missing lock nesting notation
2 locks held by server/1795:
#0: ffff8880c5d05ff8 (&sb->s_type->i_mutex_key#10){+.+.}, at: __sock_release+0x2d/0xa0
#1: ffff8880c5158150 (sk_lock-AF_VSOCK){+.+.}, at: __vsock_release+0x2e/0xf0 [vsock]
stack backtrace:
CPU: 5 PID: 1795 Comm: server Not tainted 5.3.0+ #1
Call Trace:
dump_stack+0x67/0x90
__lock_acquire.cold.67+0xd2/0x20b
lock_acquire+0xb5/0x1c0
lock_sock_nested+0x6d/0x90
hvs_release+0x10/0x120 [hv_sock]
__vsock_release+0x24/0xf0 [vsock]
__vsock_release+0xa0/0xf0 [vsock]
vsock_release+0x12/0x30 [vsock]
__sock_release+0x37/0xa0
sock_close+0x14/0x20
__fput+0xc1/0x250
task_work_run+0x98/0xc0
do_exit+0x344/0xc60
do_group_exit+0x47/0xb0
get_signal+0x15c/0xc50
do_signal+0x30/0x720
exit_to_usermode_loop+0x50/0xa0
do_syscall_64+0x24e/0x270
entry_SYSCALL_64_after_hwframe+0x49/0xbe
RIP: 0033:0x7f4184e85f31
Tested-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Dexuan Cui <decui@microsoft.com>
---
Changes in v2:
Avoid the duplication of code in v1.
Also fix virtio socket code.
Changes in v3:
Use "lock_sock_nested(sk, level);" -- suggested by Stefano.
Add Stefano's Tested-by.
net/vmw_vsock/af_vsock.c | 16 ++++++++++++----
net/vmw_vsock/hyperv_transport.c | 2 +-
net/vmw_vsock/virtio_transport_common.c | 2 +-
3 files changed, 14 insertions(+), 6 deletions(-)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index ab47bf3ab66e..2ab43b2bba31 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -638,7 +638,7 @@ struct sock *__vsock_create(struct net *net,
}
EXPORT_SYMBOL_GPL(__vsock_create);
-static void __vsock_release(struct sock *sk)
+static void __vsock_release(struct sock *sk, int level)
{
if (sk) {
struct sk_buff *skb;
@@ -648,9 +648,17 @@ static void __vsock_release(struct sock *sk)
vsk = vsock_sk(sk);
pending = NULL; /* Compiler warning. */
+ /* The release call is supposed to use lock_sock_nested()
+ * rather than lock_sock(), if a sock lock should be acquired.
+ */
transport->release(vsk);
- lock_sock(sk);
+ /* When "level" is SINGLE_DEPTH_NESTING, use the nested
+ * version to avoid the warning "possible recursive locking
+ * detected". When "level" is 0, lock_sock_nested(sk, level)
+ * is the same as lock_sock(sk).
+ */
+ lock_sock_nested(sk, level);
sock_orphan(sk);
sk->sk_shutdown = SHUTDOWN_MASK;
@@ -659,7 +667,7 @@ static void __vsock_release(struct sock *sk)
/* Clean up any sockets that never were accepted. */
while ((pending = vsock_dequeue_accept(sk)) != NULL) {
- __vsock_release(pending);
+ __vsock_release(pending, SINGLE_DEPTH_NESTING);
sock_put(pending);
}
@@ -708,7 +716,7 @@ EXPORT_SYMBOL_GPL(vsock_stream_has_space);
static int vsock_release(struct socket *sock)
{
- __vsock_release(sock->sk);
+ __vsock_release(sock->sk, 0);
sock->sk = NULL;
sock->state = SS_FREE;
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 261521d286d6..c443db7af8d4 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -559,7 +559,7 @@ static void hvs_release(struct vsock_sock *vsk)
struct sock *sk = sk_vsock(vsk);
bool remove_sock;
- lock_sock(sk);
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
remove_sock = hvs_close_lock_held(vsk);
release_sock(sk);
if (remove_sock)
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 5bb70c692b1e..a666ef8fc54e 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -820,7 +820,7 @@ void virtio_transport_release(struct vsock_sock *vsk)
struct sock *sk = &vsk->sk;
bool remove_sock = true;
- lock_sock(sk);
+ lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
if (sk->sk_type == SOCK_STREAM)
remove_sock = virtio_transport_close(vsk);
--
2.19.1
^ permalink raw reply related
* RE: [PATCH v5 1/3] x86/hyper-v: Suspend/resume the hypercall page for hibernation
From: Dexuan Cui @ 2019-09-30 18:49 UTC (permalink / raw)
To: vkuznets
Cc: linux-arch@vger.kernel.org, arnd@arndb.de, bp@alien8.de,
daniel.lezcano@linaro.org, Haiyang Zhang, hpa@zytor.com,
KY Srinivasan, linux-hyperv@vger.kernel.org,
linux-kernel@vger.kernel.org, mingo@redhat.com, sashal@kernel.org,
Stephen Hemminger, tglx@linutronix.de, x86@kernel.org,
Michael Kelley, Sasha Levin
In-Reply-To: <877e5u6re3.fsf@vitty.brq.redhat.com>
> From: Vitaly Kuznetsov <vkuznets@redhat.com>
> Sent: Friday, September 27, 2019 2:05 AM
> To: Dexuan Cui <decui@microsoft.com>
>
> Dexuan Cui <decui@microsoft.com> writes:
> ...
> > So, I'm pretty sure no IPI can happen between hv_suspend() and
> hv_resume().
> > self-IPI is not supposed to happen either, since interrupts are disabled.
> >
> > IMO TLB flush should not be an issue either, unless the kernel changes page
> > tables between hv_suspend() and hv_resume(), which is not the case as I
> > checked the related code, but it looks in theory that might happen, say, in
> > the future, so if you insist we should save the variable "hv_hypercall_pg"
> > to a temporary variable and set the "hv_hypercall_pg" to NULL before we
> > disable the hypercall page
>
> Let's do it as future-proofing so we can keep relying on !hv_hypercall_pg
> everywhere we need. No need to change this patch IMO, a follow-up would
> do.
> Vitaly
Now I think it would be better to do it in this patch. :-)
I'll post a v6 to follow your suggestion.
Thanks,
-- Dexuan
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox