* [PATCH v2 4/5] VSOCK: add sock_diag interface
From: Stefan Hajnoczi @ 2017-10-04 16:37 UTC (permalink / raw)
To: netdev; +Cc: David S . Miller, Jorgen Hansen, Dexuan Cui, Stefan Hajnoczi
In-Reply-To: <20171004163716.3964-1-stefanha@redhat.com>
This patch adds the sock_diag interface for querying sockets from
userspace. Tools like ss(8) and netstat(8) can use this interface to
list open sockets.
The userspace ABI is defined in <linux/vm_sockets_diag.h> and includes
netlink request and response structs. The request can query sockets
based on their sk_state (e.g. listening sockets only) and the response
contains socket information fields including the local/remote addresses,
inode number, etc.
This patch does not dump VMCI pending sockets because I have only tested
the virtio transport, which does not use pending sockets. Support can
be added later by extending vsock_diag_dump() if needed by VMCI users.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
MAINTAINERS | 2 +
net/vmw_vsock/Makefile | 3 +
include/uapi/linux/vm_sockets_diag.h | 33 +++++++
net/vmw_vsock/diag.c | 186 +++++++++++++++++++++++++++++++++++
net/vmw_vsock/Kconfig | 10 ++
5 files changed, 234 insertions(+)
create mode 100644 include/uapi/linux/vm_sockets_diag.h
create mode 100644 net/vmw_vsock/diag.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 1c3feffb1c1c..200dac93f34b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13975,6 +13975,8 @@ S: Maintained
F: include/linux/virtio_vsock.h
F: include/uapi/linux/virtio_vsock.h
F: include/uapi/linux/vsockmon.h
+F: include/uapi/linux/vm_sockets_diag.h
+F: net/vmw_vsock/diag.c
F: net/vmw_vsock/af_vsock_tap.c
F: net/vmw_vsock/virtio_transport_common.c
F: net/vmw_vsock/virtio_transport.c
diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile
index 09fc2eb29dc8..e5dbf153aff0 100644
--- a/net/vmw_vsock/Makefile
+++ b/net/vmw_vsock/Makefile
@@ -1,10 +1,13 @@
obj-$(CONFIG_VSOCKETS) += vsock.o
+obj-$(CONFIG_VSOCKETS_DIAG) += vsock_diag.o
obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o
obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o
vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o
+vsock_diag-y += diag.o
+
vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \
vmci_transport_notify_qstate.o
diff --git a/include/uapi/linux/vm_sockets_diag.h b/include/uapi/linux/vm_sockets_diag.h
new file mode 100644
index 000000000000..14cd7dc5a187
--- /dev/null
+++ b/include/uapi/linux/vm_sockets_diag.h
@@ -0,0 +1,33 @@
+/* AF_VSOCK sock_diag(7) interface for querying open sockets */
+
+#ifndef _UAPI__VM_SOCKETS_DIAG_H__
+#define _UAPI__VM_SOCKETS_DIAG_H__
+
+#include <linux/types.h>
+
+/* Request */
+struct vsock_diag_req {
+ __u8 sdiag_family; /* must be AF_VSOCK */
+ __u8 sdiag_protocol; /* must be 0 */
+ __u16 pad; /* must be 0 */
+ __u32 vdiag_states; /* query bitmap (e.g. 1 << TCP_LISTEN) */
+ __u32 vdiag_ino; /* must be 0 (reserved) */
+ __u32 vdiag_show; /* must be 0 (reserved) */
+ __u32 vdiag_cookie[2];
+};
+
+/* Response */
+struct vsock_diag_msg {
+ __u8 vdiag_family; /* AF_VSOCK */
+ __u8 vdiag_type; /* SOCK_STREAM or SOCK_DGRAM */
+ __u8 vdiag_state; /* sk_state (e.g. TCP_LISTEN) */
+ __u8 vdiag_shutdown; /* local RCV_SHUTDOWN | SEND_SHUTDOWN */
+ __u32 vdiag_src_cid;
+ __u32 vdiag_src_port;
+ __u32 vdiag_dst_cid;
+ __u32 vdiag_dst_port;
+ __u32 vdiag_ino;
+ __u32 vdiag_cookie[2];
+};
+
+#endif /* _UAPI__VM_SOCKETS_DIAG_H__ */
diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c
new file mode 100644
index 000000000000..31b567652250
--- /dev/null
+++ b/net/vmw_vsock/diag.c
@@ -0,0 +1,186 @@
+/*
+ * vsock sock_diag(7) module
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ * Author: Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation version 2 and no later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/module.h>
+#include <linux/sock_diag.h>
+#include <linux/vm_sockets_diag.h>
+#include <net/af_vsock.h>
+
+static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
+ u32 portid, u32 seq, u32 flags)
+{
+ struct vsock_sock *vsk = vsock_sk(sk);
+ struct vsock_diag_msg *rep;
+ struct nlmsghdr *nlh;
+
+ nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep),
+ flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ rep = nlmsg_data(nlh);
+ rep->vdiag_family = AF_VSOCK;
+
+ /* Lock order dictates that sk_lock is acquired before
+ * vsock_table_lock, so we cannot lock here. Simply don't take
+ * sk_lock; sk is guaranteed to stay alive since vsock_table_lock is
+ * held.
+ */
+ rep->vdiag_type = sk->sk_type;
+ rep->vdiag_state = sk->sk_state;
+ rep->vdiag_shutdown = sk->sk_shutdown;
+ rep->vdiag_src_cid = vsk->local_addr.svm_cid;
+ rep->vdiag_src_port = vsk->local_addr.svm_port;
+ rep->vdiag_dst_cid = vsk->remote_addr.svm_cid;
+ rep->vdiag_dst_port = vsk->remote_addr.svm_port;
+ rep->vdiag_ino = sock_i_ino(sk);
+
+ sock_diag_save_cookie(sk, rep->vdiag_cookie);
+
+ return 0;
+}
+
+static int vsock_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct vsock_diag_req *req;
+ struct vsock_sock *vsk;
+ unsigned int bucket;
+ unsigned int last_i;
+ unsigned int table;
+ struct net *net;
+ unsigned int i;
+
+ req = nlmsg_data(cb->nlh);
+ net = sock_net(skb->sk);
+
+ /* State saved between calls: */
+ table = cb->args[0];
+ bucket = cb->args[1];
+ i = last_i = cb->args[2];
+
+ /* TODO VMCI pending sockets? */
+
+ spin_lock_bh(&vsock_table_lock);
+
+ /* Bind table (locally created sockets) */
+ if (table == 0) {
+ while (bucket < ARRAY_SIZE(vsock_bind_table)) {
+ struct list_head *head = &vsock_bind_table[bucket];
+
+ i = 0;
+ list_for_each_entry(vsk, head, bound_table) {
+ struct sock *sk = sk_vsock(vsk);
+
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (i < last_i)
+ goto next_bind;
+ if (!(req->vdiag_states & (1 << sk->sk_state)))
+ goto next_bind;
+ if (sk_diag_fill(sk, skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI) < 0)
+ goto done;
+next_bind:
+ i++;
+ }
+ last_i = 0;
+ bucket++;
+ }
+
+ table++;
+ bucket = 0;
+ }
+
+ /* Connected table (accepted connections) */
+ while (bucket < ARRAY_SIZE(vsock_connected_table)) {
+ struct list_head *head = &vsock_connected_table[bucket];
+
+ i = 0;
+ list_for_each_entry(vsk, head, connected_table) {
+ struct sock *sk = sk_vsock(vsk);
+
+ /* Skip sockets we've already seen above */
+ if (__vsock_in_bound_table(vsk))
+ continue;
+
+ if (!net_eq(sock_net(sk), net))
+ continue;
+ if (i < last_i)
+ goto next_connected;
+ if (!(req->vdiag_states & (1 << sk->sk_state)))
+ goto next_connected;
+ if (sk_diag_fill(sk, skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI) < 0)
+ goto done;
+next_connected:
+ i++;
+ }
+ last_i = 0;
+ bucket++;
+ }
+
+done:
+ spin_unlock_bh(&vsock_table_lock);
+
+ cb->args[0] = table;
+ cb->args[1] = bucket;
+ cb->args[2] = i;
+
+ return skb->len;
+}
+
+static int vsock_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+ int hdrlen = sizeof(struct vsock_diag_req);
+ struct net *net = sock_net(skb->sk);
+
+ if (nlmsg_len(h) < hdrlen)
+ return -EINVAL;
+
+ if (h->nlmsg_flags & NLM_F_DUMP) {
+ struct netlink_dump_control c = {
+ .dump = vsock_diag_dump,
+ };
+ return netlink_dump_start(net->diag_nlsk, skb, h, &c);
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static const struct sock_diag_handler vsock_diag_handler = {
+ .family = AF_VSOCK,
+ .dump = vsock_diag_handler_dump,
+};
+
+static int __init vsock_diag_init(void)
+{
+ return sock_diag_register(&vsock_diag_handler);
+}
+
+static void __exit vsock_diag_exit(void)
+{
+ sock_diag_unregister(&vsock_diag_handler);
+}
+
+module_init(vsock_diag_init);
+module_exit(vsock_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG,
+ 40 /* AF_VSOCK */);
diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig
index 8831e7c42167..829cb7c8f14c 100644
--- a/net/vmw_vsock/Kconfig
+++ b/net/vmw_vsock/Kconfig
@@ -15,6 +15,16 @@ config VSOCKETS
To compile this driver as a module, choose M here: the module
will be called vsock. If unsure, say N.
+config VSOCKETS_DIAG
+ tristate "Virtual Sockets monitoring interface"
+ depends on VSOCKETS
+ default y
+ help
+ Support for PF_VSOCK sockets monitoring interface used by the ss tool.
+ If unsure, say Y.
+
+ Enable this module so userspace applications can query open sockets.
+
config VMWARE_VMCI_VSOCKETS
tristate "VMware VMCI transport for Virtual Sockets"
depends on VSOCKETS && VMWARE_VMCI
--
2.13.6
^ permalink raw reply related
* [PATCH v2 3/5] VSOCK: use TCP state constants for sk_state
From: Stefan Hajnoczi @ 2017-10-04 16:37 UTC (permalink / raw)
To: netdev; +Cc: David S . Miller, Jorgen Hansen, Dexuan Cui, Stefan Hajnoczi
In-Reply-To: <20171004163716.3964-1-stefanha@redhat.com>
There are two state fields: socket->state and sock->sk_state. The
socket->state field uses SS_UNCONNECTED, SS_CONNECTED, etc while the
sock->sk_state typically uses values that match TCP state constants
(TCP_CLOSE, TCP_ESTABLISHED). AF_VSOCK does not follow this convention
and instead uses SS_* constants for both fields.
The sk_state field will be exposed to userspace through the vsock_diag
interface for ss(8), netstat(8), and other programs.
This patch switches sk_state to TCP state constants so that the meaning
of this field is consistent with other address families. Not just
AF_INET and AF_INET6 use the TCP constants, AF_UNIX and others do too.
The following mapping was used to convert the code:
SS_FREE -> TCP_CLOSE
SS_UNCONNECTED -> TCP_CLOSE
SS_CONNECTING -> TCP_SYN_SENT
SS_CONNECTED -> TCP_ESTABLISHED
SS_DISCONNECTING -> TCP_CLOSING
VSOCK_SS_LISTEN -> TCP_LISTEN
In __vsock_create() the sk_state initialization was dropped because
sock_init_data() already initializes sk_state to TCP_CLOSE.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
include/net/af_vsock.h | 3 --
net/vmw_vsock/af_vsock.c | 46 ++++++++++++++++------------
net/vmw_vsock/virtio_transport.c | 2 +-
net/vmw_vsock/virtio_transport_common.c | 22 ++++++-------
net/vmw_vsock/vmci_transport.c | 34 ++++++++++----------
net/vmw_vsock/vmci_transport_notify.c | 2 +-
net/vmw_vsock/vmci_transport_notify_qstate.c | 2 +-
7 files changed, 58 insertions(+), 53 deletions(-)
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 3dd217718a2f..9324ac2d9ff2 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -22,9 +22,6 @@
#include "vsock_addr.h"
-/* vsock-specific sock->sk_state constants */
-#define VSOCK_SS_LISTEN 255
-
#define LAST_RESERVED_PORT 1023
#define VSOCK_HASH_SIZE 251
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 9b179a0081b3..98359c19522f 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -36,7 +36,7 @@
* not support simultaneous connects (two "client" sockets connecting).
*
* - "Server" sockets are referred to as listener sockets throughout this
- * implementation because they are in the VSOCK_SS_LISTEN state. When a
+ * implementation because they are in the TCP_LISTEN state. When a
* connection request is received (the second kind of socket mentioned above),
* we create a new socket and refer to it as a pending socket. These pending
* sockets are placed on the pending connection list of the listener socket.
@@ -82,6 +82,15 @@
* argument, we must ensure the reference count is increased to ensure the
* socket isn't freed before the function is run; the deferred function will
* then drop the reference.
+ *
+ * - sk->sk_state uses the TCP state constants because they are widely used by
+ * other address families and exposed to userspace tools like ss(8):
+ *
+ * TCP_CLOSE - unconnected
+ * TCP_SYN_SENT - connecting
+ * TCP_ESTABLISHED - connected
+ * TCP_CLOSING - disconnecting
+ * TCP_LISTEN - listening
*/
#include <linux/types.h>
@@ -477,7 +486,7 @@ void vsock_pending_work(struct work_struct *work)
if (vsock_in_connected_table(vsk))
vsock_remove_connected(vsk);
- sk->sk_state = SS_FREE;
+ sk->sk_state = TCP_CLOSE;
out:
release_sock(sk);
@@ -617,7 +626,6 @@ struct sock *__vsock_create(struct net *net,
sk->sk_destruct = vsock_sk_destruct;
sk->sk_backlog_rcv = vsock_queue_rcv_skb;
- sk->sk_state = 0;
sock_reset_flag(sk, SOCK_DONE);
INIT_LIST_HEAD(&vsk->bound_table);
@@ -891,7 +899,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
/* Listening sockets that have connections in their accept
* queue can be read.
*/
- if (sk->sk_state == VSOCK_SS_LISTEN
+ if (sk->sk_state == TCP_LISTEN
&& !vsock_is_accept_queue_empty(sk))
mask |= POLLIN | POLLRDNORM;
@@ -920,7 +928,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
}
/* Connected sockets that can produce data can be written. */
- if (sk->sk_state == SS_CONNECTED) {
+ if (sk->sk_state == TCP_ESTABLISHED) {
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
bool space_avail_now = false;
int ret = transport->notify_poll_out(
@@ -942,7 +950,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock,
* POLLOUT|POLLWRNORM when peer is closed and nothing to read,
* but local send is not shutdown.
*/
- if (sk->sk_state == SS_UNCONNECTED) {
+ if (sk->sk_state == TCP_CLOSE) {
if (!(sk->sk_shutdown & SEND_SHUTDOWN))
mask |= POLLOUT | POLLWRNORM;
@@ -1112,9 +1120,9 @@ static void vsock_connect_timeout(struct work_struct *work)
sk = sk_vsock(vsk);
lock_sock(sk);
- if (sk->sk_state == SS_CONNECTING &&
+ if (sk->sk_state == TCP_SYN_SENT &&
(sk->sk_shutdown != SHUTDOWN_MASK)) {
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = ETIMEDOUT;
sk->sk_error_report(sk);
cancel = 1;
@@ -1160,7 +1168,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
err = -EALREADY;
break;
default:
- if ((sk->sk_state == VSOCK_SS_LISTEN) ||
+ if ((sk->sk_state == TCP_LISTEN) ||
vsock_addr_cast(addr, addr_len, &remote_addr) != 0) {
err = -EINVAL;
goto out;
@@ -1183,7 +1191,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (err)
goto out;
- sk->sk_state = SS_CONNECTING;
+ sk->sk_state = TCP_SYN_SENT;
err = transport->connect(vsk);
if (err < 0)
@@ -1203,7 +1211,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
timeout = vsk->connect_timeout;
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) {
+ while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) {
if (flags & O_NONBLOCK) {
/* If we're not going to block, we schedule a timeout
* function to generate a timeout on the connection
@@ -1226,13 +1234,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (signal_pending(current)) {
err = sock_intr_errno(timeout);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
vsock_transport_cancel_pkt(vsk);
goto out_wait;
} else if (timeout == 0) {
err = -ETIMEDOUT;
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
vsock_transport_cancel_pkt(vsk);
goto out_wait;
@@ -1243,7 +1251,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr,
if (sk->sk_err) {
err = -sk->sk_err;
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sock->state = SS_UNCONNECTED;
} else {
err = 0;
@@ -1276,7 +1284,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags,
goto out;
}
- if (listener->sk_state != VSOCK_SS_LISTEN) {
+ if (listener->sk_state != TCP_LISTEN) {
err = -EINVAL;
goto out;
}
@@ -1366,7 +1374,7 @@ static int vsock_listen(struct socket *sock, int backlog)
}
sk->sk_max_ack_backlog = backlog;
- sk->sk_state = VSOCK_SS_LISTEN;
+ sk->sk_state = TCP_LISTEN;
err = 0;
@@ -1546,7 +1554,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
/* Callers should not provide a destination with stream sockets. */
if (msg->msg_namelen) {
- err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP;
+ err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
goto out;
}
@@ -1557,7 +1565,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg,
goto out;
}
- if (sk->sk_state != SS_CONNECTED ||
+ if (sk->sk_state != TCP_ESTABLISHED ||
!vsock_addr_bound(&vsk->local_addr)) {
err = -ENOTCONN;
goto out;
@@ -1681,7 +1689,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
lock_sock(sk);
- if (sk->sk_state != SS_CONNECTED) {
+ if (sk->sk_state != TCP_ESTABLISHED) {
/* Recvmsg is supposed to return 0 if a peer performs an
* orderly shutdown. Differentiate between that case and when a
* peer has not connected or a local shutdown occured with the
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 403d86e80162..8e03bd3f3668 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -414,7 +414,7 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock)
static void virtio_vsock_reset_sock(struct sock *sk)
{
lock_sock(sk);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = ECONNRESET;
sk->sk_error_report(sk);
release_sock(sk);
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index edba7ab97563..3ae3a33da70b 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -708,7 +708,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk,
sock_set_flag(sk, SOCK_DONE);
vsk->peer_shutdown = SHUTDOWN_MASK;
if (vsock_stream_has_data(vsk) <= 0)
- sk->sk_state = SS_DISCONNECTING;
+ sk->sk_state = TCP_CLOSING;
sk->sk_state_change(sk);
if (vsk->close_work_scheduled &&
@@ -748,8 +748,8 @@ static bool virtio_transport_close(struct vsock_sock *vsk)
{
struct sock *sk = &vsk->sk;
- if (!(sk->sk_state == SS_CONNECTED ||
- sk->sk_state == SS_DISCONNECTING))
+ if (!(sk->sk_state == TCP_ESTABLISHED ||
+ sk->sk_state == TCP_CLOSING))
return true;
/* Already received SHUTDOWN from peer, reply with RST */
@@ -801,7 +801,7 @@ virtio_transport_recv_connecting(struct sock *sk,
switch (le16_to_cpu(pkt->hdr.op)) {
case VIRTIO_VSOCK_OP_RESPONSE:
- sk->sk_state = SS_CONNECTED;
+ sk->sk_state = TCP_ESTABLISHED;
sk->sk_socket->state = SS_CONNECTED;
vsock_insert_connected(vsk);
sk->sk_state_change(sk);
@@ -821,7 +821,7 @@ virtio_transport_recv_connecting(struct sock *sk,
destroy:
virtio_transport_reset(vsk, pkt);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = skerr;
sk->sk_error_report(sk);
return err;
@@ -857,7 +857,7 @@ virtio_transport_recv_connected(struct sock *sk,
vsk->peer_shutdown |= SEND_SHUTDOWN;
if (vsk->peer_shutdown == SHUTDOWN_MASK &&
vsock_stream_has_data(vsk) <= 0)
- sk->sk_state = SS_DISCONNECTING;
+ sk->sk_state = TCP_CLOSING;
if (le32_to_cpu(pkt->hdr.flags))
sk->sk_state_change(sk);
break;
@@ -928,7 +928,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt)
lock_sock_nested(child, SINGLE_DEPTH_NESTING);
- child->sk_state = SS_CONNECTED;
+ child->sk_state = TCP_ESTABLISHED;
vchild = vsock_sk(child);
vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
@@ -1016,18 +1016,18 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt)
sk->sk_write_space(sk);
switch (sk->sk_state) {
- case VSOCK_SS_LISTEN:
+ case TCP_LISTEN:
virtio_transport_recv_listen(sk, pkt);
virtio_transport_free_pkt(pkt);
break;
- case SS_CONNECTING:
+ case TCP_SYN_SENT:
virtio_transport_recv_connecting(sk, pkt);
virtio_transport_free_pkt(pkt);
break;
- case SS_CONNECTED:
+ case TCP_ESTABLISHED:
virtio_transport_recv_connected(sk, pkt);
break;
- case SS_DISCONNECTING:
+ case TCP_CLOSING:
virtio_transport_recv_disconnecting(sk, pkt);
virtio_transport_free_pkt(pkt);
break;
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index 10ae7823a19d..9cb3a6c780aa 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -743,7 +743,7 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg)
/* The local context ID may be out of date, update it. */
vsk->local_addr.svm_cid = dst.svm_cid;
- if (sk->sk_state == SS_CONNECTED)
+ if (sk->sk_state == TCP_ESTABLISHED)
vmci_trans(vsk)->notify_ops->handle_notify_pkt(
sk, pkt, true, &dst, &src,
&bh_process_pkt);
@@ -801,7 +801,9 @@ static void vmci_transport_handle_detach(struct sock *sk)
* left in our consume queue.
*/
if (vsock_stream_has_data(vsk) <= 0) {
- if (sk->sk_state == SS_CONNECTING) {
+ sk->sk_state = TCP_CLOSE;
+
+ if (sk->sk_state == TCP_SYN_SENT) {
/* The peer may detach from a queue pair while
* we are still in the connecting state, i.e.,
* if the peer VM is killed after attaching to
@@ -810,12 +812,10 @@ static void vmci_transport_handle_detach(struct sock *sk)
* event like a reset.
*/
- sk->sk_state = SS_UNCONNECTED;
sk->sk_err = ECONNRESET;
sk->sk_error_report(sk);
return;
}
- sk->sk_state = SS_UNCONNECTED;
}
sk->sk_state_change(sk);
}
@@ -883,17 +883,17 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work)
vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context;
switch (sk->sk_state) {
- case VSOCK_SS_LISTEN:
+ case TCP_LISTEN:
vmci_transport_recv_listen(sk, pkt);
break;
- case SS_CONNECTING:
+ case TCP_SYN_SENT:
/* Processing of pending connections for servers goes through
* the listening socket, so see vmci_transport_recv_listen()
* for that path.
*/
vmci_transport_recv_connecting_client(sk, pkt);
break;
- case SS_CONNECTED:
+ case TCP_ESTABLISHED:
vmci_transport_recv_connected(sk, pkt);
break;
default:
@@ -942,7 +942,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context;
switch (pending->sk_state) {
- case SS_CONNECTING:
+ case TCP_SYN_SENT:
err = vmci_transport_recv_connecting_server(sk,
pending,
pkt);
@@ -1072,7 +1072,7 @@ static int vmci_transport_recv_listen(struct sock *sk,
vsock_add_pending(sk, pending);
sk->sk_ack_backlog++;
- pending->sk_state = SS_CONNECTING;
+ pending->sk_state = TCP_SYN_SENT;
vmci_trans(vpending)->produce_size =
vmci_trans(vpending)->consume_size = qp_size;
vmci_trans(vpending)->queue_pair_size = qp_size;
@@ -1197,11 +1197,11 @@ vmci_transport_recv_connecting_server(struct sock *listener,
* the socket will be valid until it is removed from the queue.
*
* If we fail sending the attach below, we remove the socket from the
- * connected list and move the socket to SS_UNCONNECTED before
+ * connected list and move the socket to TCP_CLOSE before
* releasing the lock, so a pending slow path processing of an incoming
* packet will not see the socket in the connected state in that case.
*/
- pending->sk_state = SS_CONNECTED;
+ pending->sk_state = TCP_ESTABLISHED;
vsock_insert_connected(vpending);
@@ -1232,7 +1232,7 @@ vmci_transport_recv_connecting_server(struct sock *listener,
destroy:
pending->sk_err = skerr;
- pending->sk_state = SS_UNCONNECTED;
+ pending->sk_state = TCP_CLOSE;
/* As long as we drop our reference, all necessary cleanup will handle
* when the cleanup function drops its reference and our destruct
* implementation is called. Note that since the listen handler will
@@ -1270,7 +1270,7 @@ vmci_transport_recv_connecting_client(struct sock *sk,
* accounting (it can already be found since it's in the bound
* table).
*/
- sk->sk_state = SS_CONNECTED;
+ sk->sk_state = TCP_ESTABLISHED;
sk->sk_socket->state = SS_CONNECTED;
vsock_insert_connected(vsk);
sk->sk_state_change(sk);
@@ -1338,7 +1338,7 @@ vmci_transport_recv_connecting_client(struct sock *sk,
destroy:
vmci_transport_send_reset(sk, pkt);
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
sk->sk_err = skerr;
sk->sk_error_report(sk);
return err;
@@ -1526,7 +1526,7 @@ static int vmci_transport_recv_connected(struct sock *sk,
sock_set_flag(sk, SOCK_DONE);
vsk->peer_shutdown = SHUTDOWN_MASK;
if (vsock_stream_has_data(vsk) <= 0)
- sk->sk_state = SS_DISCONNECTING;
+ sk->sk_state = TCP_CLOSING;
sk->sk_state_change(sk);
break;
@@ -1790,7 +1790,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk)
err = vmci_transport_send_conn_request(
sk, vmci_trans(vsk)->queue_pair_size);
if (err < 0) {
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
return err;
}
} else {
@@ -1800,7 +1800,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk)
sk, vmci_trans(vsk)->queue_pair_size,
supported_proto_versions);
if (err < 0) {
- sk->sk_state = SS_UNCONNECTED;
+ sk->sk_state = TCP_CLOSE;
return err;
}
diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c
index 1406db4d97d1..41fb427f150a 100644
--- a/net/vmw_vsock/vmci_transport_notify.c
+++ b/net/vmw_vsock/vmci_transport_notify.c
@@ -355,7 +355,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
* queue. Ask for notifications when there is something to
* read.
*/
- if (sk->sk_state == SS_CONNECTED) {
+ if (sk->sk_state == TCP_ESTABLISHED) {
if (!send_waiting_read(sk, 1))
return -1;
diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c
index f3a0afc46208..0cc84f2bb05e 100644
--- a/net/vmw_vsock/vmci_transport_notify_qstate.c
+++ b/net/vmw_vsock/vmci_transport_notify_qstate.c
@@ -176,7 +176,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk,
* queue. Ask for notifications when there is something to
* read.
*/
- if (sk->sk_state == SS_CONNECTED)
+ if (sk->sk_state == TCP_ESTABLISHED)
vsock_block_update_write_window(sk);
*data_ready_now = false;
}
--
2.13.6
^ permalink raw reply related
* [PATCH v2 2/5] VSOCK: move __vsock_in_bound/connected_table() to af_vsock.h
From: Stefan Hajnoczi @ 2017-10-04 16:37 UTC (permalink / raw)
To: netdev; +Cc: David S . Miller, Jorgen Hansen, Dexuan Cui, Stefan Hajnoczi
In-Reply-To: <20171004163716.3964-1-stefanha@redhat.com>
The vsock_diag.ko module will need to check socket table membership.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
include/net/af_vsock.h | 12 ++++++++++++
net/vmw_vsock/af_vsock.c | 10 ----------
2 files changed, 12 insertions(+), 10 deletions(-)
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index 30cba806e344..3dd217718a2f 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -180,6 +180,18 @@ const struct vsock_transport *vsock_core_get_transport(void);
/**** UTILS ****/
+/* vsock_table_lock must be held */
+static inline bool __vsock_in_bound_table(struct vsock_sock *vsk)
+{
+ return !list_empty(&vsk->bound_table);
+}
+
+/* vsock_table_lock must be held */
+static inline bool __vsock_in_connected_table(struct vsock_sock *vsk)
+{
+ return !list_empty(&vsk->connected_table);
+}
+
void vsock_release_pending(struct sock *pending);
void vsock_add_pending(struct sock *listener, struct sock *pending);
void vsock_remove_pending(struct sock *listener, struct sock *pending);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 9afe4da8c67d..9b179a0081b3 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -250,16 +250,6 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src,
return NULL;
}
-static bool __vsock_in_bound_table(struct vsock_sock *vsk)
-{
- return !list_empty(&vsk->bound_table);
-}
-
-static bool __vsock_in_connected_table(struct vsock_sock *vsk)
-{
- return !list_empty(&vsk->connected_table);
-}
-
static void vsock_insert_unbound(struct vsock_sock *vsk)
{
spin_lock_bh(&vsock_table_lock);
--
2.13.6
^ permalink raw reply related
* [PATCH v2 1/5] VSOCK: export socket tables for sock_diag interface
From: Stefan Hajnoczi @ 2017-10-04 16:37 UTC (permalink / raw)
To: netdev; +Cc: David S . Miller, Jorgen Hansen, Dexuan Cui, Stefan Hajnoczi
In-Reply-To: <20171004163716.3964-1-stefanha@redhat.com>
The socket table symbols need to be exported from vsock.ko so that the
vsock_diag.ko module will be able to traverse sockets.
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
include/net/af_vsock.h | 5 +++++
net/vmw_vsock/af_vsock.c | 10 ++++++----
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/include/net/af_vsock.h b/include/net/af_vsock.h
index f9fb566e75cf..30cba806e344 100644
--- a/include/net/af_vsock.h
+++ b/include/net/af_vsock.h
@@ -27,6 +27,11 @@
#define LAST_RESERVED_PORT 1023
+#define VSOCK_HASH_SIZE 251
+extern struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
+extern struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
+extern spinlock_t vsock_table_lock;
+
#define vsock_sk(__sk) ((struct vsock_sock *)__sk)
#define sk_vsock(__vsk) (&(__vsk)->sk)
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index dfc8c51e4d74..9afe4da8c67d 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -153,7 +153,6 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
* vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function
* mods with VSOCK_HASH_SIZE to ensure this.
*/
-#define VSOCK_HASH_SIZE 251
#define MAX_PORT_RETRIES 24
#define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE)
@@ -168,9 +167,12 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid);
#define vsock_connected_sockets_vsk(vsk) \
vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr)
-static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
-static struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
-static DEFINE_SPINLOCK(vsock_table_lock);
+struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1];
+EXPORT_SYMBOL_GPL(vsock_bind_table);
+struct list_head vsock_connected_table[VSOCK_HASH_SIZE];
+EXPORT_SYMBOL_GPL(vsock_connected_table);
+DEFINE_SPINLOCK(vsock_table_lock);
+EXPORT_SYMBOL_GPL(vsock_table_lock);
/* Autobind this socket to the local address if necessary. */
static int vsock_auto_bind(struct vsock_sock *vsk)
--
2.13.6
^ permalink raw reply related
* [PATCH v2 0/5] VSOCK: add sock_diag interface
From: Stefan Hajnoczi @ 2017-10-04 16:37 UTC (permalink / raw)
To: netdev; +Cc: David S . Miller, Jorgen Hansen, Dexuan Cui, Stefan Hajnoczi
v2:
* Moved tests to tools/testing/vsock/. I was unable to put them in selftests/
because they require manual setup of a VMware/KVM guest.
* Moved to __vsock_in_bound/connected_table() to af_vsock.h
* Fixed local variable ordering in Patch 4
There is currently no way for userspace to query open AF_VSOCK sockets. This
means ss(8), netstat(8), and other utilities cannot display AF_VSOCK sockets.
This patch series adds the netlink sock_diag interface for AF_VSOCK. Userspace
programs sent a DUMP request including an sk_state bitmap to filter sockets
based on their state (connected, listening, etc). The vsock_diag.ko module
replies with information about matching sockets. This userspace ABI is defined
in <linux/vm_sockets_diag.h>.
The final patch adds a test suite that exercises the basic cases.
Jorgen and Dexuan: I have only tested the virtio transport but this should also
work for VMCI and Hyper-V. Please give it a shot if you have time.
Stefan Hajnoczi (5):
VSOCK: export socket tables for sock_diag interface
VSOCK: move __vsock_in_bound/connected_table() to af_vsock.h
VSOCK: use TCP state constants for sk_state
VSOCK: add sock_diag interface
VSOCK: add tools/testing/vsock/vsock_diag_test
MAINTAINERS | 3 +
net/vmw_vsock/Makefile | 3 +
tools/testing/vsock/Makefile | 9 +
include/net/af_vsock.h | 20 +-
include/uapi/linux/vm_sockets_diag.h | 33 ++
tools/testing/vsock/control.h | 13 +
tools/testing/vsock/timeout.h | 14 +
net/vmw_vsock/af_vsock.c | 66 +--
net/vmw_vsock/diag.c | 186 ++++++++
net/vmw_vsock/virtio_transport.c | 2 +-
net/vmw_vsock/virtio_transport_common.c | 22 +-
net/vmw_vsock/vmci_transport.c | 34 +-
net/vmw_vsock/vmci_transport_notify.c | 2 +-
net/vmw_vsock/vmci_transport_notify_qstate.c | 2 +-
tools/testing/vsock/control.c | 219 +++++++++
tools/testing/vsock/timeout.c | 64 +++
tools/testing/vsock/vsock_diag_test.c | 681 +++++++++++++++++++++++++++
net/vmw_vsock/Kconfig | 10 +
tools/testing/vsock/.gitignore | 2 +
tools/testing/vsock/README | 36 ++
20 files changed, 1354 insertions(+), 67 deletions(-)
create mode 100644 tools/testing/vsock/Makefile
create mode 100644 include/uapi/linux/vm_sockets_diag.h
create mode 100644 tools/testing/vsock/control.h
create mode 100644 tools/testing/vsock/timeout.h
create mode 100644 net/vmw_vsock/diag.c
create mode 100644 tools/testing/vsock/control.c
create mode 100644 tools/testing/vsock/timeout.c
create mode 100644 tools/testing/vsock/vsock_diag_test.c
create mode 100644 tools/testing/vsock/.gitignore
create mode 100644 tools/testing/vsock/README
--
2.13.6
^ permalink raw reply
* Re: [PATCH 1/3] wireless: iwlwifi: use bool instead of int
From: Joe Perches @ 2017-10-04 16:26 UTC (permalink / raw)
To: Christoph Böhmwalder, johannes.berg, emmanuel.grumbach,
luciano.coelho, kvalo
Cc: linux-wireless, netdev, linux-kernel
In-Reply-To: <20171004155700.18048-2-christoph@boehmwalder.at>
On Wed, 2017-10-04 at 17:56 +0200, Christoph Böhmwalder wrote:
> Change a usage of int in a boolean context to use the bool type instead, as it
> makes the intent of the function clearer and helps clarify its semantics.
>
> Also eliminate the if/else and just return the boolean result directly,
> making the code more readable.
>
> Signed-off-by: Christoph Böhmwalder <christoph@boehmwalder.at>
> ---
> drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c | 12 +++++-------
> 1 file changed, 5 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
> index b7cd813ba70f..0eb815ae97e8 100644
> --- a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
> +++ b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
> @@ -267,14 +267,12 @@ int iwl_phy_db_set_section(struct iwl_phy_db *phy_db,
> }
> IWL_EXPORT_SYMBOL(iwl_phy_db_set_section);
>
> -static int is_valid_channel(u16 ch_id)
> +static bool is_valid_channel(u16 ch_id)
> {
> - if (ch_id <= 14 ||
> - (36 <= ch_id && ch_id <= 64 && ch_id % 4 == 0) ||
> - (100 <= ch_id && ch_id <= 140 && ch_id % 4 == 0) ||
> - (145 <= ch_id && ch_id <= 165 && ch_id % 4 == 1))
> - return 1;
> - return 0;
> + return (ch_id <= 14 ||
> + (36 <= ch_id && ch_id <= 64 && ch_id % 4 == 0) ||
> + (100 <= ch_id && ch_id <= 140 && ch_id % 4 == 0) ||
> + (145 <= ch_id && ch_id <= 165 && ch_id % 4 == 1));
> }
This might be more intelligble as separate tests
static bool is_valid_channel(u16 ch_id)
{
if (ch_id <= 14)
return true;
if ((ch_id % 4 == 0) &&
((ch_id >= 36 && ch_id <= 64) ||
(ch_id >= 100 && ch_id <= 140)))
return true;
if ((ch_id % 4 == 1) &&
(chid >= 145 && ch_id <= 165))
return true;
return false;
}
The compiler should produce the same object code.
^ permalink raw reply
* Re: [PATCH] net: phy: DP83822 initial driver submission
From: Dan Murphy @ 2017-10-04 16:22 UTC (permalink / raw)
To: Florian Fainelli, andrew; +Cc: netdev
In-Reply-To: <35e1541d-1cad-8d72-300f-b9c4d0c27f11@gmail.com>
Florian
On 10/04/2017 11:18 AM, Florian Fainelli wrote:
> On 10/04/2017 08:41 AM, Dan Murphy wrote:
>> Florian
>>
>> On 10/03/2017 01:31 PM, Florian Fainelli wrote:
>>> On 10/03/2017 11:03 AM, Dan Murphy wrote:
>>>> Florian
>>>>
>>>> Thanks for the review
>>>>
>>>> On 10/03/2017 12:15 PM, Florian Fainelli wrote:
>>>>>> + } else {
>>>>>> + value &= ~DP83822_WOL_SECURE_ON;
>>>>>> + }
>>>>>> +
>>>>>> + value |= (DP83822_WOL_EN | DP83822_WOL_CLR_INDICATION |
>>>>>> + DP83822_WOL_CLR_INDICATION);
>>>>>
>>>>> The extra parenthesis should not be required here.
>>>>
>>>> I did not code that in. I had to add it after Checkpatch cribbed about it.
>>>> Let me know if you want me to remove it.
>>>
>>> Let's keep those, that does not change much.
>>>
>>>>
>>>>>
>>>>>> + phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG,
>>>>>> + value);
>>>>>> + } else {
>>>>>> + value =
>>>>>> + phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG);
>>>>>> + value &= (~DP83822_WOL_EN);
>>>>>
>>>>> Same here, parenthesis should not be needed.
>>>>
>>>> There are three lines of code in the else. This code all needs to be excuted in the else case.
>>>> I might reformat it to read better. Lindent messed that one up.
>>>
>>> sorry, I meant to write that you don't need the parenthesis around
>>> DP83822_WOL_EN since that is just a single bit here.
>>>
>>> [snip]
>>>
>>>>>> +
>>>>>> + mutex_unlock(&phydev->lock);
>>>>>> +
>>>>>> + return 0;
>>>>>> +}
>>>>>> +
>>>>>> +static int dp83822_resume(struct phy_device *phydev)
>>>>>> +{
>>>>>> + int value;
>>>>>> +
>>>>>> + mutex_lock(&phydev->lock);
>>>>>> +
>>>>>> + value = phy_read(phydev, MII_BMCR);
>>>>>> + phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN);
>>>>>
>>>>> And genphy_resume() here as well?
>>>>
>>>> genphy_resume does not have WoL.
>>>
>>> I should have been cleared, I meant using genphy_{suspend,resume} to
>>> avoid open coding the setting of the BMCR_PDOWN bit, conversely clearing
>>> of that bit. Because of the locking, maybe you could introduce unlocked
>>> versions of these two routines, or you acquire and release the lock
>>> outside of genphy_{suspend,resume}?
>>
>> OK I have addressed all of the open comments and will be posting v2 shortly.
>>
>> I do have a question on this request.
>> Are you indicating to exclusively call genphy_(suspend/resume) from within the over ridden
>> routine and then take care of the WoL bits in the phy specific code?
>>
>> Since the genphy code is duplicated here for the BMCR value that would make the most sense
>> to me. The genphy code is exported so I can call it explicitly then do the WoL
>
> I would expect you to wrap your own calls around genphy_suspend and
> genphy_resume, such your functions become:
>
> static int dp83822_suspend(struct phy_device *phydev)
> {
> int value;
>
> mutex_lock(&phydev->lock);
> value = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG);
> mutex_unlock(&phydev->lock);
>
> if (~value & DP83822_WOL_EN)
> genphy_suspend(phydev);
> }
>
> static int dp83822_resume(struct phy_device *phydev)
> {
> int value;
>
> genphy_resume(phydev);
>
> mutex_lock(&phydev->lock);
> value = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG);
>
> phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG, value |
> DP83822_WOL_CLR_INDICATION);
>
> mutex_unlock(&phydev->lock);
>
> return 0;
> }
>
Great thats exactly what I did.
Dan
>>
>> Dan
>>
>> [snip]--
>> ------------------
>> Dan Murphy
>>
>
>
--
------------------
Dan Murphy
^ permalink raw reply
* Re: multi producer / multi consumer lock-free queue with ptr_ring
From: Michael S. Tsirkin @ 2017-10-04 16:23 UTC (permalink / raw)
To: Jason A. Donenfeld; +Cc: LKML, Netdev, Samuel Holland, WireGuard mailing list
In-Reply-To: <CAHmME9qPYL-wzyX0Q7RG7Uk9kAeWbzNN5=0YrmbniUR2O3rSxA@mail.gmail.com>
On Wed, Oct 04, 2017 at 01:18:24PM +0200, Jason A. Donenfeld wrote:
> Hey Michael,
>
> Thanks for your work on ptr_ring.h. I'm interested in using it, but in
> a multi-producer, multi-consumer context. I realize it's been designed
> for a single-producer, single-consumer context, and thus uses a
> spinlock. I'm wondering if you'd be happy to receive patches that
> implement things in a lock-free way, in order to make the data
> structure more broadly usable.
>
> In case you're curious, this would be used for the multi-core
> algorithms in WireGuard.
>
> Thanks,
> Jason
Hi!
We'll have to look at the performance and how much code is shared.
Especially resize support might be tricky without a lock.
One interesting consideration is that currently we use spinlocks which
are very expensive but fair when contended.
And lock-free mechanisms are very rarely fair.
Further ring isn't really fair when full in the sense that
you might get the lock first but the result will only
be that you see a full ring and can not add an entry.
So maybe replacing (optionally?) a spinlock with just a trylock and fail
on contention would already speed up things for multi-consumer/producer
without the overhead and complexity of lock-free.
Or tweak spin lock to poll for a bit until going slow path
or until failure.
Or even try a bit lock.
I hope these ideas help.
One area where we definitely could share effort is adding some
micro-benchmarks under tools/. I currently (ab)use
tools/virtio/ringtest.
--
MST
^ permalink raw reply
* Re: [PATCH 0/3] iwlwifi: cosmetic fixes
From: Luciano Coelho @ 2017-10-04 16:21 UTC (permalink / raw)
To: Christoph Böhmwalder, johannes.berg, emmanuel.grumbach,
kvalo
Cc: linux-wireless, netdev, linux-kernel
In-Reply-To: <20171004155700.18048-1-christoph@boehmwalder.at>
On Wed, 2017-10-04 at 17:56 +0200, Christoph Böhmwalder wrote:
> Fix several code style issues, some of which were reported by
> checkpatch.pl.
>
> The changes are:
> * One instance of an `int` variable being used in a boolean context,
> chaned to
> use the more appropriate `bool` type.
> * One very minor fix, removing a newline between a function
> definition and its
> associated `static` keyword
> * One fix wrapping a macro in curly braces
>
>
> Christoph Böhmwalder (3):
> wireless: iwlwifi: use bool instead of int
> wireless: iwlwifi: function definition cosmetic fix
> wireless: iwlwifi: wrap macro into braces
>
> drivers/net/wireless/intel/iwlwifi/iwl-io.c | 2 +-
> drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c | 16 +++++++------
> ---
> 2 files changed, 8 insertions(+), 10 deletions(-)
Sorry, but this kind of series just generates churn. Especially when 2
out of 3 patches are broken. I applied your previous patch because it
was really trivial, but I really don't want to encourage this kind of
drive-by "fixes" that only cause additional work.
I generally only accept this kind of changes when people are changing
code close or related to it.
--
Luca.
^ permalink raw reply
* Re: [PATCH] net: phy: DP83822 initial driver submission
From: Florian Fainelli @ 2017-10-04 16:18 UTC (permalink / raw)
To: Dan Murphy, andrew; +Cc: netdev
In-Reply-To: <86fdd4ac-428c-16a4-2b07-fe67765c6f13@ti.com>
On 10/04/2017 08:41 AM, Dan Murphy wrote:
> Florian
>
> On 10/03/2017 01:31 PM, Florian Fainelli wrote:
>> On 10/03/2017 11:03 AM, Dan Murphy wrote:
>>> Florian
>>>
>>> Thanks for the review
>>>
>>> On 10/03/2017 12:15 PM, Florian Fainelli wrote:
>>>>> + } else {
>>>>> + value &= ~DP83822_WOL_SECURE_ON;
>>>>> + }
>>>>> +
>>>>> + value |= (DP83822_WOL_EN | DP83822_WOL_CLR_INDICATION |
>>>>> + DP83822_WOL_CLR_INDICATION);
>>>>
>>>> The extra parenthesis should not be required here.
>>>
>>> I did not code that in. I had to add it after Checkpatch cribbed about it.
>>> Let me know if you want me to remove it.
>>
>> Let's keep those, that does not change much.
>>
>>>
>>>>
>>>>> + phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG,
>>>>> + value);
>>>>> + } else {
>>>>> + value =
>>>>> + phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG);
>>>>> + value &= (~DP83822_WOL_EN);
>>>>
>>>> Same here, parenthesis should not be needed.
>>>
>>> There are three lines of code in the else. This code all needs to be excuted in the else case.
>>> I might reformat it to read better. Lindent messed that one up.
>>
>> sorry, I meant to write that you don't need the parenthesis around
>> DP83822_WOL_EN since that is just a single bit here.
>>
>> [snip]
>>
>>>>> +
>>>>> + mutex_unlock(&phydev->lock);
>>>>> +
>>>>> + return 0;
>>>>> +}
>>>>> +
>>>>> +static int dp83822_resume(struct phy_device *phydev)
>>>>> +{
>>>>> + int value;
>>>>> +
>>>>> + mutex_lock(&phydev->lock);
>>>>> +
>>>>> + value = phy_read(phydev, MII_BMCR);
>>>>> + phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN);
>>>>
>>>> And genphy_resume() here as well?
>>>
>>> genphy_resume does not have WoL.
>>
>> I should have been cleared, I meant using genphy_{suspend,resume} to
>> avoid open coding the setting of the BMCR_PDOWN bit, conversely clearing
>> of that bit. Because of the locking, maybe you could introduce unlocked
>> versions of these two routines, or you acquire and release the lock
>> outside of genphy_{suspend,resume}?
>
> OK I have addressed all of the open comments and will be posting v2 shortly.
>
> I do have a question on this request.
> Are you indicating to exclusively call genphy_(suspend/resume) from within the over ridden
> routine and then take care of the WoL bits in the phy specific code?
>
> Since the genphy code is duplicated here for the BMCR value that would make the most sense
> to me. The genphy code is exported so I can call it explicitly then do the WoL
I would expect you to wrap your own calls around genphy_suspend and
genphy_resume, such your functions become:
static int dp83822_suspend(struct phy_device *phydev)
{
int value;
mutex_lock(&phydev->lock);
value = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG);
mutex_unlock(&phydev->lock);
if (~value & DP83822_WOL_EN)
genphy_suspend(phydev);
}
static int dp83822_resume(struct phy_device *phydev)
{
int value;
genphy_resume(phydev);
mutex_lock(&phydev->lock);
value = phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG);
phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG, value |
DP83822_WOL_CLR_INDICATION);
mutex_unlock(&phydev->lock);
return 0;
}
>
> Dan
>
> [snip]--
> ------------------
> Dan Murphy
>
--
Florian
^ permalink raw reply
* Re: [PATCH net-next 6/7] net: bridge: Pass extack to down to netdev_master_upper_dev_link
From: David Ahern @ 2017-10-04 16:16 UTC (permalink / raw)
To: Stephen Hemminger
Cc: netdev, j.vosburgh, vfalico, andy, jiri, idosch, davem, bridge
In-Reply-To: <20171004081357.37bfd941@xeon-e3>
On 10/4/17 8:13 AM, Stephen Hemminger wrote:
> On Tue, 3 Oct 2017 21:58:53 -0700
> David Ahern <dsahern@gmail.com> wrote:
>
>> Pass extack arg to br_add_if. Add messages for a couple of failures
>> and pass arg to netdev_master_upper_dev_link.
>>
>> Signed-off-by: David Ahern <dsahern@gmail.com>
>
> This looks good. You might want to pass the netlink_ext_ack down as
> an immutable pointer (const).
Can't mark extack as const. The NL_SET_ERR_MSG sets _msg to the passed
in string.
>
> Acked-by: Stephen Hemminger <stephen@networkplumber.org>
>
^ permalink raw reply
* Re: [PATCH 1/3] wireless: iwlwifi: use bool instead of int
From: Luciano Coelho @ 2017-10-04 16:15 UTC (permalink / raw)
To: Christoph Böhmwalder, johannes.berg, emmanuel.grumbach,
kvalo
Cc: linux-wireless, netdev, linux-kernel
In-Reply-To: <20171004155700.18048-2-christoph@boehmwalder.at>
On Wed, 2017-10-04 at 17:56 +0200, Christoph Böhmwalder wrote:
> Change a usage of int in a boolean context to use the bool type
> instead, as it
> makes the intent of the function clearer and helps clarify its
> semantics.
>
> Also eliminate the if/else and just return the boolean result
> directly,
> making the code more readable.
>
> Signed-off-by: Christoph Böhmwalder <christoph@boehmwalder.at>
> ---
> drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c | 12 +++++-------
> 1 file changed, 5 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
> b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
> index b7cd813ba70f..0eb815ae97e8 100644
> --- a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
> +++ b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
> @@ -267,14 +267,12 @@ int iwl_phy_db_set_section(struct iwl_phy_db
> *phy_db,
> }
> IWL_EXPORT_SYMBOL(iwl_phy_db_set_section);
>
> -static int is_valid_channel(u16 ch_id)
> +static bool is_valid_channel(u16 ch_id)
> {
> - if (ch_id <= 14 ||
> - (36 <= ch_id && ch_id <= 64 && ch_id % 4 == 0) ||
> - (100 <= ch_id && ch_id <= 140 && ch_id % 4 == 0) ||
> - (145 <= ch_id && ch_id <= 165 && ch_id % 4 == 1))
> - return 1;
> - return 0;
> + return (ch_id <= 14 ||
> + (36 <= ch_id && ch_id <= 64 && ch_id % 4 == 0) ||
> + (100 <= ch_id && ch_id <= 140 && ch_id % 4 == 0) ||
> + (145 <= ch_id && ch_id <= 165 && ch_id % 4 == 1));
> }
>
> static u8 ch_id_to_ch_index(u16 ch_id)
This actually makes some sense, and I would probably apply it if it
were part of a patchset that actually does something useful.
--
Luca.
^ permalink raw reply
* Re: [PATCH 3/3] wireless: iwlwifi: wrap macro into braces
From: Joe Perches @ 2017-10-04 16:14 UTC (permalink / raw)
To: Christoph Böhmwalder, johannes.berg, emmanuel.grumbach,
luciano.coelho, kvalo
Cc: linux-wireless, netdev, linux-kernel
In-Reply-To: <20171004155700.18048-4-christoph@boehmwalder.at>
On Wed, 2017-10-04 at 17:57 +0200, Christoph Böhmwalder wrote:
> Macros should always be wrapped in braces, so fix this instance.
>
> Signed-off-by: Christoph Böhmwalder <christoph@boehmwalder.at>
> ---
> drivers/net/wireless/intel/iwlwifi/iwl-io.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.c b/drivers/net/wireless/intel/iwlwifi/iwl-io.c
> index efb1998dcabd..0211963b3e71 100644
> --- a/drivers/net/wireless/intel/iwlwifi/iwl-io.c
> +++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.c
> @@ -252,7 +252,7 @@ IWL_EXPORT_SYMBOL(iwl_force_nmi);
>
> static const char *get_rfh_string(int cmd)
> {
> -#define IWL_CMD(x) case x: return #x
> +#define IWL_CMD(x) { case x: return #x; }
I think this unnecessary. Maybe:
http://lists.infradead.org/pipermail/ath10k/2017-February/009335.html
> #define IWL_CMD_MQ(arg, reg, q) { if (arg == reg(q)) return #reg; }
But this should use do { ... } while (0)
^ permalink raw reply
* Re: [PATCH 2/3] wireless: iwlwifi: function definition cosmetic fix
From: Luciano Coelho @ 2017-10-04 16:13 UTC (permalink / raw)
To: Christoph Böhmwalder, johannes.berg, emmanuel.grumbach,
kvalo
Cc: linux-wireless, netdev, linux-kernel
In-Reply-To: <20171004155700.18048-3-christoph@boehmwalder.at>
On Wed, 2017-10-04 at 17:56 +0200, Christoph Böhmwalder wrote:
> Separate the function from the previous definition with a newline and
> put the `static` keyword on the same line, as it just looks nicer.
>
> Signed-off-by: Christoph Böhmwalder <christoph@boehmwalder.at>
> ---
> drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
> index 0eb815ae97e8..249ee1c7b02f 100644
> --- a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
> +++ b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
> @@ -325,8 +325,8 @@ static u16 channel_id_to_txp(struct iwl_phy_db *phy_db, u16 ch_id)
> }
> return 0xff;
> }
> -static
> -int iwl_phy_db_get_section_data(struct iwl_phy_db *phy_db,
> +
> +static int iwl_phy_db_get_section_data(struct iwl_phy_db *phy_db,
> u32 type, u8 **data, u16 *size, u16 ch_id)
> {
> struct iwl_phy_db_entry *entry;
Sorry, but this now looks much uglier because the second line is not
even aligned to the parenthesis.
NACK.
--
Luca.
^ permalink raw reply
* Re: [PATCH 3/3] wireless: iwlwifi: wrap macro into braces
From: Johannes Berg @ 2017-10-04 16:08 UTC (permalink / raw)
To: Christoph Böhmwalder, johannes.berg, emmanuel.grumbach,
luciano.coelho, kvalo
Cc: linux-wireless, netdev, linux-kernel
In-Reply-To: <20171004155700.18048-4-christoph@boehmwalder.at>
Please don't send obviously broken patches.
johannes
^ permalink raw reply
* Re: [Bridge] [PATCH net-next v4 1/3] bridge: add new BR_NEIGH_SUPPRESS port flag to suppress arp and nd flood
From: Roopa Prabhu @ 2017-10-04 16:06 UTC (permalink / raw)
To: Toshiaki Makita
Cc: davem@davemloft.net, Nikolay Aleksandrov, netdev@vger.kernel.org,
bridge
In-Reply-To: <c4a04560-07c8-7d9e-d64c-558f99cfc653@lab.ntt.co.jp>
On Wed, Oct 4, 2017 at 12:21 AM, Toshiaki Makita
<makita.toshiaki@lab.ntt.co.jp> wrote:
> On 2017/10/04 14:12, Roopa Prabhu wrote:
>> From: Roopa Prabhu <roopa@cumulusnetworks.com>
>>
>> This patch adds a new bridge port flag BR_NEIGH_SUPPRESS to
>> suppress arp and nd flood on bridge ports. It implements
>> rfc7432, section 10.
>> https://tools.ietf.org/html/rfc7432#section-10
>> for ethernet VPN deployments. It is similar to the existing
>> BR_ARP_PROXY flag but has a few semantic differences to conform
>> to EVPN standard. In case of EVPN, it is mainly used to
>> avoid flooding to tunnel ports like vxlan. Unlike the existing
>> flags it suppresses flood of all neigh discovery packets
>> (arp, nd) to tunnel ports.
>>
>> This patch adds netlink and sysfs support to set this bridge port
>> flag.
>>
>> Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
>> ---
> ...
>> diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
>> index dea88a2..d8c2706 100644
>> --- a/net/bridge/br_netlink.c
>> +++ b/net/bridge/br_netlink.c
>> @@ -138,6 +138,7 @@ static inline size_t br_port_info_size(void)
>> + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */
>> + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */
>> + nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */
>> + + nla_total_size(1) /* IFLA_BRPORT_NEIGH_SUPPRESS */
>> + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */
>> + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */
>> + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */
>> @@ -210,7 +211,9 @@ static int br_port_fill_attrs(struct sk_buff *skb,
>> nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) ||
>> nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags &
>> BR_VLAN_TUNNEL)) ||
>> - nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask))
>> + nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask) ||
>> + nla_put_u8(skb, IFLA_BRPORT_NEIGH_SUPPRESS, !!(p->flags &
>> + BR_NEIGH_SUPPRESS)))
>
> Wouldn't it be better to make the indentation like this?
>
> ... !!(p->flags &
> BR_NEIGH_SUPPRESS)))
not intentional. I think i will actually move the full condition on
the next line.
>
>> return -EMSGSIZE;
>>
>> timerval = br_timer_value(&p->message_age_timer);
>> @@ -692,6 +695,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
>> {
>> unsigned long old_flags = p->flags;
>> bool br_vlan_tunnel_old = false;
>> + int neigh_suppress_old = 0;
>> int err;
>>
>> err = br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE);
>> @@ -785,6 +789,12 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
>> p->group_fwd_mask = fwd_mask;
>> }
>>
>> + neigh_suppress_old = (p->flags & BR_NEIGH_SUPPRESS);
>> + br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS,
>> + BR_NEIGH_SUPPRESS);
>> + if (neigh_suppress_old != (p->flags & BR_NEIGH_SUPPRESS))
>> + br_recalculate_neigh_suppress_enabled(p->br);
>> +
>
> You are calling br_recalculate_neigh_suppress_enabled() from within
> br_port_flags_change() immediately after this.
> I think you can just call br_set_port_flag() here.
>
>> br_port_flags_change(p, old_flags ^ p->flags);
>> return 0;
>> }
you are right, i will remove the redundant call to recalc neigh_suppress
^ permalink raw reply
* [PATCH 3/3] wireless: iwlwifi: wrap macro into braces
From: Christoph Böhmwalder @ 2017-10-04 15:57 UTC (permalink / raw)
To: johannes.berg, emmanuel.grumbach, luciano.coelho, kvalo
Cc: linux-wireless, netdev, linux-kernel, Christoph Böhmwalder
In-Reply-To: <20171004155700.18048-1-christoph@boehmwalder.at>
Macros should always be wrapped in braces, so fix this instance.
Signed-off-by: Christoph Böhmwalder <christoph@boehmwalder.at>
---
drivers/net/wireless/intel/iwlwifi/iwl-io.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.c b/drivers/net/wireless/intel/iwlwifi/iwl-io.c
index efb1998dcabd..0211963b3e71 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-io.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.c
@@ -252,7 +252,7 @@ IWL_EXPORT_SYMBOL(iwl_force_nmi);
static const char *get_rfh_string(int cmd)
{
-#define IWL_CMD(x) case x: return #x
+#define IWL_CMD(x) { case x: return #x; }
#define IWL_CMD_MQ(arg, reg, q) { if (arg == reg(q)) return #reg; }
int i;
--
2.13.5
^ permalink raw reply related
* [PATCH 2/3] wireless: iwlwifi: function definition cosmetic fix
From: Christoph Böhmwalder @ 2017-10-04 15:56 UTC (permalink / raw)
To: johannes.berg, emmanuel.grumbach, luciano.coelho, kvalo
Cc: linux-wireless, netdev, linux-kernel, Christoph Böhmwalder
In-Reply-To: <20171004155700.18048-1-christoph@boehmwalder.at>
Separate the function from the previous definition with a newline and
put the `static` keyword on the same line, as it just looks nicer.
Signed-off-by: Christoph Böhmwalder <christoph@boehmwalder.at>
---
drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
index 0eb815ae97e8..249ee1c7b02f 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
@@ -325,8 +325,8 @@ static u16 channel_id_to_txp(struct iwl_phy_db *phy_db, u16 ch_id)
}
return 0xff;
}
-static
-int iwl_phy_db_get_section_data(struct iwl_phy_db *phy_db,
+
+static int iwl_phy_db_get_section_data(struct iwl_phy_db *phy_db,
u32 type, u8 **data, u16 *size, u16 ch_id)
{
struct iwl_phy_db_entry *entry;
--
2.13.5
^ permalink raw reply related
* [PATCH 1/3] wireless: iwlwifi: use bool instead of int
From: Christoph Böhmwalder @ 2017-10-04 15:56 UTC (permalink / raw)
To: johannes.berg, emmanuel.grumbach, luciano.coelho, kvalo
Cc: linux-wireless, netdev, linux-kernel, Christoph Böhmwalder
In-Reply-To: <20171004155700.18048-1-christoph@boehmwalder.at>
Change a usage of int in a boolean context to use the bool type instead, as it
makes the intent of the function clearer and helps clarify its semantics.
Also eliminate the if/else and just return the boolean result directly,
making the code more readable.
Signed-off-by: Christoph Böhmwalder <christoph@boehmwalder.at>
---
drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
index b7cd813ba70f..0eb815ae97e8 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c
@@ -267,14 +267,12 @@ int iwl_phy_db_set_section(struct iwl_phy_db *phy_db,
}
IWL_EXPORT_SYMBOL(iwl_phy_db_set_section);
-static int is_valid_channel(u16 ch_id)
+static bool is_valid_channel(u16 ch_id)
{
- if (ch_id <= 14 ||
- (36 <= ch_id && ch_id <= 64 && ch_id % 4 == 0) ||
- (100 <= ch_id && ch_id <= 140 && ch_id % 4 == 0) ||
- (145 <= ch_id && ch_id <= 165 && ch_id % 4 == 1))
- return 1;
- return 0;
+ return (ch_id <= 14 ||
+ (36 <= ch_id && ch_id <= 64 && ch_id % 4 == 0) ||
+ (100 <= ch_id && ch_id <= 140 && ch_id % 4 == 0) ||
+ (145 <= ch_id && ch_id <= 165 && ch_id % 4 == 1));
}
static u8 ch_id_to_ch_index(u16 ch_id)
--
2.13.5
^ permalink raw reply related
* [PATCH 0/3] iwlwifi: cosmetic fixes
From: Christoph Böhmwalder @ 2017-10-04 15:56 UTC (permalink / raw)
To: johannes.berg, emmanuel.grumbach, luciano.coelho, kvalo
Cc: linux-wireless, netdev, linux-kernel, Christoph Böhmwalder
Fix several code style issues, some of which were reported by checkpatch.pl.
The changes are:
* One instance of an `int` variable being used in a boolean context, chaned to
use the more appropriate `bool` type.
* One very minor fix, removing a newline between a function definition and its
associated `static` keyword
* One fix wrapping a macro in curly braces
Christoph Böhmwalder (3):
wireless: iwlwifi: use bool instead of int
wireless: iwlwifi: function definition cosmetic fix
wireless: iwlwifi: wrap macro into braces
drivers/net/wireless/intel/iwlwifi/iwl-io.c | 2 +-
drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c | 16 +++++++---------
2 files changed, 8 insertions(+), 10 deletions(-)
--
2.13.5
^ permalink raw reply
* [PATCH] PCI: Check/Set ARI capability before setting numVFs
From: Tony Nguyen @ 2017-10-04 15:52 UTC (permalink / raw)
To: linux-pci, intel-wired-lan, linux-kernel, netdev
Cc: bhelgaas, Tony Nguyen, Alexander Duyck, Emil Tantilov
This fixes a bug that can occur if an AER error is encountered while SRIOV
devices are present.
This issue was seen by doing the following. Inject an AER error to a device
that has SRIOV devices. After the device has recovered, remove the driver.
Reload the driver and enable SRIOV which causes the following crash to
occur:
kernel BUG at drivers/pci/iov.c:157!
invalid opcode: 0000 [#1] SMP
CPU: 36 PID: 2295 Comm: bash Not tainted 4.14.0-rc1+ #74
Hardware name: Supermicro X9DAi/X9DAi, BIOS 3.0a 04/29/2014
task: ffff9fa41cd45a00 task.stack: ffffb4b2036e8000
RIP: 0010:pci_iov_add_virtfn+0x2eb/0x350
RSP: 0018:ffffb4b2036ebcb8 EFLAGS: 00010286
RAX: 00000000fffffff0 RBX: ffff9fa42c1c8800 RCX: ffff9fa421ce2388
RDX: 00000000df900000 RSI: ffff9fa8214fb388 RDI: 00000000df903fff
RBP: ffffb4b2036ebd18 R08: ffff9fa421ce23b8 R09: ffffb4b2036ebc2c
R10: ffff9fa42c1a5548 R11: 000000000000058e R12: ffff9fa8214fb000
R13: ffff9fa42c1a5000 R14: ffff9fa8214fb388 R15: 0000000000000000
FS: 00007f60724b6700(0000) GS:ffff9fa82f300000(0000)
knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000559eca8b0f40 CR3: 0000000864146000 CR4: 00000000001606e0
Call Trace:
pci_enable_sriov+0x353/0x440
ixgbe_pci_sriov_configure+0xd5/0x1f0 [ixgbe]
sriov_numvfs_store+0xf7/0x170
dev_attr_store+0x18/0x30
sysfs_kf_write+0x37/0x40
kernfs_fop_write+0x120/0x1b0
__vfs_write+0x37/0x170
? __alloc_fd+0x3f/0x170
? set_close_on_exec+0x30/0x70
vfs_write+0xb5/0x1a0
SyS_write+0x55/0xc0
entry_SYSCALL_64_fastpath+0x1a/0xa5
RIP: 0033:0x7f6071bafc20
RSP: 002b:00007ffe7d42ba48 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000559eca8b0f30 RCX: 00007f6071bafc20
RDX: 0000000000000002 RSI: 0000559eca961f60 RDI: 0000000000000001
RBP: 00007f6071e78ae0 R08: 00007f6071e7a740 R09: 00007f60724b6700
R10: 0000000000000073 R11: 0000000000000246 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: 0000559eca892170
RIP: pci_iov_add_virtfn+0x2eb/0x350 RSP: ffffb4b2036ebcb8
The occurs since during AER recovery the ARI Capable Hierarchy bit,
which can affect the values for First VF Offset and VF Stride, is not set
until after pci_iov_set_numvfs() is called. This can cause the iov
structure to be populated with values that are incorrect if the bit is
later set. Check and set this bit, if needed, before calling
pci_iov_set_numvfs() so that the values being populated properly take
the ARI bit into account.
CC: Alexander Duyck <alexander.h.duyck@intel.com>
CC: Emil Tantilov <emil.s.tantilov@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
drivers/pci/iov.c | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index 7492a65..a8896c7 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -497,6 +497,10 @@ static void sriov_restore_state(struct pci_dev *dev)
if (ctrl & PCI_SRIOV_CTRL_VFE)
return;
+ if ((iov->ctrl & PCI_SRIOV_CTRL_ARI) && !(ctrl & PCI_SRIOV_CTRL_ARI))
+ pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL,
+ ctrl | PCI_SRIOV_CTRL_ARI);
+
for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++)
pci_update_resource(dev, i);
--
2.9.5
^ permalink raw reply related
* Re: [PATCH net-next 2/2] flow_dissector: dissect tunnel info
From: Tom Herbert @ 2017-10-04 15:52 UTC (permalink / raw)
To: Jiri Pirko
Cc: Simon Horman, David Miller, Jiri Pirko, Jamal Hadi Salim,
Cong Wang, Linux Kernel Network Developers, oss-drivers
In-Reply-To: <20171004081558.GE1895@nanopsycho>
On Wed, Oct 4, 2017 at 1:15 AM, Jiri Pirko <jiri@resnulli.us> wrote:
> Wed, Oct 04, 2017 at 10:08:57AM CEST, simon.horman@netronome.com wrote:
>>On Tue, Oct 03, 2017 at 11:17:46AM -0700, Tom Herbert wrote:
>>> On Tue, Oct 3, 2017 at 2:40 AM, Simon Horman <simon.horman@netronome.com> wrote:
>>> > On Mon, Oct 02, 2017 at 01:37:55PM -0700, Tom Herbert wrote:
>>> >> On Mon, Oct 2, 2017 at 1:41 AM, Simon Horman <simon.horman@netronome.com> wrote:
>>> >> > Move dissection of tunnel info from the flower classifier to the flow
>>> >> > dissector where all other dissection occurs. This should not have any
>>> >> > behavioural affect on other users of the flow dissector.
>>> >
>>> > ...
>>>
>>> > I feel that we are circling back the perennial issue of flower using the
>>> > flow dissector in a somewhat broader/different way than many/all other
>>> > users of the flow dissector.
>>> >
>>> Simon,
>>>
>>> It's more like __skb_flow_dissect is already an incredibly complex
>>> function and because of that it's difficult to maintain. We need to
>>> measure changes against that fact. For this patch, there is precisely
>>> one user (cls_flower.c) and it's not at all clear to me if there will
>>> be ever any more (e.g. for hashing we don't need tunnel info). IMO, it
>>> should be just as easy and less convolution for everyone to have
>>> flower call __skb_flow_dissect_tunnel_info directly and not call if
>>> from __skb_flow_dissect.
>>
>>Hi Tom,
>>
>>my original suggestion was just that, but Jiri indicated a strong preference
>>for the approach taken by this patch. I think we need to widen the
>>participants in this discussion.
>
> I like the __skb_flow_dissect to be the function to call and it will do
> the job according to the configuration. I don't like to split in
> multiple calls.
Those are not technical arguments. As I already mentioned, I don't
like it when we add stuff for the benefit of a 1% use case that
negatively impacts the rest of the 99% cases which is what I believe
is happening here.
> Does not make sense in the most of the cases as the
> dissection state would have to be carried in between calls.
Please elaborate. This code is being moved into __skb_flow_dissect, so
the functionality was already there. I don't see any description in
this discussion that things were broken and that this patch is a
necessary fix.
Thanks,
Tom
^ permalink raw reply
* [jkirsher/net-queue PATCH] i40e: Fix memory leak related filter programming status
From: Alexander Duyck @ 2017-10-04 15:44 UTC (permalink / raw)
To: netdev, intel-wired-lan; +Cc: akp
From: Alexander Duyck <alexander.h.duyck@intel.com>
It looks like we weren't correctly placing the pages from buffers that had
been used to return a filter programming status back on the ring. As a
result they were being overwritten and tracking of the pages was lost.
This change works to correct that by incorporating part of
i40e_put_rx_buffer into the programming status handler code. As a result we
should now be correctly placing the pages for those buffers on the
re-allocation list instead of letting them stay in place.
Fixes: 0e626ff7ccbf ("i40e: Fix support for flow director programming status")
Reported-by: Anders K. Pedersen <akp@cohaesio.com>
Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
I'm submitting this for Jeff's net queue to undergo some additional testing
before being submitted for net or stable to address the memory leak isue. The
testing for this should be pretty straight forward since ATR filters seem
to cause the issue it should be possible to trigger a pretty signficant
amount of memory loss running something like a netperf TCP_CRR test for an
extended period of time which will trigger multiple socket
creation/destruction.
Anders, feel free to test this patch. If you need to grab a copy of the
diff instead of trying to work with applying the patch through email you
should be able to find a copy at the following URL shortly after I submit
this to intel-wired-lan:
http://patchwork.ozlabs.org/project/intel-wired-lan/list/
drivers/net/ethernet/intel/i40e/i40e_txrx.c | 63 +++++++++++++++------------
1 file changed, 36 insertions(+), 27 deletions(-)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 94311e3e4f43..d4ae24674a70 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1038,6 +1038,32 @@ static bool i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
}
/**
+ * i40e_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ **/
+static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
+ struct i40e_rx_buffer *old_buff)
+{
+ struct i40e_rx_buffer *new_buff;
+ u16 nta = rx_ring->next_to_alloc;
+
+ new_buff = &rx_ring->rx_bi[nta];
+
+ /* update, and store next to alloc */
+ nta++;
+ rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+ /* transfer page from old buffer to new buffer */
+ new_buff->dma = old_buff->dma;
+ new_buff->page = old_buff->page;
+ new_buff->page_offset = old_buff->page_offset;
+ new_buff->pagecnt_bias = old_buff->pagecnt_bias;
+}
+
+/**
* i40e_rx_is_programming_status - check for programming status descriptor
* @qw: qword representing status_error_len in CPU ordering
*
@@ -1071,15 +1097,24 @@ static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
union i40e_rx_desc *rx_desc,
u64 qw)
{
- u32 ntc = rx_ring->next_to_clean + 1;
+ struct i40e_rx_buffer *rx_buffer;
+ u32 ntc = rx_ring->next_to_clean;
u8 id;
/* fetch, update, and store next to clean */
+ rx_buffer = &rx_ring->rx_bi[ntc++];
ntc = (ntc < rx_ring->count) ? ntc : 0;
rx_ring->next_to_clean = ntc;
prefetch(I40E_RX_DESC(rx_ring, ntc));
+ /* place unused page back on the ring */
+ i40e_reuse_rx_page(rx_ring, rx_buffer);
+ rx_ring->rx_stats.page_reuse_count++;
+
+ /* clear contents of buffer_info */
+ rx_buffer->page = NULL;
+
id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
@@ -1648,32 +1683,6 @@ static bool i40e_cleanup_headers(struct i40e_ring *rx_ring, struct sk_buff *skb,
}
/**
- * i40e_reuse_rx_page - page flip buffer and store it back on the ring
- * @rx_ring: rx descriptor ring to store buffers on
- * @old_buff: donor buffer to have page reused
- *
- * Synchronizes page for reuse by the adapter
- **/
-static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
- struct i40e_rx_buffer *old_buff)
-{
- struct i40e_rx_buffer *new_buff;
- u16 nta = rx_ring->next_to_alloc;
-
- new_buff = &rx_ring->rx_bi[nta];
-
- /* update, and store next to alloc */
- nta++;
- rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
-
- /* transfer page from old buffer to new buffer */
- new_buff->dma = old_buff->dma;
- new_buff->page = old_buff->page;
- new_buff->page_offset = old_buff->page_offset;
- new_buff->pagecnt_bias = old_buff->pagecnt_bias;
-}
-
-/**
* i40e_page_is_reusable - check if any reuse is possible
* @page: page struct to check
*
^ permalink raw reply related
* Re: [PATCH net-next 7/7] mlxsw: spectrum: Add extack messages for enslave failures
From: David Ahern @ 2017-10-04 15:44 UTC (permalink / raw)
To: Ido Schimmel
Cc: netdev, j.vosburgh, vfalico, andy, jiri, idosch, davem, bridge
In-Reply-To: <20171004132403.GA14864@splinter>
On 10/4/17 6:24 AM, Ido Schimmel wrote:
> On Tue, Oct 03, 2017 at 09:58:54PM -0700, David Ahern wrote:
>> mlxsw fails device enslavement for a number of reasons. Use the extack
>> facility to return an error message to the user stating why the enslave
>> is failing.
>>
>> Messages are prefixed with "spectrum" so users know it is a constraint
>> imposed by the hardware driver. For example:
>> $ ip li add br0.11 link br0 type vlan id 11
>> $ ip li set swp11 master br0
>> Error: spectrum: Enslaving a port to a device that already has an upper device is not supported.
>>
>> Signed-off-by: David Ahern <dsahern@gmail.com>
>
> Great stuff, thanks David!
>
> Reviewed-by: Ido Schimmel <idosch@mellanox.com>
> Tested-by: Ido Schimmel <idosch@mellanox.com>
>
> See one small nit below, but I would like to patch
> mlxsw_sp_netdevice_port_vlan_event() as well, so I can take care of it
> then.
ok. Works for me. I was planning to add more error messages to mlxsw;
happy to concede that to you or someone else. For example a bridge
without vlan filtering:
$ ip li add br1 type bridge
$ ip li set swp20 master br1
RTNETLINK answers: Invalid argument
>
>> ---
>> drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 46 ++++++++++++++++++++------
>> 1 file changed, 36 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
>> index 3adf237c951a..a6cc00e4e543 100644
>> --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
>> +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
>> @@ -4019,14 +4019,20 @@ static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp,
>> static bool
>> mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp,
>> struct net_device *lag_dev,
>> - struct netdev_lag_upper_info *lag_upper_info)
>> + struct netdev_lag_upper_info *lag_upper_info,
>> + struct netlink_ext_ack *extack)
>> {
>> u16 lag_id;
>>
>> - if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0)
>> + if (mlxsw_sp_lag_index_get(mlxsw_sp, lag_dev, &lag_id) != 0) {
>> + NL_SET_ERR_MSG(extack, "spectrum: Unknown LAG device");
>
> A more accurate message would be:
> "spectrum: Exceeded number of supported LAG devices"
Right. will update.
>
>> return false;
>> - if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH)
>> + }
>> + if (lag_upper_info->tx_type != NETDEV_LAG_TX_TYPE_HASH) {
>> + NL_SET_ERR_MSG(extack,
>> + "spectrum: LAG device using unsupported Tx type");
>> return false;
>> + }
>> return true;
>> }
^ permalink raw reply
* Re: [PATCH] net: phy: DP83822 initial driver submission
From: Dan Murphy @ 2017-10-04 15:41 UTC (permalink / raw)
To: Florian Fainelli, andrew; +Cc: netdev
In-Reply-To: <fd7379ac-d617-1fed-45a2-6698256cf160@gmail.com>
Florian
On 10/03/2017 01:31 PM, Florian Fainelli wrote:
> On 10/03/2017 11:03 AM, Dan Murphy wrote:
>> Florian
>>
>> Thanks for the review
>>
>> On 10/03/2017 12:15 PM, Florian Fainelli wrote:
>>>> + } else {
>>>> + value &= ~DP83822_WOL_SECURE_ON;
>>>> + }
>>>> +
>>>> + value |= (DP83822_WOL_EN | DP83822_WOL_CLR_INDICATION |
>>>> + DP83822_WOL_CLR_INDICATION);
>>>
>>> The extra parenthesis should not be required here.
>>
>> I did not code that in. I had to add it after Checkpatch cribbed about it.
>> Let me know if you want me to remove it.
>
> Let's keep those, that does not change much.
>
>>
>>>
>>>> + phy_write_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG,
>>>> + value);
>>>> + } else {
>>>> + value =
>>>> + phy_read_mmd(phydev, DP83822_DEVADDR, MII_DP83822_WOL_CFG);
>>>> + value &= (~DP83822_WOL_EN);
>>>
>>> Same here, parenthesis should not be needed.
>>
>> There are three lines of code in the else. This code all needs to be excuted in the else case.
>> I might reformat it to read better. Lindent messed that one up.
>
> sorry, I meant to write that you don't need the parenthesis around
> DP83822_WOL_EN since that is just a single bit here.
>
> [snip]
>
>>>> +
>>>> + mutex_unlock(&phydev->lock);
>>>> +
>>>> + return 0;
>>>> +}
>>>> +
>>>> +static int dp83822_resume(struct phy_device *phydev)
>>>> +{
>>>> + int value;
>>>> +
>>>> + mutex_lock(&phydev->lock);
>>>> +
>>>> + value = phy_read(phydev, MII_BMCR);
>>>> + phy_write(phydev, MII_BMCR, value & ~BMCR_PDOWN);
>>>
>>> And genphy_resume() here as well?
>>
>> genphy_resume does not have WoL.
>
> I should have been cleared, I meant using genphy_{suspend,resume} to
> avoid open coding the setting of the BMCR_PDOWN bit, conversely clearing
> of that bit. Because of the locking, maybe you could introduce unlocked
> versions of these two routines, or you acquire and release the lock
> outside of genphy_{suspend,resume}?
OK I have addressed all of the open comments and will be posting v2 shortly.
I do have a question on this request.
Are you indicating to exclusively call genphy_(suspend/resume) from within the over ridden
routine and then take care of the WoL bits in the phy specific code?
Since the genphy code is duplicated here for the BMCR value that would make the most sense
to me. The genphy code is exported so I can call it explicitly then do the WoL
Dan
[snip]--
------------------
Dan Murphy
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox