Netdev List
 help / color / mirror / Atom feed
* [PATCH net-next v7 5/6] net: dccp: Add DCCP sendmsg trace event
From: Masami Hiramatsu @ 2017-12-29  2:47 UTC (permalink / raw)
  To: Ingo Molnar, David S . Miller, Ian McDonald, Vlad Yasevich,
	Stephen Hemminger, Steven Rostedt
  Cc: Peter Zijlstra, Thomas Gleixner, LKML, H . Peter Anvin,
	Gerrit Renker, Neil Horman, dccp, netdev, linux-sctp,
	Stephen Rothwell, mhiramat
In-Reply-To: <151451552014.17912.11834170408829155608.stgit@devbox>

Add DCCP sendmsg trace event (dccp/dccp_probe) to
replace dccpprobe. Users can trace this event via
ftrace or perf tools.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
---
  Changes in v5:
   - Fix to add local directory to include for trace.h.
     Thanks Steven!
  Changes in v7:
   - Avoid preprocessor directives in tracepoint macro args
     by sharing TP_STORE_ADDR_PORTS() macro with tcp.h.
---
 include/trace/events/net_probe_common.h |   44 ++++++++++++++++
 include/trace/events/tcp.h              |   39 --------------
 net/dccp/Makefile                       |    3 +
 net/dccp/proto.c                        |    5 ++
 net/dccp/trace.h                        |   84 +++++++++++++++++++++++++++++++
 5 files changed, 137 insertions(+), 38 deletions(-)
 create mode 100644 include/trace/events/net_probe_common.h
 create mode 100644 net/dccp/trace.h

diff --git a/include/trace/events/net_probe_common.h b/include/trace/events/net_probe_common.h
new file mode 100644
index 000000000000..3930119cab08
--- /dev/null
+++ b/include/trace/events/net_probe_common.h
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#if !defined(_TRACE_NET_PROBE_COMMON_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_NET_PROBE_COMMON_H
+
+#define TP_STORE_ADDR_PORTS_V4(__entry, inet, sk)			\
+	do {								\
+		struct sockaddr_in *v4 = (void *)__entry->saddr;	\
+									\
+		v4->sin_family = AF_INET;				\
+		v4->sin_port = inet->inet_sport;			\
+		v4->sin_addr.s_addr = inet->inet_saddr;			\
+		v4 = (void *)__entry->daddr;				\
+		v4->sin_family = AF_INET;				\
+		v4->sin_port = inet->inet_dport;			\
+		v4->sin_addr.s_addr = inet->inet_daddr;			\
+	} while (0)
+
+#if IS_ENABLED(CONFIG_IPV6)
+
+#define TP_STORE_ADDR_PORTS(__entry, inet, sk)				\
+	do {								\
+		if (sk->sk_family == AF_INET6) {			\
+			struct sockaddr_in6 *v6 = (void *)__entry->saddr; \
+									\
+			v6->sin6_family = AF_INET6;			\
+			v6->sin6_port = inet->inet_sport;		\
+			v6->sin6_addr = inet6_sk(sk)->saddr;		\
+			v6 = (void *)__entry->daddr;			\
+			v6->sin6_family = AF_INET6;			\
+			v6->sin6_port = inet->inet_dport;		\
+			v6->sin6_addr = sk->sk_v6_daddr;		\
+		} else							\
+			TP_STORE_ADDR_PORTS_V4(__entry, inet, sk);	\
+	} while (0)
+
+#else
+
+#define TP_STORE_ADDR_PORTS(__entry, inet, sk)		\
+	TP_STORE_ADDR_PORTS_V4(__entry, inet, sk);
+
+#endif
+
+#endif
diff --git a/include/trace/events/tcp.h b/include/trace/events/tcp.h
index 4dea6342f7d4..1501ca91814f 100644
--- a/include/trace/events/tcp.h
+++ b/include/trace/events/tcp.h
@@ -279,44 +279,7 @@ TRACE_EVENT(tcp_retransmit_synack,
 		  __entry->saddr_v6, __entry->daddr_v6)
 );
 
-
-#define TP_STORE_ADDR_PORTS_V4(__entry, inet, sk)			\
-	do {								\
-		struct sockaddr_in *v4 = (void *)__entry->saddr;	\
-									\
-		v4->sin_family = AF_INET;				\
-		v4->sin_port = inet->inet_sport;			\
-		v4->sin_addr.s_addr = inet->inet_saddr;			\
-		v4 = (void *)__entry->daddr;				\
-		v4->sin_family = AF_INET;				\
-		v4->sin_port = inet->inet_dport;			\
-		v4->sin_addr.s_addr = inet->inet_daddr;			\
-	} while (0)
-
-#if IS_ENABLED(CONFIG_IPV6)
-
-#define TP_STORE_ADDR_PORTS(__entry, inet, sk)				\
-	do {								\
-		if (sk->sk_family == AF_INET6) {			\
-			struct sockaddr_in6 *v6 = (void *)__entry->saddr; \
-									\
-			v6->sin6_family = AF_INET6;			\
-			v6->sin6_port = inet->inet_sport;		\
-			v6->sin6_addr = inet6_sk(sk)->saddr;		\
-			v6 = (void *)__entry->daddr;			\
-			v6->sin6_family = AF_INET6;			\
-			v6->sin6_port = inet->inet_dport;		\
-			v6->sin6_addr = sk->sk_v6_daddr;		\
-		} else							\
-			TP_STORE_ADDR_PORTS_V4(__entry, inet, sk);	\
-	} while (0)
-
-#else
-
-#define TP_STORE_ADDR_PORTS(__entry, inet, sk)		\
-	TP_STORE_ADDR_PORTS_V4(__entry, inet, sk);
-
-#endif
+#include <trace/events/net_probe_common.h>
 
 TRACE_EVENT(tcp_probe,
 
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 2e7b56097bc4..4215f13a63af 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -27,3 +27,6 @@ dccp-$(CONFIG_SYSCTL) += sysctl.o
 
 dccp_diag-y := diag.o
 dccp_probe-y := probe.o
+
+# build with local directory for trace.h
+CFLAGS_proto.o := -I$(src)
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 7a75a1d3568b..fa7e92e08920 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -38,6 +38,9 @@
 #include "dccp.h"
 #include "feat.h"
 
+#define CREATE_TRACE_POINTS
+#include "trace.h"
+
 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
 
 EXPORT_SYMBOL_GPL(dccp_statistics);
@@ -761,6 +764,8 @@ int dccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	int rc, size;
 	long timeo;
 
+	trace_dccp_probe(sk, len);
+
 	if (len > dp->dccps_mss_cache)
 		return -EMSGSIZE;
 
diff --git a/net/dccp/trace.h b/net/dccp/trace.h
new file mode 100644
index 000000000000..5062421beee9
--- /dev/null
+++ b/net/dccp/trace.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM dccp
+
+#if !defined(_TRACE_DCCP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_DCCP_H
+
+#include <net/sock.h>
+#include "dccp.h"
+#include "ccids/ccid3.h"
+#include <linux/tracepoint.h>
+#include <trace/events/net_probe_common.h>
+
+TRACE_EVENT(dccp_probe,
+
+	TP_PROTO(struct sock *sk, size_t size),
+
+	TP_ARGS(sk, size),
+
+	TP_STRUCT__entry(
+		/* sockaddr_in6 is always bigger than sockaddr_in */
+		__array(__u8, saddr, sizeof(struct sockaddr_in6))
+		__array(__u8, daddr, sizeof(struct sockaddr_in6))
+		__field(__u16, sport)
+		__field(__u16, dport)
+		__field(__u16, size)
+		__field(__u16, tx_s)
+		__field(__u32, tx_rtt)
+		__field(__u32, tx_p)
+		__field(__u32, tx_x_calc)
+		__field(__u64, tx_x_recv)
+		__field(__u64, tx_x)
+		__field(__u32, tx_t_ipi)
+	),
+
+	TP_fast_assign(
+		const struct inet_sock *inet = inet_sk(sk);
+		struct ccid3_hc_tx_sock *hc = NULL;
+
+		if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3)
+			hc = ccid3_hc_tx_sk(sk);
+
+		memset(__entry->saddr, 0, sizeof(struct sockaddr_in6));
+		memset(__entry->daddr, 0, sizeof(struct sockaddr_in6));
+
+		TP_STORE_ADDR_PORTS(__entry, inet, sk);
+
+		/* For filtering use */
+		__entry->sport = ntohs(inet->inet_sport);
+		__entry->dport = ntohs(inet->inet_dport);
+
+		__entry->size = size;
+		if (hc) {
+			__entry->tx_s = hc->tx_s;
+			__entry->tx_rtt = hc->tx_rtt;
+			__entry->tx_p = hc->tx_p;
+			__entry->tx_x_calc = hc->tx_x_calc;
+			__entry->tx_x_recv = hc->tx_x_recv >> 6;
+			__entry->tx_x = hc->tx_x >> 6;
+			__entry->tx_t_ipi = hc->tx_t_ipi;
+		} else {
+			__entry->tx_s = 0;
+			memset(&__entry->tx_rtt, 0, (void *)&__entry->tx_t_ipi -
+			       (void *)&__entry->tx_rtt +
+			       sizeof(__entry->tx_t_ipi));
+		}
+	),
+
+	TP_printk("src=%pISpc dest=%pISpc size=%d tx_s=%d tx_rtt=%d "
+		  "tx_p=%d tx_x_calc=%u tx_x_recv=%llu tx_x=%llu tx_t_ipi=%d",
+		  __entry->saddr, __entry->daddr, __entry->size,
+		  __entry->tx_s, __entry->tx_rtt, __entry->tx_p,
+		  __entry->tx_x_calc, __entry->tx_x_recv, __entry->tx_x,
+		  __entry->tx_t_ipi)
+);
+
+#endif /* _TRACE_DCCP_H */
+
+/* This part must be outside protection */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>

^ permalink raw reply related

* [PATCH net-next v7 6/6] net: dccp: Remove dccpprobe module
From: Masami Hiramatsu @ 2017-12-29  2:48 UTC (permalink / raw)
  To: Ingo Molnar, David S . Miller, Ian McDonald, Vlad Yasevich,
	Stephen Hemminger, Steven Rostedt
  Cc: Peter Zijlstra, Thomas Gleixner, LKML, H . Peter Anvin,
	Gerrit Renker, Neil Horman, dccp, netdev, linux-sctp,
	Stephen Rothwell, mhiramat
In-Reply-To: <151451552014.17912.11834170408829155608.stgit@devbox>

Remove DCCP probe module since jprobe has been deprecated.
That function is now replaced by dccp/dccp_probe trace-event.
You can use it via ftrace or perftools.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
---
 Changes in v5:
  - Fix a conflict with previous change in Makefile.
---
 net/dccp/Kconfig  |   17 ----
 net/dccp/Makefile |    2 -
 net/dccp/probe.c  |  203 -----------------------------------------------------
 3 files changed, 222 deletions(-)
 delete mode 100644 net/dccp/probe.c

diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
index 8c0ef71bed2f..b270e84d9c13 100644
--- a/net/dccp/Kconfig
+++ b/net/dccp/Kconfig
@@ -39,23 +39,6 @@ config IP_DCCP_DEBUG
 
 	  Just say N.
 
-config NET_DCCPPROBE
-	tristate "DCCP connection probing"
-	depends on PROC_FS && KPROBES
-	---help---
-	This module allows for capturing the changes to DCCP connection
-	state in response to incoming packets. It is used for debugging
-	DCCP congestion avoidance modules. If you don't understand
-	what was just said, you don't need it: say N.
-
-	Documentation on how to use DCCP connection probing can be found
-	at:
-	
-	  http://www.linuxfoundation.org/collaborate/workgroups/networking/dccpprobe
-
-	To compile this code as a module, choose M here: the
-	module will be called dccp_probe.
-
 
 endmenu
 
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
index 4215f13a63af..5b4ff37bc806 100644
--- a/net/dccp/Makefile
+++ b/net/dccp/Makefile
@@ -21,12 +21,10 @@ obj-$(subst y,$(CONFIG_IP_DCCP),$(CONFIG_IPV6)) += dccp_ipv6.o
 dccp_ipv6-y := ipv6.o
 
 obj-$(CONFIG_INET_DCCP_DIAG) += dccp_diag.o
-obj-$(CONFIG_NET_DCCPPROBE) += dccp_probe.o
 
 dccp-$(CONFIG_SYSCTL) += sysctl.o
 
 dccp_diag-y := diag.o
-dccp_probe-y := probe.o
 
 # build with local directory for trace.h
 CFLAGS_proto.o := -I$(src)
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
deleted file mode 100644
index 3d3fda05b32d..000000000000
--- a/net/dccp/probe.c
+++ /dev/null
@@ -1,203 +0,0 @@
-/*
- * dccp_probe - Observe the DCCP flow with kprobes.
- *
- * The idea for this came from Werner Almesberger's umlsim
- * Copyright (C) 2004, Stephen Hemminger <shemminger@osdl.org>
- *
- * Modified for DCCP from Stephen Hemminger's code
- * Copyright (C) 2006, Ian McDonald <ian.mcdonald@jandi.co.nz>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#include <linux/kernel.h>
-#include <linux/kprobes.h>
-#include <linux/socket.h>
-#include <linux/dccp.h>
-#include <linux/proc_fs.h>
-#include <linux/module.h>
-#include <linux/kfifo.h>
-#include <linux/vmalloc.h>
-#include <linux/time64.h>
-#include <linux/gfp.h>
-#include <net/net_namespace.h>
-
-#include "dccp.h"
-#include "ccid.h"
-#include "ccids/ccid3.h"
-
-static int port;
-
-static int bufsize = 64 * 1024;
-
-static const char procname[] = "dccpprobe";
-
-static struct {
-	struct kfifo	  fifo;
-	spinlock_t	  lock;
-	wait_queue_head_t wait;
-	struct timespec64 tstart;
-} dccpw;
-
-static void printl(const char *fmt, ...)
-{
-	va_list args;
-	int len;
-	struct timespec64 now;
-	char tbuf[256];
-
-	va_start(args, fmt);
-	getnstimeofday64(&now);
-
-	now = timespec64_sub(now, dccpw.tstart);
-
-	len = sprintf(tbuf, "%lu.%06lu ",
-		      (unsigned long) now.tv_sec,
-		      (unsigned long) now.tv_nsec / NSEC_PER_USEC);
-	len += vscnprintf(tbuf+len, sizeof(tbuf)-len, fmt, args);
-	va_end(args);
-
-	kfifo_in_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
-	wake_up(&dccpw.wait);
-}
-
-static int jdccp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
-{
-	const struct inet_sock *inet = inet_sk(sk);
-	struct ccid3_hc_tx_sock *hc = NULL;
-
-	if (ccid_get_current_tx_ccid(dccp_sk(sk)) == DCCPC_CCID3)
-		hc = ccid3_hc_tx_sk(sk);
-
-	if (port == 0 || ntohs(inet->inet_dport) == port ||
-	    ntohs(inet->inet_sport) == port) {
-		if (hc)
-			printl("%pI4:%u %pI4:%u %d %d %d %d %u %llu %llu %d\n",
-			       &inet->inet_saddr, ntohs(inet->inet_sport),
-			       &inet->inet_daddr, ntohs(inet->inet_dport), size,
-			       hc->tx_s, hc->tx_rtt, hc->tx_p,
-			       hc->tx_x_calc, hc->tx_x_recv >> 6,
-			       hc->tx_x >> 6, hc->tx_t_ipi);
-		else
-			printl("%pI4:%u %pI4:%u %d\n",
-			       &inet->inet_saddr, ntohs(inet->inet_sport),
-			       &inet->inet_daddr, ntohs(inet->inet_dport),
-			       size);
-	}
-
-	jprobe_return();
-	return 0;
-}
-
-static struct jprobe dccp_send_probe = {
-	.kp	= {
-		.symbol_name = "dccp_sendmsg",
-	},
-	.entry	= jdccp_sendmsg,
-};
-
-static int dccpprobe_open(struct inode *inode, struct file *file)
-{
-	kfifo_reset(&dccpw.fifo);
-	getnstimeofday64(&dccpw.tstart);
-	return 0;
-}
-
-static ssize_t dccpprobe_read(struct file *file, char __user *buf,
-			      size_t len, loff_t *ppos)
-{
-	int error = 0, cnt = 0;
-	unsigned char *tbuf;
-
-	if (!buf)
-		return -EINVAL;
-
-	if (len == 0)
-		return 0;
-
-	tbuf = vmalloc(len);
-	if (!tbuf)
-		return -ENOMEM;
-
-	error = wait_event_interruptible(dccpw.wait,
-					 kfifo_len(&dccpw.fifo) != 0);
-	if (error)
-		goto out_free;
-
-	cnt = kfifo_out_locked(&dccpw.fifo, tbuf, len, &dccpw.lock);
-	error = copy_to_user(buf, tbuf, cnt) ? -EFAULT : 0;
-
-out_free:
-	vfree(tbuf);
-
-	return error ? error : cnt;
-}
-
-static const struct file_operations dccpprobe_fops = {
-	.owner	 = THIS_MODULE,
-	.open	 = dccpprobe_open,
-	.read    = dccpprobe_read,
-	.llseek  = noop_llseek,
-};
-
-static __init int dccpprobe_init(void)
-{
-	int ret = -ENOMEM;
-
-	init_waitqueue_head(&dccpw.wait);
-	spin_lock_init(&dccpw.lock);
-	if (kfifo_alloc(&dccpw.fifo, bufsize, GFP_KERNEL))
-		return ret;
-	if (!proc_create(procname, S_IRUSR, init_net.proc_net, &dccpprobe_fops))
-		goto err0;
-
-	ret = register_jprobe(&dccp_send_probe);
-	if (ret) {
-		ret = request_module("dccp");
-		if (!ret)
-			ret = register_jprobe(&dccp_send_probe);
-	}
-
-	if (ret)
-		goto err1;
-
-	pr_info("DCCP watch registered (port=%d)\n", port);
-	return 0;
-err1:
-	remove_proc_entry(procname, init_net.proc_net);
-err0:
-	kfifo_free(&dccpw.fifo);
-	return ret;
-}
-module_init(dccpprobe_init);
-
-static __exit void dccpprobe_exit(void)
-{
-	kfifo_free(&dccpw.fifo);
-	remove_proc_entry(procname, init_net.proc_net);
-	unregister_jprobe(&dccp_send_probe);
-
-}
-module_exit(dccpprobe_exit);
-
-MODULE_PARM_DESC(port, "Port to match (0=all)");
-module_param(port, int, 0);
-
-MODULE_PARM_DESC(bufsize, "Log buffer size (default 64k)");
-module_param(bufsize, int, 0);
-
-MODULE_AUTHOR("Ian McDonald <ian.mcdonald@jandi.co.nz>");
-MODULE_DESCRIPTION("DCCP snooper");
-MODULE_LICENSE("GPL");

^ permalink raw reply related

* Re: [PATCH net-next] virtio_net: implement VIRTIO_CONFIG_S_NEEDS_RESET
From: Jason Wang @ 2017-12-29  3:10 UTC (permalink / raw)
  To: Willem de Bruijn, Michael S. Tsirkin
  Cc: Network Development, David Miller, virtualization,
	Willem de Bruijn
In-Reply-To: <CAF=yD-+z_3Pmsa=6_j4Yzt1QxfgbVwNsDoKLc-2rb7rH63p1WQ@mail.gmail.com>



On 2017年12月29日 03:11, Willem de Bruijn wrote:
> On Mon, Oct 16, 2017 at 11:44 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>> On Tue, Oct 17, 2017 at 11:05:07AM +0800, Jason Wang wrote:
>>>
>>> On 2017年10月17日 06:34, Willem de Bruijn wrote:
>>>> On Mon, Oct 16, 2017 at 12:38 PM, Michael S. Tsirkin <mst@redhat.com> wrote:
>>>>> On Mon, Oct 16, 2017 at 12:04:57PM -0400, Willem de Bruijn wrote:
>>>>>> On Mon, Oct 16, 2017 at 11:31 AM, Michael S. Tsirkin <mst@redhat.com> wrote:
>>>>>>> On Mon, Oct 16, 2017 at 11:03:18AM -0400, Willem de Bruijn wrote:
>>>>>>>>>> +static int virtnet_reset(struct virtnet_info *vi)
>>>>>>>>>> +{
>>>>>>>>>> +     struct virtio_device *dev = vi->vdev;
>>>>>>>>>> +     int ret;
>>>>>>>>>> +
>>>>>>>>>> +     virtio_config_disable(dev);
>>>>>>>>>> +     dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;
>>>>>>>>>> +     virtnet_freeze_down(dev, true);
>>>>>>>>>> +     remove_vq_common(vi);
>>>>>>>>>> +
>>>>>>>>>> +     virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
>>>>>>>>>> +     virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
>>>>>>>>>> +
>>>>>>>>>> +     ret = virtio_finalize_features(dev);
>>>>>>>>>> +     if (ret)
>>>>>>>>>> +             goto err;
>>>>>>>>>> +
>>>>>>>>>> +     ret = virtnet_restore_up(dev);
>>>>>>>>>> +     if (ret)
>>>>>>>>>> +             goto err;
>>>>>>>>>> +
>>>>>>>>>> +     ret = virtnet_set_queues(vi, vi->curr_queue_pairs);
>>>>>>>>>> +     if (ret)
>>>>>>>>>> +             goto err;
>>>>>>>>>> +
>>>>>>>>>> +     virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
>>>>>>>>>> +     virtio_config_enable(dev);
>>>>>>>>>> +     return 0;
>>>>>>>>>> +
>>>>>>>>>> +err:
>>>>>>>>>> +     virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
>>>>>>>>>> +     return ret;
>>>>>>>>>> +}
>>>>>>>>>> +
>>>>>>>>>>    static int virtnet_set_guest_offloads(struct virtnet_info *vi, u64 offloads)
>>>>>>>>>>    {
>>>>>>>>>>         struct scatterlist sg;
>>>>>>>>> I have a question here though. How do things like MAC address
>>>>>>>>> get restored?
>>>>>>>>>
>>>>>>>>> What about the rx mode?
>>>>>>>>>
>>>>>>>>> vlans?
>>>>>>>> The function as is releases and reinitializes only ring state.
>>>>>>>> Device configuration such as mac and vlan persist across
>>>>>>>> the reset.
>>>>>>> What gave you this impression? Take a look at e.g. this
>>>>>>> code in qemu:
>>>>>>>
>>>>>>> static void virtio_net_reset(VirtIODevice *vdev)
>>>>>>> {
>>>>>>>       VirtIONet *n = VIRTIO_NET(vdev);
>>>>>>>
>>>>>>>       /* Reset back to compatibility mode */
>>>>>>>       n->promisc = 1;
>>>>>>>       n->allmulti = 0;
>>>>>>>       n->alluni = 0;
>>>>>>>       n->nomulti = 0;
>>>>>>>       n->nouni = 0;
>>>>>>>       n->nobcast = 0;
>>>>>>>       /* multiqueue is disabled by default */
>>>>>>>       n->curr_queues = 1;
>>>>>>>       timer_del(n->announce_timer);
>>>>>>>       n->announce_counter = 0;
>>>>>>>       n->status &= ~VIRTIO_NET_S_ANNOUNCE;
>>>>>>>
>>>>>>>       /* Flush any MAC and VLAN filter table state */
>>>>>>>       n->mac_table.in_use = 0;
>>>>>>>       n->mac_table.first_multi = 0;
>>>>>>>       n->mac_table.multi_overflow = 0;
>>>>>>>       n->mac_table.uni_overflow = 0;
>>>>>>>       memset(n->mac_table.macs, 0, MAC_TABLE_ENTRIES * ETH_ALEN);
>>>>>>>       memcpy(&n->mac[0], &n->nic->conf->macaddr, sizeof(n->mac));
>>>>>>>       qemu_format_nic_info_str(qemu_get_queue(n->nic), n->mac);
>>>>>>>       memset(n->vlans, 0, MAX_VLAN >> 3);
>>>>>>> }
>>>>>>>
>>>>>>> So device seems to lose all state, you have to re-program it.
>>>>>> Oh, indeed! The guest does not reset its state, so it might
>>>>>> be out of sync with the host after the operation. Was this not
>>>>>> an issue when previously resetting in the context of xdp?
>>>>> I suspect it was broken back then, too.
>>>> Okay. I guess that in principle this is all programmable through
>>>> virtnet_set_rx_mode, virtnet_vlan_rx_add_vid, etc. But it's a
>>>> lot more complex than just restoring virtnet_reset. Will need to
>>>> be careful about concurrency issues at the least. Similar to the
>>>> ones you point out below.
>>>>
>>> The problem has been pointed out during developing virtio-net XDP. But it
>>> may not be a big issue since vhost_net ignores all kinds of the filters now.
>>>
>>> Thanks
>> It might not keep doing that in the future though.
>> And virtio-net in userspace doesn't ignore the filters.
> How about the guest honor the request only if no state has been
> offloaded to the host?
>
> This is the common case for vhost_net, and not expected to change
> soon.

FYI, I'm implementing support for using a tun eBPF filter with virtio-net,
so recovering the filter should be considered.

Thanks

>
> Even when it does, we have a graceful degradation strategy. Guest
> revert state prior to reset and reapply. Though for the time being,
> solving this only in the case without state offload would solve my
> use case.

^ permalink raw reply

* Re: [PATCH v6 0/6] Add M_CAN Support for Dra76 platform
From: Yang, Wenyou @ 2017-12-29  3:38 UTC (permalink / raw)
  To: Faiz Abbas, wg, mkl, robh+dt, mark.rutland
  Cc: linux-can, netdev, devicetree, linux-kernel, nsekhar, fcooper,
	robh, sergei.shtylyov
In-Reply-To: <1513949488-13026-1-git-send-email-faiz_abbas@ti.com>



On 2017/12/22 21:31, Faiz Abbas wrote:
> This patch series adds support for M_CAN on the TI Dra76
> platform. Device tree patches will be sent separately.
> A bunch of patches were sent before by
> Franklin Cooper <fcooper@ti.com>. I have clubbed the
> series together and rebased to the latest kernel.
Tested this series on SAMA5D2 Xplained board.

Tested-by: Wenyou Yang <wenyou.yang@microchip.com>

>
> v6 changes:
> Dropped the patches to make hclk optional. Drivers
> which enable hclk as the interface clock using
> pm_runtime calls must still provide a hclk in the
> clocks property.
>
> Support higher speed CAN-FD bitrate:
> The community decided that data sampling point be used
> for the secondary sampling point here
> https://patchwork.kernel.org/patch/9909845/
>
> Franklin S Cooper Jr (6):
>    can: dev: Add support for limiting configured bitrate
>    can: m_can: Add call to of_can_transceiver
>    can: m_can: Add PM Runtime
>    can: m_can: Support higher speed CAN-FD bitrates
>    dt-bindings: can: m_can: Document new can transceiver binding
>    dt-bindings: can: can-transceiver: Document new binding
>
>   .../bindings/net/can/can-transceiver.txt           | 24 +++++++
>   .../devicetree/bindings/net/can/m_can.txt          |  9 +++
>   drivers/net/can/dev.c                              | 39 +++++++++++
>   drivers/net/can/m_can/m_can.c                      | 81 ++++++++++++++++++++--
>   include/linux/can/dev.h                            |  8 +++
>   5 files changed, 156 insertions(+), 5 deletions(-)
>   create mode 100644 Documentation/devicetree/bindings/net/can/can-transceiver.txt
>

Best Regards,
Wenyou Yang

^ permalink raw reply

* [PATCH net-next] cxgb4: Check alignment constraint for T6
From: Ganesh Goudar @ 2017-12-29  7:18 UTC (permalink / raw)
  To: netdev, davem
  Cc: nirranjan, indranil, venkatesh, Ganesh Goudar, Arjun Vynipadath

Update the check for setting IPv4 filters and align filter_id
to a multiple of 2 only for IPv6 filters in the case of T6.

Signed-off-by: Arjun Vynipadath <arjun@chelsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@chelsio.com>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
index 5980f30..29178cf 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c
@@ -1189,6 +1189,7 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
 		       struct filter_ctx *ctx)
 {
 	struct adapter *adapter = netdev2adap(dev);
+	unsigned int chip_ver = CHELSIO_CHIP_VERSION(adapter->params.chip);
 	unsigned int max_fidx, fidx;
 	struct filter_entry *f;
 	u32 iconf;
@@ -1225,12 +1226,18 @@ int __cxgb4_set_filter(struct net_device *dev, int filter_id,
 	 * insertion.
 	 */
 	if (fs->type == 0) { /* IPv4 */
-		/* If our IPv4 filter isn't being written to a
-		 * multiple of four filter index and there's an IPv6
-		 * filter at the multiple of 4 base slot, then we
-		 * prevent insertion.
+		/* For T6, If our IPv4 filter isn't being written to a
+		 * multiple of two filter index and there's an IPv6
+		 * filter at the multiple of 2 base slot, then we need
+		 * to delete that IPv6 filter ...
+		 * For adapters below T6, IPv6 filter occupies 4 entries.
+		 * Hence we need to delete the filter in multiple of 4 slot.
 		 */
-		fidx = filter_id & ~0x3;
+		if (chip_ver < CHELSIO_T6)
+			fidx = filter_id & ~0x3;
+		else
+			fidx = filter_id & ~0x1;
+
 		if (fidx != filter_id &&
 		    adapter->tids.ftid_tab[fidx].fs.type) {
 			f = &adapter->tids.ftid_tab[fidx];
-- 
2.1.0

^ permalink raw reply related

* [PATCH RESEND 1/3] net: Fix possible race in peernet2id_alloc()
From: Kirill Tkhai @ 2017-12-29  7:29 UTC (permalink / raw)
  To: netdev, davem; +Cc: eric.dumazet, ktkhai, ebiederm

peernet2id_alloc() is racy without rtnl_lock(), as atomic_read(&peer->count)
under net->nsid_lock does not guarantee that peer is alive:

rcu_read_lock()
peernet2id_alloc()                            ..
  spin_lock_bh(&net->nsid_lock)               ..
  atomic_read(&peer->count) == 1              ..
  ..                                          put_net()
  ..                                            cleanup_net()
  ..                                              for_each_net(tmp)
  ..                                                spin_lock_bh(&tmp->nsid_lock)
  ..                                                __peernet2id(tmp, net) == -1
  ..                                                    ..
  ..                                                    ..
    __peernet2id_alloc(alloc == true)                   ..
  ..                                                    ..
rcu_read_unlock()                                       ..
..                                                synchronize_rcu()
..                                                kmem_cache_free(net)

After the above situation, net::netns_ids contains an id pointing to freed
memory, and any later dereference via that id will operate on this freed memory.

Currently, peernet2id_alloc() is used under rtnl_lock() everywhere except
ovs_vport_cmd_fill_info(), and this race can't occur. But peernet2id_alloc()
is a generic interface, and we had better fix it before someone really starts
using it in the wrong context.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 net/core/net_namespace.c |   23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 60a71be75aea..6a4eab438221 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -221,17 +221,32 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id);
  */
 int peernet2id_alloc(struct net *net, struct net *peer)
 {
-	bool alloc;
+	bool alloc = false, alive = false;
 	int id;
 
-	if (atomic_read(&net->count) == 0)
-		return NETNSA_NSID_NOT_ASSIGNED;
 	spin_lock_bh(&net->nsid_lock);
-	alloc = atomic_read(&peer->count) == 0 ? false : true;
+	/* Spinlock guarantees we never hash a peer to net->netns_ids
+	 * after idr_destroy(&net->netns_ids) occurs in cleanup_net().
+	 */
+	if (atomic_read(&net->count) == 0) {
+		id = NETNSA_NSID_NOT_ASSIGNED;
+		goto unlock;
+	}
+	/*
+	 * When peer is obtained from RCU lists, we may race with
+	 * its cleanup. Check whether it's alive, and this guarantees
+	 * we never hash a peer back to net->netns_ids, after it has
+	 * just been idr_remove()'d from there in cleanup_net().
+	 */
+	if (maybe_get_net(peer))
+		alive = alloc = true;
 	id = __peernet2id_alloc(net, peer, &alloc);
+unlock:
 	spin_unlock_bh(&net->nsid_lock);
 	if (alloc && id >= 0)
 		rtnl_net_notifyid(net, RTM_NEWNSID, id);
+	if (alive)
+		put_net(peer);
 	return id;
 }
 EXPORT_SYMBOL_GPL(peernet2id_alloc);

^ permalink raw reply related

* [PATCH RESEND 2/3] net: Add BUG_ON() to get_net()
From: Kirill Tkhai @ 2017-12-29  7:29 UTC (permalink / raw)
  To: netdev, davem; +Cc: eric.dumazet, ktkhai, ebiederm
In-Reply-To: <151453250786.12258.8455863810071017385.stgit@localhost.localdomain>

Since people may mistakenly obtain a net that is being destroyed
from net_namespace_list or from net::netns_ids
without checking its net::count, let's protect
against such situations and insert BUG_ON() to stop
execution at that point.

A panic is better than memory corruption and undefined
behavior.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 include/net/net_namespace.h |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 10f99dafd5ac..ff0e47471d5b 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -195,7 +195,7 @@ void __put_net(struct net *net);
 
 static inline struct net *get_net(struct net *net)
 {
-	atomic_inc(&net->count);
+	BUG_ON(atomic_inc_return(&net->count) <= 1);
 	return net;
 }
 

^ permalink raw reply related

* [PATCH RESEND 3/3] net: Remove spinlock from get_net_ns_by_id()
From: Kirill Tkhai @ 2017-12-29  7:30 UTC (permalink / raw)
  To: netdev, davem; +Cc: eric.dumazet, ktkhai, ebiederm
In-Reply-To: <151453250786.12258.8455863810071017385.stgit@localhost.localdomain>

idr_find() is safe under rcu_read_lock() and
maybe_get_net() guarantees that net is alive.

Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
---
 net/core/net_namespace.c |    2 --
 1 file changed, 2 deletions(-)

diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 6a4eab438221..a675f35a18ff 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -279,11 +279,9 @@ struct net *get_net_ns_by_id(struct net *net, int id)
 		return NULL;
 
 	rcu_read_lock();
-	spin_lock_bh(&net->nsid_lock);
 	peer = idr_find(&net->netns_ids, id);
 	if (peer)
 		peer = maybe_get_net(peer);
-	spin_unlock_bh(&net->nsid_lock);
 	rcu_read_unlock();
 
 	return peer;

^ permalink raw reply related

* Re: [RFC PATCH bpf-next v2 4/4] error-injection: Support fault injection framework
From: Masami Hiramatsu @ 2017-12-29  7:34 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Alexei Starovoitov, Josef Bacik, rostedt, mingo, davem, netdev,
	linux-kernel, ast, kernel-team, daniel, linux-btrfs, darrick.wong,
	Josef Bacik, Akinobu Mita
In-Reply-To: <099fa7d4-435a-3fa4-841c-17603a45e77e@fb.com>

On Thu, 28 Dec 2017 17:11:31 -0800
Alexei Starovoitov <ast@fb.com> wrote:

> On 12/27/17 11:51 PM, Masami Hiramatsu wrote:
> >
> > Then what happen if the user set invalid retval to those functions?
> > even if we limit the injectable functions, it can cause a problem,
> >
> > for example,
> >
> >  obj = func_return_object();
> >  if (!obj) {
> >     handling_error...;
> >  }
> >  obj->field = x;
> >
> > In this case, obviously func_return_object() must return NULL if there is
> > an error, not -ENOMEM. But without the correct retval information, how would
> > you check the BPF code doesn't cause a trouble?
> > Currently it seems you are expecting only the functions which return error code.
> >
> >  ret = func_return_state();
> >  if (ret < 0) {
> >     handling_error...;
> >  }
> >
> > But how we can distinguish those?
> >
> > If we have the error range for each function, we can ensure what is
> > *correct* error code, NULL or errno, or any other error numbers. :)
> 
> messing up return values may cause problems and range check is
> not going to magically help.
> The caller may handle only a certain set of errors or interpret
> some of them like EBUSY as a signal to retry.
> It's plain impossible to make sure that kernel will be functional
> after error injection has been made.

Hmm, if so, why do we need this injectable table?
If we cannot ensure the safety of the error injection (and of course we can't),
why do we need to limit error injection to such a small set of functions?
I think we don't need it anymore. Any function can be injectable, and there is
no need to ensure safety.

Thank you,

> Like kmalloc() unconditionally returning NULL will be deadly
> for the kernel, hence this patch 4/4 has very limited practical
> use. The bpf program need to make intelligent decisions when
> to return an error and what kind of error to return.
> Doing blank range check adds a false sense of additional safety.
> More so it wastes kilobytes of memory to do this check, hence nack.
> 


-- 
Masami Hiramatsu <mhiramat@kernel.org>

^ permalink raw reply

* Re: [PATCH][next] wcn36xx: remove redundant assignment to msg_body.min_ch_time
From: Loic Poulain @ 2017-12-29  7:44 UTC (permalink / raw)
  To: Bjorn Andersson
  Cc: Colin King, Eugene Krasnikov, Kalle Valo, wcn36xx, linux-wireless,
	netdev, kernel-janitors, linux-kernel
In-Reply-To: <20171226201312.GA7480@builder>

Hi Colin, Bjorn,

On 26 December 2017 at 21:13, Bjorn Andersson
<bjorn.andersson@linaro.org> wrote:
> On Tue 19 Dec 09:04 PST 2017, Colin King wrote:
>
>> From: Colin Ian King <colin.king@canonical.com>
>>
>> msg_body.min_ch_time is being assigned twice; remove the redundant
>> first assignment.
>>
>> Detected by CoverityScan, CID#1463042 ("Unused Value")
>>
>
> Happy to see Coverity working for us :)
>
>
> This should have had a:
>
> Fixes: 2f3bef4b247e ("wcn36xx: Add hardware scan offload support")
>
>> Signed-off-by: Colin Ian King <colin.king@canonical.com>
>> ---
>>  drivers/net/wireless/ath/wcn36xx/smd.c | 1 -
>>  1 file changed, 1 deletion(-)
>>
>> diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
>> index 2914618a0335..bab2eca5fcac 100644
>> --- a/drivers/net/wireless/ath/wcn36xx/smd.c
>> +++ b/drivers/net/wireless/ath/wcn36xx/smd.c
>> @@ -625,7 +625,6 @@ int wcn36xx_smd_start_hw_scan(struct wcn36xx *wcn, struct ieee80211_vif *vif,
>>       INIT_HAL_MSG(msg_body, WCN36XX_HAL_START_SCAN_OFFLOAD_REQ);
>>
>>       msg_body.scan_type = WCN36XX_HAL_SCAN_TYPE_ACTIVE;
>> -     msg_body.min_ch_time = 30;
>>       msg_body.min_ch_time = 100;
>
> But I strongly suspect the second line is supposed to be max_ch_time.
>
> @Loic, do you agree?

You're absolutely right.
Colin could you please update your patch accordingly?

Regards,
Loic

^ permalink raw reply

* Re: [PATCH net 3/3] eet: ena: invoke netif_carrier_off() only after netdev registered
From: Jakub Kicinski @ 2017-12-29  7:46 UTC (permalink / raw)
  To: netanel
  Cc: davem, netdev, dwmw, zorik, matua, saeedb, msw, aliguori, nafea,
	evgenys, gtzalik
In-Reply-To: <1514496620-69953-4-git-send-email-netanel@amazon.com>

On Thu, 28 Dec 2017 21:30:20 +0000, netanel@amazon.com wrote:
> From: Netanel Belgazal <netanel@amazon.com>
> 
> netif_carrier_off() should be called only after register netdev.
> Move the function's call after the registration.

By "should" you mean in your driver, right?  I think calling
netif_carrier_off() on an unregistered netdev is a pretty standard
thing to do for drivers which manage carrier state.

> Signed-off-by: Netanel Belgazal <netanel@amazon.com>
> ---
>  drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> index fbe21a817bd8..ee50c56765a4 100644
> --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
> @@ -3276,14 +3276,14 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>  
>  	memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len);
>  
> -	netif_carrier_off(netdev);
> -
>  	rc = register_netdev(netdev);
>  	if (rc) {
>  		dev_err(&pdev->dev, "Cannot register net device\n");
>  		goto err_rss;
>  	}
>  
> +	netif_carrier_off(netdev);
> +
>  	INIT_WORK(&adapter->reset_task, ena_fw_reset_device);

This looks suspicious.  After you call register_netdev() someone can
open the device and link may come up before you clear it again with
carrier off.  Leading to netdev without a carrier until it's reopened.

>  	adapter->last_keep_alive_jiffies = jiffies;

^ permalink raw reply

* Re: [RFT net-next v3 0/5] dwmac-meson8b: RGMII clock fixes for Meson8b
From: Martin Blumenstingl @ 2017-12-29  7:48 UTC (permalink / raw)
  To: Emiliano Ingrassia
  Cc: netdev, linus.luessing, khilman, linux-amlogic, jbrunet,
	Neil Armstrong, peppe.cavallaro, alexandre.torgue
In-Reply-To: <20171229013116.GA5446@ingrassia.epigenesys.com>

Hi Emiliano,

On Fri, Dec 29, 2017 at 2:31 AM, Emiliano Ingrassia
<ingrassia@epigenesys.com> wrote:
> Hi Martin, Hi Dave,
>
> On Thu, Dec 28, 2017 at 11:21:23PM +0100, Martin Blumenstingl wrote:
>> Hi Dave,
>>
>> please do not apply this series until it got a Tested-by from Emiliano.
>>
>>
>> Hi Emiliano,
>>
>> you reported [0] that you couldn't get dwmac-meson8b to work on your
>> Odroid-C1. With your findings (register dumps, clk_summary output, etc.)
>> I think I was able to find a fix: it consists of two patches (which you
>> find in this series)
>>
>> Unfortunately I don't have any Meson8b boards with RGMII PHY so I could
>> only partially test this (I could only check if the clocks were
>> calculated correctly when using a dummy 500002394Hz input clock instead
>> of MPLL2).
>>
>> Could you please give this series a try and let me know about the
>> results?
>> You obviously still need your two "ARM: dts: meson8b" patches which
>> - add the "amlogic,meson8b-dwmac" compatible to meson8b.dtsi
>> - enable Ethernet on the Odroid-C1
>>
>> When testing on Meson8b this also needs a fix for the MPLL clock driver:
>> "clk: meson: mpll: use 64-bit maths in params_from_rate", see:
>> https://patchwork.kernel.org/patch/10131677/
>>
>>
>> I have tested this myself on a Khadas VIM (GXL SoC, internal RMII PHY)
>> and a Khadas VIM2 (GXM SoC, external RGMII PHY). Both are still working
>> fine (so let's hope that this also fixes your Meson8b issue :)).
>>
>>
>> changes since v1 at [1]:
>> - changed the subject of the cover-letter to indicate that this is all
>>   about the RGMII clock
>> - added PATCH #1 which ensures that we don't unnecessarily change the
>>   parent clocks in RMII mode (and also makes the code easier to
>>   understand)
>> - changed subject of PATCH #2 (formerly PATCH #1) to state that this
>>   is about the RGMII clock
>> - added Jerome's Reviewed-by to PATCH #2 (formerly PATCH #1)
>> - replaced PATCH #3 (formerly PATCH #2) with one that sets
>>   CLK_SET_RATE_PARENT on the mux and thus re-configures the MPLL2 clock
>>   on Meson8b correctly
>>
>> changes since v2 at [2]:
>> - added PATCH #2 to make the following patch easier
>> - Emiliano reported that there's currently another bug in the
>>   dwmac-meson8b driver which prevents it from working with RGMII PHYs on
>>   Meson8b: bit 10 of the PRG_ETH0 register configures a clock gate
>>   (instead of a divide by 5 or divide by 10 clock divider). This has not
>>   been visible on GXBB and later due to the input clock which always led
>>   to a selection of "divide by 10" (which is done internally in the IP
>>   block, but the bit actually means "enable RGMII clock output").
>>   PATCH #3 was added to address this issue.
>> - the commit message of PATCH #4 and #5 (formerly PATCH #2 and #3) were
>>   updated and the patch itself rebased because the m25_div clock was
>>   removed with the new PATCH #3 (so some of the statements were not
>>   valid anymore)
>>
>
> Here is the clk_summary relative to ethernet on Odroid-C1+
> with this new series applied:
>
> xtal                                1            1    24000000          0 0
>  sys_pll                            0            0  1200000000          0 0
>   cpu_clk                           0            0  1200000000          0 0
>  vid_pll                            0            0   732000000          0 0
>  fixed_pll                          2            2  2550000000          0 0
>   mpll2                             1            1   249999701          0 0
>    c9410000.ethernet#m250_sel       1            1   249999701          0 0
>     c9410000.ethernet#m250_div      1            1   249999701          0 0
>      c9410000.ethernet#fixed_div10  1            1    24999970          0 0
>       c9410000.ethernet#m25_en      1            1    24999970          0 0
>
> The ethernet prg0 register is set to 0x74A1 which should be correct with
> respect to the information contained in the S805 SoC manual.
> Actually, the ethernet is not yet fully functional.
> Trying to ping the board, I can see ARP request from host to board using
> tcpdump. However, the host can't see any response.
great - we're getting closer!

> Following the U-Boot value for prg0 register, which is 0x7d21, I also
> tried to set bit 11. As expected, this did not have any influence.
it *may* be something outside the PRG_ETH0 register then.
to confirm that: could you temporarily revert the last patch from this
series ("net: stmmac: dwmac-meson8b: propagate rate changes to the
parent clock")? this way MPLL2 will stay at ~500MHz and PRG_ETH0
should be identical to what u-boot sets (apart from bit 11, but that
is only relevant in RMII mode according to the datasheet)

> Another thing that we should check is the "Ethernet Memory PD" (see S805
> manual - sec. 5.4) register which bits 3-2 enable/disable ethernet
> normal operation. However, those bits are already cleared by U-Boot.
if the peripheral registers itself are configured correctly it's
typically one of these issues:
- gate clock not being enabled (can you confirm that you have the
"stmmaceth" with CLKID_ETH in the ethmac node?)
- incorrect pinmux settings (as a hack I would remove all ethernet
pinctrl properties/nodes from meson8b.dtsi and meson8b-odroidc1.dts.
before booting the mainline kernel you'll need to use Ethernet from
within u-boot once)
- incorrect TX delay (amlogic,tx-delay-ns = <2> should be defined in
meson8b-odroidc1.dts, but the driver should auto-select that value if
it's missing)
- IP block being in some undefined state which can be brought back
into a working state by adding the reset line (RESET_ETHERNET)
- Ethernet PHY being in some undefined state which can be brought back into
a working state by adding the reset line (GPIOH_4, see
meson-gxbb-odroidc2.dts how to use that)
- I have not seen that the power-domains ("Ethernet Memory PD") were a
problem yet, but you already checked that

maybe you can share your current .dts patch and a boot-log so others
can have a look as well?

> Thank you for the support.
thank you for your patience as well, most people would have given up by now

> Best regards,
>
> Emiliano
>
>>
>> [0] http://lists.infradead.org/pipermail/linux-amlogic/2017-December/005596.html
>> [1] http://lists.infradead.org/pipermail/linux-amlogic/2017-December/005848.html
>> [2] http://lists.infradead.org/pipermail/linux-amlogic/2017-December/005861.html
>>
>>
>> Martin Blumenstingl (5):
>>   net: stmmac: dwmac-meson8b: only configure the clocks in RGMII mode
>>   net: stmmac: dwmac-meson8b: simplify generating the clock names
>>   net: stmmac: dwmac-meson8b: fix internal RGMII clock configuration
>>   net: stmmac: dwmac-meson8b: fix setting the RGMII clock on Meson8b
>>   net: stmmac: dwmac-meson8b: propagate rate changes to the parent clock
>>
>>  .../net/ethernet/stmicro/stmmac/dwmac-meson8b.c    | 119 +++++++++++----------
>>  1 file changed, 63 insertions(+), 56 deletions(-)
>>
>> --
>> 2.15.1
>>


Regards
Martin

^ permalink raw reply

* Re: [RFT net-next v3 0/5] dwmac-meson8b: RGMII clock fixes for Meson8b
From: Martin Blumenstingl @ 2017-12-29  7:52 UTC (permalink / raw)
  To: Emiliano Ingrassia
  Cc: netdev, linus.luessing, khilman, linux-amlogic, jbrunet,
	Neil Armstrong, peppe.cavallaro, alexandre.torgue
In-Reply-To: <CAFBinCDVRNKBTG0CHcmV9iJ9gb2VeODCfU6BO+dyRU6bVwkffg@mail.gmail.com>

On Fri, Dec 29, 2017 at 8:48 AM, Martin Blumenstingl
<martin.blumenstingl@googlemail.com> wrote:
> Hi Emiliano,
>
> On Fri, Dec 29, 2017 at 2:31 AM, Emiliano Ingrassia
> <ingrassia@epigenesys.com> wrote:
>> Hi Martin, Hi Dave,
>>
>> On Thu, Dec 28, 2017 at 11:21:23PM +0100, Martin Blumenstingl wrote:
>>> Hi Dave,
>>>
>>> please do not apply this series until it got a Tested-by from Emiliano.
>>>
>>>
>>> Hi Emiliano,
>>>
>>> you reported [0] that you couldn't get dwmac-meson8b to work on your
>>> Odroid-C1. With your findings (register dumps, clk_summary output, etc.)
>>> I think I was able to find a fix: it consists of two patches (which you
>>> find in this series)
>>>
>>> Unfortunately I don't have any Meson8b boards with RGMII PHY so I could
>>> only partially test this (I could only check if the clocks were
>>> calculated correctly when using a dummy 500002394Hz input clock instead
>>> of MPLL2).
>>>
>>> Could you please give this series a try and let me know about the
>>> results?
>>> You obviously still need your two "ARM: dts: meson8b" patches which
>>> - add the "amlogic,meson8b-dwmac" compatible to meson8b.dtsi
>>> - enable Ethernet on the Odroid-C1
>>>
>>> When testing on Meson8b this also needs a fix for the MPLL clock driver:
>>> "clk: meson: mpll: use 64-bit maths in params_from_rate", see:
>>> https://patchwork.kernel.org/patch/10131677/
>>>
>>>
>>> I have tested this myself on a Khadas VIM (GXL SoC, internal RMII PHY)
>>> and a Khadas VIM2 (GXM SoC, external RGMII PHY). Both are still working
>>> fine (so let's hope that this also fixes your Meson8b issue :)).
>>>
>>>
>>> changes since v1 at [1]:
>>> - changed the subject of the cover-letter to indicate that this is all
>>>   about the RGMII clock
>>> - added PATCH #1 which ensures that we don't unnecessarily change the
>>>   parent clocks in RMII mode (and also makes the code easier to
>>>   understand)
>>> - changed subject of PATCH #2 (formerly PATCH #1) to state that this
>>>   is about the RGMII clock
>>> - added Jerome's Reviewed-by to PATCH #2 (formerly PATCH #1)
>>> - replaced PATCH #3 (formerly PATCH #2) with one that sets
>>>   CLK_SET_RATE_PARENT on the mux and thus re-configures the MPLL2 clock
>>>   on Meson8b correctly
>>>
>>> changes since v2 at [2]:
>>> - added PATCH #2 to make the following patch easier
>>> - Emiliano reported that there's currently another bug in the
>>>   dwmac-meson8b driver which prevents it from working with RGMII PHYs on
>>>   Meson8b: bit 10 of the PRG_ETH0 register configures a clock gate
>>>   (instead of a divide by 5 or divide by 10 clock divider). This has not
>>>   been visible on GXBB and later due to the input clock which always led
>>>   to a selection of "divide by 10" (which is done internally in the IP
>>>   block, but the bit actually means "enable RGMII clock output").
>>>   PATCH #3 was added to address this issue.
>>> - the commit message of PATCH #4 and #5 (formerly PATCH #2 and #3) were
>>>   updated and the patch itself rebased because the m25_div clock was
>>>   removed with the new PATCH #3 (so some of the statements were not
>>>   valid anymore)
>>>
>>
>> Here is the clk_summary relative to ethernet on Odroid-C1+
>> with this new series applied:
>>
>> xtal                                1            1    24000000          0 0
>>  sys_pll                            0            0  1200000000          0 0
>>   cpu_clk                           0            0  1200000000          0 0
>>  vid_pll                            0            0   732000000          0 0
>>  fixed_pll                          2            2  2550000000          0 0
>>   mpll2                             1            1   249999701          0 0
>>    c9410000.ethernet#m250_sel       1            1   249999701          0 0
>>     c9410000.ethernet#m250_div      1            1   249999701          0 0
>>      c9410000.ethernet#fixed_div10  1            1    24999970          0 0
>>       c9410000.ethernet#m25_en      1            1    24999970          0 0
>>
>> The ethernet prg0 register is set to 0x74A1 which should be correct with
>> respect to the information contained in the S805 SoC manual.
>> Actually, the ethernet is not yet fully functional.
>> Trying to ping the board, I can see ARP request from host to board using
>> tcpdump. However, the host can't see any response.
> great - we're getting closer!
>
>> Following the U-Boot value for prg0 register, which is 0x7d21, I also
>> tried to set bit 11. As expected, this did not have any influence.
> it *may* be something outside the PRG_ETH0 register then.
> to confirm that: could you temporarily revert the last patch from this
> series ("net: stmmac: dwmac-meson8b: propagate rate changes to the
> parent clock")? this way MPLL2 will stay at ~500MHz and PRG_ETH0
> should be identical to what u-boot sets (apart from bit 11, but that
> is only relevant in RMII mode according to the datasheet)
>
>> Another thing that we should check is the "Ethernet Memory PD" (see S805
>> manual - sec. 5.4) register which bits 3-2 enable/disable ethernet
>> normal operation. However, those bits are already cleared by U-Boot.
> if the peripheral registers itself are configured correctly it's
> typically one of these issues:
> - gate clock not being enabled (can you confirm that you have the
> "stmmaceth" with CLKID_ETH in the ethmac node?)
> - incorrect pinmux settings (as a hack I would remove all ethernet
> pinctrl properties/nodes from meson8b.dtsi and meson8b-odroidc1.dts.
> before booting the mainline kernel you'll need to use Ethernet from
> within u-boot once)
> - incorrect TX delay (amlogic,tx-delay-ns = <2> should be defined in
> meson8b-odroidc1.dts, but the driver should auto-select that value if
> it's missing)
> - IP block being in some undefined state which can be brought back
> into a working state by adding the reset line (RESET_ETHERNET)
> - Ethernet PHY being in some undefined state which can be brought back into
> a working state by adding the reset line (GPIOH_4, see
> meson-gxbb-odroidc2.dts how to use that)
> - I have not seen that the power-domains ("Ethernet Memory PD") were a
> problem yet, but you already checked that
and I forgot the "eee-broken-1000t;" property on the PHY! have a look
at meson-gxbb-odroidc2.dts - I would assume that the ethmac node in
your meson8b-odroidc1.dts looks almost identical (no interrupt
configuration inside the PHY node, different reset-GPIO)

>
> maybe you can share your current .dts patch and a boot-log so others
> can have a look as well?
>
>> Thank you for the support.
> thank you for your patience as well, most people would have given up by now
>
>> Best regards,
>>
>> Emiliano
>>
>>>
>>> [0] http://lists.infradead.org/pipermail/linux-amlogic/2017-December/005596.html
>>> [1] http://lists.infradead.org/pipermail/linux-amlogic/2017-December/005848.html
>>> [2] http://lists.infradead.org/pipermail/linux-amlogic/2017-December/005861.html
>>>
>>>
>>> Martin Blumenstingl (5):
>>>   net: stmmac: dwmac-meson8b: only configure the clocks in RGMII mode
>>>   net: stmmac: dwmac-meson8b: simplify generating the clock names
>>>   net: stmmac: dwmac-meson8b: fix internal RGMII clock configuration
>>>   net: stmmac: dwmac-meson8b: fix setting the RGMII clock on Meson8b
>>>   net: stmmac: dwmac-meson8b: propagate rate changes to the parent clock
>>>
>>>  .../net/ethernet/stmicro/stmmac/dwmac-meson8b.c    | 119 +++++++++++----------
>>>  1 file changed, 63 insertions(+), 56 deletions(-)
>>>
>>> --
>>> 2.15.1
>>>
>
>
> Regards
> Martin

^ permalink raw reply

* Re: [PATCH net 3/3] eet: ena: invoke netif_carrier_off() only after netdev registered
From: Belgazal, Netanel @ 2017-12-29  8:00 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: davem@davemloft.net, netdev@vger.kernel.org, Woodhouse, David,
	Machulsky, Zorik, Matushevsky, Alexander, Bshara, Saeed,
	Wilson, Matt, Liguori, Anthony, Bshara, Nafea, Schmeilin, Evgeny,
	Tzalik, Guy
In-Reply-To: <20171228234603.436ba946@cakuba.netronome.com>

Yes, I mean in my driver.
netif_carrier_off() has no effect when netdev is uninitialized.
So I must call it after register_netdev().

On 12/29/17, 9:46 AM, "Jakub Kicinski" <kubakici@wp.pl> wrote:

    By "should" you mean in your driver, right?  I think calling
    netif_carrier_off() on an unregistered netdev is a pretty standard
    thing to do for drivers which manage carrier state.


^ permalink raw reply

* Re: [PATCH net 3/3] eet: ena: invoke netif_carrier_off() only after netdev registered
From: Jakub Kicinski @ 2017-12-29  8:09 UTC (permalink / raw)
  To: Belgazal, Netanel
  Cc: davem@davemloft.net, netdev@vger.kernel.org, Woodhouse, David,
	Machulsky, Zorik, Matushevsky, Alexander, Bshara, Saeed,
	Wilson, Matt, Liguori, Anthony, Bshara, Nafea, Schmeilin, Evgeny,
	Tzalik, Guy
In-Reply-To: <B0951795-C990-49ED-9972-CDEED0F160A6@amazon.com>

On Fri, 29 Dec 2017 08:00:33 +0000, Belgazal, Netanel wrote:
> Yes, I mean in my driver.
> netif_carrier_off() has no effect when netdev is uninitialized.

Please look at the implementation again, test_*and_set*_bit().

> So I must call it after register_netdev().

Is there a user-visible problem you're trying to solve here?

^ permalink raw reply

* Re: [RFC PATCH bpf-next v2 1/4] tracing/kprobe: bpf: Check error injectable event is on function entry
From: Masami Hiramatsu @ 2017-12-29  8:20 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Steven Rostedt, Alexei Starovoitov, Josef Bacik, mingo, davem,
	netdev, linux-kernel, ast, kernel-team, daniel, linux-btrfs,
	darrick.wong, Josef Bacik, Akinobu Mita
In-Reply-To: <b6057235-a1f4-a461-a2d5-295e964249ea@fb.com>

On Thu, 28 Dec 2017 17:03:24 -0800
Alexei Starovoitov <ast@fb.com> wrote:

> On 12/28/17 12:20 AM, Masami Hiramatsu wrote:
> > On Wed, 27 Dec 2017 20:32:07 -0800
> > Alexei Starovoitov <ast@fb.com> wrote:
> >
> >> On 12/27/17 8:16 PM, Steven Rostedt wrote:
> >>> On Wed, 27 Dec 2017 19:45:42 -0800
> >>> Alexei Starovoitov <ast@fb.com> wrote:
> >>>
> >>>> I don't think that's the case. My reading of current
> >>>> trace_kprobe_ftrace() -> arch_check_ftrace_location()
> >>>> is that it will not be true for old mcount case.
> >>>
> >>> In the old mcount case, you can't use ftrace to return without calling
> >>> the function. That is, no modification of the return ip, unless you
> >>> created a trampoline that could handle arbitrary stack frames, and
> >>> remove them from the stack before returning back to the function.
> >>
> >> correct. I was saying that trace_kprobe_ftrace() won't let us do
> >> bpf_override_return with old mcount.
> >
> > No, trace_kprobe_ftrace() just checks the given address will be
> > managed by ftrace. you can see arch_check_ftrace_location() in kernel/kprobes.c.
> >
> > FYI, CONFIG_KPROBES_ON_FTRACE depends on DYNAMIC_FTRACE_WITH_REGS, and
> > DYNAMIC_FTRACE_WITH_REGS doesn't depend on CC_USING_FENTRY.
> > This means if you compile kernel with old gcc and enable DYNAMIC_FTRACE,
> > kprobes uses ftrace on mcount address which is NOT the entry point
> > of target function.
> 
> ok. fair enough. I think we can gate the feature to !mcount only.
> 
> > On the other hand, changing IP feature has been implemented originally
> > by kprobes with int3 (sw breakpoint). This means you can use kprobes
> > at correct address (the entry address of the function) you can hijack
> > the function, as jprobe did.
> >
> >>>> As far as the rest of your arguments it very much puzzles me that
> >>>> you claim that this patch suppose to work based on historical
> >>>> reasoning whereas you did NOT test it.
> >>>
> >>> I believe that Masami is saying that the modification of the IP from
> >>> kprobes has been very well tested. But I'm guessing that you still want
> >>> a test case for using kprobes in this particular instance. It's not the
> >>> implementation of modifying the IP that you are worried about, but the
> >>> implementation of BPF using it in this case. Right?
> >>
> >> exactly. No doubt that old code works.
> >> But it doesn't mean that bpf_override_return() will continue to
> >> work in kprobes that are not ftrace based.
> >> I suspect Josef's existing test case will cover this situation.
> >> Probably only special .config is needed to disable ftrace, so
> >> "kprobe on entry but not ftrace" check will kick in.
> >
> > Right. If you need to test it, you can run Josef's test case without
> > CONFIG_DYNAMIC_FTRACE.
> 
> It should be obvious that the person who submits the patch
> must run the tests.
> 
> >> But I didn't get an impression that this situation was tested.
> >> Instead I see only logical reasoning that it's _supposed_ to work.
> >> That's not enough.
> >
> > OK, so would you just ask me to run samples/bpf ?
> 
> Please run Josef's test in the !ftrace setup.

Yes, I'll add the result of the test case.

Thank you,


-- 
Masami Hiramatsu <mhiramat@kernel.org>

^ permalink raw reply

* Re: iproute2 net-next
From: Jiri Pirko @ 2017-12-29  8:58 UTC (permalink / raw)
  To: Daniel Borkmann; +Cc: Leon Romanovsky, Stephen Hemminger, netdev, dsa
In-Reply-To: <5f75c535-e4a0-1cfa-d4b0-c3c60d50a1c6@iogearbox.net>

Fri, Dec 29, 2017 at 12:46:31AM CET, daniel@iogearbox.net wrote:
>On 12/26/2017 10:35 AM, Leon Romanovsky wrote:
>> On Mon, Dec 25, 2017 at 10:14:26PM -0800, Stephen Hemminger wrote:
>>> On Tue, 26 Dec 2017 06:47:43 +0200
>>> Leon Romanovsky <leon@kernel.org> wrote:
>>>
>>>> On Mon, Dec 25, 2017 at 10:49:19AM -0800, Stephen Hemminger wrote:
>>>>> David Ahern has agreed to take over managing the net-next branch of iproute2.
>>>>> The new location is:
>>>>>  https://git.kernel.org/pub/scm/linux/kernel/git/dsahern/iproute2-next.git/
>>>>>
>>>>> In the past, I have accepted new features into iproute2 master branch, but
>>>>> am changing the policy so that outside of the merge window (up until -rc1)
>>>>> new features will get put into net-next to get some more review and testing
>>>>> time. This means that things like the proposed batch streaming mode will
>>>>> go through net-next.
>>>>
>>>> Did you consider to create one shared repo for the iproute2 to allow
>>>> multiple committers workflow?
>>>
>>> For now having separate trees is best, there is no need for multiple
>>> committers the load is very light.
>>>
>>>> It will be much convenient for the users to have one place for
>>>> master/stable/net-next branches, instead of actually following two
>>>> different repositories.
>>>
>>> If you are doing network development, you already need to deal with
>>> multiple repo's on the kernel side so there is no difference.
>> 
>> I agree with you that one extra "git remote add .." is not so huge and
>> all people who develop for the netdev will do it. My concern is about
>> Documentation and newcomers, who will have a hard time to find a right
>> tree.
>
>I guess it would certainly help to identify the official repo to rebase
>against much quicker if it would be under a common group on korg e.g.
>
>  * iproute2/iproute2.git         - for current cycle
>  * iproute2/iproute2-next.git    - for net-next bits
>
>and also be in line with other tooling (ethtool and others), even if
>not as high volume, but it would make it unambiguous right away from
>the other, private iproute2 repos on korg, imho. Just a thought.

+1

I was about to suggest this. This is nice opportunity to do such change.


>
>>>> Example, of such shared repo:
>>>> BPF: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git/
>>>> Bluetooth: https://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git/
>>>> RDMA: https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git/
>>>
>>> Most of these are high volume or vendor silo'd which is not the case here.
>Cheers,
>Daniel

^ permalink raw reply

* [PATCH] NET: usb: qmi_wwan: add support for YUGA CLM920-NC5 PID 0x9625
From: SZ Lin (林上智) @ 2017-12-29  9:02 UTC (permalink / raw)
  Cc: SZ Lin (林上智), Bjørn Mork, netdev,
	linux-usb, linux-kernel

This patch adds support for PID 0x9625 of YUGA CLM920-NC5.

YUGA CLM920-NC5 needs to enable QMI_WWAN_QUIRK_DTR before QMI operation.

qmicli -d /dev/cdc-wdm0 -p --dms-get-revision
[/dev/cdc-wdm0] Device revision retrieved:
        Revision: 'CLM920_NC5-V1  1  [Oct 23 2016 19:00:00]'

Signed-off-by: SZ Lin (林上智) <sz.lin@moxa.com>
---
 drivers/net/usb/qmi_wwan.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 3000ddd1c7e2..728819feab44 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1100,6 +1100,7 @@ static const struct usb_device_id products[] = {
 	{QMI_FIXED_INTF(0x05c6, 0x9084, 4)},
 	{QMI_FIXED_INTF(0x05c6, 0x920d, 0)},
 	{QMI_FIXED_INTF(0x05c6, 0x920d, 5)},
+	{QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)},	/* YUGA CLM920-NC5 */
 	{QMI_FIXED_INTF(0x0846, 0x68a2, 8)},
 	{QMI_FIXED_INTF(0x12d1, 0x140c, 1)},	/* Huawei E173 */
 	{QMI_FIXED_INTF(0x12d1, 0x14ac, 1)},	/* Huawei E1820 */
-- 
2.15.1

^ permalink raw reply related

* [PATCH][V2] wcn36xx: fix incorrect assignment to msg_body.min_ch_time
From: Colin King @ 2017-12-29  9:07 UTC (permalink / raw)
  To: Eugene Krasnikov, Kalle Valo, wcn36xx, linux-wireless, netdev
  Cc: kernel-janitors, linux-kernel

From: Colin Ian King <colin.king@canonical.com>

The second assignment to msg_body.min_ch_time is incorrect, it
should actually be to msg_body.max_ch_time.

Thanks to Bjorn Andersson for identifying the correct way to fix
this as my original fix was incorrect.

Detected by CoverityScan, CID#1463042 ("Unused Value")

Fixes: 2f3bef4b247e ("wcn36xx: Add hardware scan offload support")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 drivers/net/wireless/ath/wcn36xx/smd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
index 2914618a0335..2a4871ca9c72 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.c
+++ b/drivers/net/wireless/ath/wcn36xx/smd.c
@@ -626,7 +626,7 @@ int wcn36xx_smd_start_hw_scan(struct wcn36xx *wcn, struct ieee80211_vif *vif,
 
 	msg_body.scan_type = WCN36XX_HAL_SCAN_TYPE_ACTIVE;
 	msg_body.min_ch_time = 30;
-	msg_body.min_ch_time = 100;
+	msg_body.max_ch_time = 100;
 	msg_body.scan_hidden = 1;
 	memcpy(msg_body.mac, vif->addr, ETH_ALEN);
 	msg_body.p2p_search = vif->p2p;
-- 
2.14.1

^ permalink raw reply related

* [PATCH][V2] wcn36xx: fix incorrect assignment to msg_body.min_ch_time
From: Colin King @ 2017-12-29  9:07 UTC (permalink / raw)
  To: Eugene Krasnikov, Kalle Valo, wcn36xx, linux-wireless, netdev
  Cc: kernel-janitors, linux-kernel
In-Reply-To: <20171229090732.14928-1-colin.king@canonical.com>

From: Colin Ian King <colin.king@canonical.com>

The second assignment to msg_body.min_ch_time is incorrect, it
should actually be to msg_body.max_ch_time.

Thanks to Bjorn Andersson for identifying the correct way to fix
this as my original fix was incorrect.

Detected by CoverityScan, CID#1463042 ("Unused Value")

Fixes: 2f3bef4b247e ("wcn36xx: Add hardware scan offload support")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
---
 drivers/net/wireless/ath/wcn36xx/smd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
index 2914618a0335..2a4871ca9c72 100644
--- a/drivers/net/wireless/ath/wcn36xx/smd.c
+++ b/drivers/net/wireless/ath/wcn36xx/smd.c
@@ -626,7 +626,7 @@ int wcn36xx_smd_start_hw_scan(struct wcn36xx *wcn, struct ieee80211_vif *vif,
 
 	msg_body.scan_type = WCN36XX_HAL_SCAN_TYPE_ACTIVE;
 	msg_body.min_ch_time = 30;
-	msg_body.min_ch_time = 100;
+	msg_body.max_ch_time = 100;
 	msg_body.scan_hidden = 1;
 	memcpy(msg_body.mac, vif->addr, ETH_ALEN);
 	msg_body.p2p_search = vif->p2p;
-- 
2.14.1

^ permalink raw reply related

* [PATCH net-next] net: hns: add ACPI mode support for ethtool -p
From: Peng Li @ 2017-12-29  9:11 UTC (permalink / raw)
  To: davem; +Cc: netdev, linux-kernel, linuxarm, salil.mehta, lipeng321

From: Jian Shen <shenjian15@huawei.com>

The locate operation interface of the fiber port only
works in DT mode. Add a new interface to control the
locate LED in ACPI mode.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Peng Li <lipeng321@huawei.com>
Tested-by: Zhou Wang <wangzhou1@hisilicon.com>
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c  |  2 +-
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c | 57 +++++++++++++++++++++-
 2 files changed, 57 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 8b5cdf4..cac86e9 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -1168,7 +1168,7 @@ void hns_set_led_opt(struct hns_mac_cb *mac_cb)
 int hns_cpld_led_set_id(struct hns_mac_cb *mac_cb,
 			enum hnae_led_state status)
 {
-	if (!mac_cb || !mac_cb->cpld_ctrl)
+	if (!mac_cb)
 		return 0;
 
 	return mac_cb->dsaf_dev->misc_op->cpld_set_led_id(mac_cb, status);
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 408b63f..ca247c2 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -18,6 +18,7 @@ enum _dsm_op_index {
 	HNS_OP_LED_SET_FUNC             = 0x3,
 	HNS_OP_GET_PORT_TYPE_FUNC       = 0x4,
 	HNS_OP_GET_SFP_STAT_FUNC        = 0x5,
+	HNS_OP_LOCATE_LED_SET_FUNC      = 0x6,
 };
 
 enum _dsm_rst_type {
@@ -81,6 +82,33 @@ static void hns_dsaf_acpi_ledctrl_by_port(struct hns_mac_cb *mac_cb, u8 op_type,
        ACPI_FREE(obj);
 }
 
+static void hns_dsaf_acpi_locate_ledctrl_by_port(struct hns_mac_cb *mac_cb,
+						 u8 op_type, u32 locate,
+						 u32 port)
+{
+	union acpi_object obj_args[2], argv4;
+	union acpi_object *obj;
+
+	obj_args[0].integer.type = ACPI_TYPE_INTEGER;
+	obj_args[0].integer.value = locate;
+	obj_args[1].integer.type = ACPI_TYPE_INTEGER;
+	obj_args[1].integer.value = port;
+
+	argv4.type = ACPI_TYPE_PACKAGE;
+	argv4.package.count = 2;
+	argv4.package.elements = obj_args;
+
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(mac_cb->dev),
+				&hns_dsaf_acpi_dsm_guid, 0, op_type, &argv4);
+	if (!obj) {
+		dev_err(mac_cb->dev, "ledctrl fail, locate:%d port:%d!\n",
+			locate, port);
+		return;
+	}
+
+	ACPI_FREE(obj);
+}
+
 static void hns_cpld_set_led(struct hns_mac_cb *mac_cb, int link_status,
 			     u16 speed, int data)
 {
@@ -160,6 +188,9 @@ static void cpld_led_reset_acpi(struct hns_mac_cb *mac_cb)
 static int cpld_set_led_id(struct hns_mac_cb *mac_cb,
 			   enum hnae_led_state status)
 {
+	if (!mac_cb->cpld_ctrl)
+		return 0;
+
 	switch (status) {
 	case HNAE_LED_ACTIVE:
 		mac_cb->cpld_led_value =
@@ -184,6 +215,30 @@ static int cpld_set_led_id(struct hns_mac_cb *mac_cb,
 	return 0;
 }
 
+static int cpld_set_led_id_acpi(struct hns_mac_cb *mac_cb,
+				enum hnae_led_state status)
+{
+	switch (status) {
+	case HNAE_LED_ACTIVE:
+		hns_dsaf_acpi_locate_ledctrl_by_port(mac_cb,
+						     HNS_OP_LOCATE_LED_SET_FUNC,
+						     CPLD_LED_ON_VALUE,
+						     mac_cb->mac_id);
+		break;
+	case HNAE_LED_INACTIVE:
+		hns_dsaf_acpi_locate_ledctrl_by_port(mac_cb,
+						     HNS_OP_LOCATE_LED_SET_FUNC,
+						     CPLD_LED_DEFAULT_VALUE,
+						     mac_cb->mac_id);
+		break;
+	default:
+		dev_err(mac_cb->dev, "invalid led state: %d!", status);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 #define RESET_REQ_OR_DREQ 1
 
 static void hns_dsaf_acpi_srst_by_port(struct dsaf_device *dsaf_dev, u8 op_type,
@@ -660,7 +715,7 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev)
 	} else if (is_acpi_node(dsaf_dev->dev->fwnode)) {
 		misc_op->cpld_set_led = hns_cpld_set_led_acpi;
 		misc_op->cpld_reset_led = cpld_led_reset_acpi;
-		misc_op->cpld_set_led_id = cpld_set_led_id;
+		misc_op->cpld_set_led_id = cpld_set_led_id_acpi;
 
 		misc_op->dsaf_reset = hns_dsaf_rst_acpi;
 		misc_op->xge_srst = hns_dsaf_xge_srst_by_port_acpi;
-- 
1.9.1

^ permalink raw reply related

* [PATCH net-next 0/2] XDP transmission for tuntap
From: Jason Wang @ 2017-12-29 10:00 UTC (permalink / raw)
  To: netdev, linux-kernel; +Cc: mst, jbrouer, Jason Wang

Hi all:

This series tries to implement XDP transmission (ndo_xdp_xmit) for
tuntap. A pointer ring is used for queuing both XDP buffers and
sk_buffs; this is done by encoding the type into the lowest bit of the
pointer and storing XDP metadata in the headroom of the XDP buff.

Tests get 3.05 Mpps when doing xdp_redirect_map from ixgbe to VM
(testpmd + virtio-net in guest).

Please review.

Thanks

Jason Wang (2):
  tun/tap: use ptr_ring instead of skb_array
  tuntap: XDP transmission

 drivers/net/tap.c      |  41 ++++-----
 drivers/net/tun.c      | 233 ++++++++++++++++++++++++++++++++++++++-----------
 drivers/vhost/net.c    |  52 ++++++-----
 include/linux/if_tap.h |   6 +-
 include/linux/if_tun.h |  21 ++++-
 5 files changed, 258 insertions(+), 95 deletions(-)

-- 
2.7.4

^ permalink raw reply

* [PATCH net-next 1/2] tun/tap: use ptr_ring instead of skb_array
From: Jason Wang @ 2017-12-29 10:00 UTC (permalink / raw)
  To: netdev, linux-kernel; +Cc: mst, jbrouer, Jason Wang
In-Reply-To: <1514541604-12728-1-git-send-email-jasowang@redhat.com>

This patch switches to use ptr_ring instead of skb_array. This will be
used to enqueue different types of pointers by encoding type into
lower bits.

Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tap.c      | 41 +++++++++++++++++++++--------------------
 drivers/net/tun.c      | 42 ++++++++++++++++++++++--------------------
 drivers/vhost/net.c    | 39 ++++++++++++++++++++-------------------
 include/linux/if_tap.h |  6 +++---
 include/linux/if_tun.h |  4 ++--
 5 files changed, 68 insertions(+), 64 deletions(-)

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 0a886fda..7c38659 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -330,7 +330,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
 	if (!q)
 		return RX_HANDLER_PASS;
 
-	if (__skb_array_full(&q->skb_array))
+	if (__ptr_ring_full(&q->ring))
 		goto drop;
 
 	skb_push(skb, ETH_HLEN);
@@ -348,7 +348,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
 			goto drop;
 
 		if (!segs) {
-			if (skb_array_produce(&q->skb_array, skb))
+			if (ptr_ring_produce(&q->ring, skb))
 				goto drop;
 			goto wake_up;
 		}
@@ -358,7 +358,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
 			struct sk_buff *nskb = segs->next;
 
 			segs->next = NULL;
-			if (skb_array_produce(&q->skb_array, segs)) {
+			if (ptr_ring_produce(&q->ring, segs)) {
 				kfree_skb(segs);
 				kfree_skb_list(nskb);
 				break;
@@ -375,7 +375,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb)
 		    !(features & NETIF_F_CSUM_MASK) &&
 		    skb_checksum_help(skb))
 			goto drop;
-		if (skb_array_produce(&q->skb_array, skb))
+		if (ptr_ring_produce(&q->ring, skb))
 			goto drop;
 	}
 
@@ -497,7 +497,7 @@ static void tap_sock_destruct(struct sock *sk)
 {
 	struct tap_queue *q = container_of(sk, struct tap_queue, sk);
 
-	skb_array_cleanup(&q->skb_array);
+	ptr_ring_cleanup(&q->ring, __skb_array_destroy_skb);
 }
 
 static int tap_open(struct inode *inode, struct file *file)
@@ -517,7 +517,7 @@ static int tap_open(struct inode *inode, struct file *file)
 					     &tap_proto, 0);
 	if (!q)
 		goto err;
-	if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL)) {
+	if (ptr_ring_init(&q->ring, tap->dev->tx_queue_len, GFP_KERNEL)) {
 		sk_free(&q->sk);
 		goto err;
 	}
@@ -546,7 +546,7 @@ static int tap_open(struct inode *inode, struct file *file)
 
 	err = tap_set_queue(tap, file, q);
 	if (err) {
-		/* tap_sock_destruct() will take care of freeing skb_array */
+		/* tap_sock_destruct() will take care of freeing ptr_ring */
 		goto err_put;
 	}
 
@@ -583,7 +583,7 @@ static unsigned int tap_poll(struct file *file, poll_table *wait)
 	mask = 0;
 	poll_wait(file, &q->wq.wait, wait);
 
-	if (!skb_array_empty(&q->skb_array))
+	if (!ptr_ring_empty(&q->ring))
 		mask |= POLLIN | POLLRDNORM;
 
 	if (sock_writeable(&q->sk) ||
@@ -844,7 +844,7 @@ static ssize_t tap_do_read(struct tap_queue *q,
 					TASK_INTERRUPTIBLE);
 
 		/* Read frames from the queue */
-		skb = skb_array_consume(&q->skb_array);
+		skb = ptr_ring_consume(&q->ring);
 		if (skb)
 			break;
 		if (noblock) {
@@ -1176,7 +1176,7 @@ static int tap_peek_len(struct socket *sock)
 {
 	struct tap_queue *q = container_of(sock, struct tap_queue,
 					       sock);
-	return skb_array_peek_len(&q->skb_array);
+	return PTR_RING_PEEK_CALL(&q->ring, __skb_array_len_with_tag);
 }
 
 /* Ops structure to mimic raw sockets with tun */
@@ -1202,7 +1202,7 @@ struct socket *tap_get_socket(struct file *file)
 }
 EXPORT_SYMBOL_GPL(tap_get_socket);
 
-struct skb_array *tap_get_skb_array(struct file *file)
+struct ptr_ring *tap_get_ptr_ring(struct file *file)
 {
 	struct tap_queue *q;
 
@@ -1211,29 +1211,30 @@ struct skb_array *tap_get_skb_array(struct file *file)
 	q = file->private_data;
 	if (!q)
 		return ERR_PTR(-EBADFD);
-	return &q->skb_array;
+	return &q->ring;
 }
-EXPORT_SYMBOL_GPL(tap_get_skb_array);
+EXPORT_SYMBOL_GPL(tap_get_ptr_ring);
 
 int tap_queue_resize(struct tap_dev *tap)
 {
 	struct net_device *dev = tap->dev;
 	struct tap_queue *q;
-	struct skb_array **arrays;
+	struct ptr_ring **rings;
 	int n = tap->numqueues;
 	int ret, i = 0;
 
-	arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL);
-	if (!arrays)
+	rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL);
+	if (!rings)
 		return -ENOMEM;
 
 	list_for_each_entry(q, &tap->queue_list, next)
-		arrays[i++] = &q->skb_array;
+		rings[i++] = &q->ring;
 
-	ret = skb_array_resize_multiple(arrays, n,
-					dev->tx_queue_len, GFP_KERNEL);
+	ret = ptr_ring_resize_multiple(rings, n,
+				       dev->tx_queue_len, GFP_KERNEL,
+				       __skb_array_destroy_skb);
 
-	kfree(arrays);
+	kfree(rings);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(tap_queue_resize);
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e367d631..2c89efe 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -179,7 +179,7 @@ struct tun_file {
 	struct mutex napi_mutex;	/* Protects access to the above napi */
 	struct list_head next;
 	struct tun_struct *detached;
-	struct skb_array tx_array;
+	struct ptr_ring tx_ring;
 };
 
 struct tun_flow_entry {
@@ -634,7 +634,7 @@ static void tun_queue_purge(struct tun_file *tfile)
 {
 	struct sk_buff *skb;
 
-	while ((skb = skb_array_consume(&tfile->tx_array)) != NULL)
+	while ((skb = ptr_ring_consume(&tfile->tx_ring)) != NULL)
 		kfree_skb(skb);
 
 	skb_queue_purge(&tfile->sk.sk_write_queue);
@@ -688,7 +688,8 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 				unregister_netdevice(tun->dev);
 		}
 		if (tun)
-			skb_array_cleanup(&tfile->tx_array);
+			ptr_ring_cleanup(&tfile->tx_ring,
+					 __skb_array_destroy_skb);
 		sock_put(&tfile->sk);
 	}
 }
@@ -777,7 +778,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
 	}
 
 	if (!tfile->detached &&
-	    skb_array_init(&tfile->tx_array, dev->tx_queue_len, GFP_KERNEL)) {
+	    ptr_ring_init(&tfile->tx_ring, dev->tx_queue_len, GFP_KERNEL)) {
 		err = -ENOMEM;
 		goto out;
 	}
@@ -1027,7 +1028,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	nf_reset(skb);
 
-	if (skb_array_produce(&tfile->tx_array, skb))
+	if (ptr_ring_produce(&tfile->tx_ring, skb))
 		goto drop;
 
 	/* Notify and wake up reader process */
@@ -1295,7 +1296,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table *wait)
 
 	poll_wait(file, sk_sleep(sk), wait);
 
-	if (!skb_array_empty(&tfile->tx_array))
+	if (!ptr_ring_empty(&tfile->tx_ring))
 		mask |= POLLIN | POLLRDNORM;
 
 	if (tun->dev->flags & IFF_UP &&
@@ -1944,7 +1945,7 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
 	struct sk_buff *skb = NULL;
 	int error = 0;
 
-	skb = skb_array_consume(&tfile->tx_array);
+	skb = ptr_ring_consume(&tfile->tx_ring);
 	if (skb)
 		goto out;
 	if (noblock) {
@@ -1956,7 +1957,7 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
 	current->state = TASK_INTERRUPTIBLE;
 
 	while (1) {
-		skb = skb_array_consume(&tfile->tx_array);
+		skb = ptr_ring_consume(&tfile->tx_ring);
 		if (skb)
 			break;
 		if (signal_pending(current)) {
@@ -2186,7 +2187,7 @@ static int tun_peek_len(struct socket *sock)
 	if (!tun)
 		return 0;
 
-	ret = skb_array_peek_len(&tfile->tx_array);
+	ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, __skb_array_len_with_tag);
 	tun_put(tun);
 
 	return ret;
@@ -3092,25 +3093,26 @@ static int tun_queue_resize(struct tun_struct *tun)
 {
 	struct net_device *dev = tun->dev;
 	struct tun_file *tfile;
-	struct skb_array **arrays;
+	struct ptr_ring **rings;
 	int n = tun->numqueues + tun->numdisabled;
 	int ret, i;
 
-	arrays = kmalloc_array(n, sizeof(*arrays), GFP_KERNEL);
-	if (!arrays)
+	rings = kmalloc_array(n, sizeof(*rings), GFP_KERNEL);
+	if (!rings)
 		return -ENOMEM;
 
 	for (i = 0; i < tun->numqueues; i++) {
 		tfile = rtnl_dereference(tun->tfiles[i]);
-		arrays[i] = &tfile->tx_array;
+		rings[i] = &tfile->tx_ring;
 	}
 	list_for_each_entry(tfile, &tun->disabled, next)
-		arrays[i++] = &tfile->tx_array;
+		rings[i++] = &tfile->tx_ring;
 
-	ret = skb_array_resize_multiple(arrays, n,
-					dev->tx_queue_len, GFP_KERNEL);
+	ret = ptr_ring_resize_multiple(rings, n,
+				       dev->tx_queue_len, GFP_KERNEL,
+				       __skb_array_destroy_skb);
 
-	kfree(arrays);
+	kfree(rings);
 	return ret;
 }
 
@@ -3196,7 +3198,7 @@ struct socket *tun_get_socket(struct file *file)
 }
 EXPORT_SYMBOL_GPL(tun_get_socket);
 
-struct skb_array *tun_get_skb_array(struct file *file)
+struct ptr_ring *tun_get_tx_ring(struct file *file)
 {
 	struct tun_file *tfile;
 
@@ -3205,9 +3207,9 @@ struct skb_array *tun_get_skb_array(struct file *file)
 	tfile = file->private_data;
 	if (!tfile)
 		return ERR_PTR(-EBADFD);
-	return &tfile->tx_array;
+	return &tfile->tx_ring;
 }
-EXPORT_SYMBOL_GPL(tun_get_skb_array);
+EXPORT_SYMBOL_GPL(tun_get_tx_ring);
 
 module_init(tun_init);
 module_exit(tun_cleanup);
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index c7bdeb6..c316555 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -89,7 +89,7 @@ struct vhost_net_ubuf_ref {
 
 #define VHOST_RX_BATCH 64
 struct vhost_net_buf {
-	struct sk_buff **queue;
+	void **queue;
 	int tail;
 	int head;
 };
@@ -108,7 +108,7 @@ struct vhost_net_virtqueue {
 	/* Reference counting for outstanding ubufs.
 	 * Protected by vq mutex. Writers must also take device mutex. */
 	struct vhost_net_ubuf_ref *ubufs;
-	struct skb_array *rx_array;
+	struct ptr_ring *rx_ring;
 	struct vhost_net_buf rxq;
 };
 
@@ -158,7 +158,7 @@ static int vhost_net_buf_produce(struct vhost_net_virtqueue *nvq)
 	struct vhost_net_buf *rxq = &nvq->rxq;
 
 	rxq->head = 0;
-	rxq->tail = skb_array_consume_batched(nvq->rx_array, rxq->queue,
+	rxq->tail = ptr_ring_consume_batched(nvq->rx_ring, rxq->queue,
 					      VHOST_RX_BATCH);
 	return rxq->tail;
 }
@@ -167,9 +167,10 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq)
 {
 	struct vhost_net_buf *rxq = &nvq->rxq;
 
-	if (nvq->rx_array && !vhost_net_buf_is_empty(rxq)) {
-		skb_array_unconsume(nvq->rx_array, rxq->queue + rxq->head,
-				    vhost_net_buf_get_size(rxq));
+	if (nvq->rx_ring && !vhost_net_buf_is_empty(rxq)) {
+		ptr_ring_unconsume(nvq->rx_ring, rxq->queue + rxq->head,
+				   vhost_net_buf_get_size(rxq),
+				   __skb_array_destroy_skb);
 		rxq->head = rxq->tail = 0;
 	}
 }
@@ -583,7 +584,7 @@ static int peek_head_len(struct vhost_net_virtqueue *rvq, struct sock *sk)
 	int len = 0;
 	unsigned long flags;
 
-	if (rvq->rx_array)
+	if (rvq->rx_ring)
 		return vhost_net_buf_peek(rvq);
 
 	spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
@@ -790,7 +791,7 @@ static void handle_rx(struct vhost_net *net)
 			 * they refilled. */
 			goto out;
 		}
-		if (nvq->rx_array)
+		if (nvq->rx_ring)
 			msg.msg_control = vhost_net_buf_consume(&nvq->rxq);
 		/* On overrun, truncate and discard */
 		if (unlikely(headcount > UIO_MAXIOV)) {
@@ -896,7 +897,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 	struct vhost_net *n;
 	struct vhost_dev *dev;
 	struct vhost_virtqueue **vqs;
-	struct sk_buff **queue;
+	void **queue;
 	int i;
 
 	n = kvmalloc(sizeof *n, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
@@ -908,7 +909,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 		return -ENOMEM;
 	}
 
-	queue = kmalloc_array(VHOST_RX_BATCH, sizeof(struct sk_buff *),
+	queue = kmalloc_array(VHOST_RX_BATCH, sizeof(void *),
 			      GFP_KERNEL);
 	if (!queue) {
 		kfree(vqs);
@@ -1046,23 +1047,23 @@ static struct socket *get_raw_socket(int fd)
 	return ERR_PTR(r);
 }
 
-static struct skb_array *get_tap_skb_array(int fd)
+static struct ptr_ring *get_tap_ptr_ring(int fd)
 {
-	struct skb_array *array;
+	struct ptr_ring *ring;
 	struct file *file = fget(fd);
 
 	if (!file)
 		return NULL;
-	array = tun_get_skb_array(file);
-	if (!IS_ERR(array))
+	ring = tun_get_tx_ring(file);
+	if (!IS_ERR(ring))
 		goto out;
-	array = tap_get_skb_array(file);
-	if (!IS_ERR(array))
+	ring = tap_get_ptr_ring(file);
+	if (!IS_ERR(ring))
 		goto out;
-	array = NULL;
+	ring = NULL;
 out:
 	fput(file);
-	return array;
+	return ring;
 }
 
 static struct socket *get_tap_socket(int fd)
@@ -1143,7 +1144,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 		vq->private_data = sock;
 		vhost_net_buf_unproduce(nvq);
 		if (index == VHOST_NET_VQ_RX)
-			nvq->rx_array = get_tap_skb_array(fd);
+			nvq->rx_ring = get_tap_ptr_ring(fd);
 		r = vhost_vq_init_access(vq);
 		if (r)
 			goto err_used;
diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h
index 3ecef57..8e66866 100644
--- a/include/linux/if_tap.h
+++ b/include/linux/if_tap.h
@@ -4,7 +4,7 @@
 
 #if IS_ENABLED(CONFIG_TAP)
 struct socket *tap_get_socket(struct file *);
-struct skb_array *tap_get_skb_array(struct file *file);
+struct ptr_ring *tap_get_ptr_ring(struct file *file);
 #else
 #include <linux/err.h>
 #include <linux/errno.h>
@@ -14,7 +14,7 @@ static inline struct socket *tap_get_socket(struct file *f)
 {
 	return ERR_PTR(-EINVAL);
 }
-static inline struct skb_array *tap_get_skb_array(struct file *f)
+static inline struct ptr_ring *tap_get_ptr_ring(struct file *f)
 {
 	return ERR_PTR(-EINVAL);
 }
@@ -70,7 +70,7 @@ struct tap_queue {
 	u16 queue_index;
 	bool enabled;
 	struct list_head next;
-	struct skb_array skb_array;
+	struct ptr_ring ring;
 };
 
 rx_handler_result_t tap_handle_frame(struct sk_buff **pskb);
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index bf9bdf4..bdee9b8 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -19,7 +19,7 @@
 
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);
-struct skb_array *tun_get_skb_array(struct file *file);
+struct ptr_ring *tun_get_tx_ring(struct file *file);
 #else
 #include <linux/err.h>
 #include <linux/errno.h>
@@ -29,7 +29,7 @@ static inline struct socket *tun_get_socket(struct file *f)
 {
 	return ERR_PTR(-EINVAL);
 }
-static inline struct skb_array *tun_get_skb_array(struct file *f)
+static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
 {
 	return ERR_PTR(-EINVAL);
 }
-- 
2.7.4

^ permalink raw reply related

* [PATCH net-next 2/2] tuntap: XDP transmission
From: Jason Wang @ 2017-12-29 10:00 UTC (permalink / raw)
  To: netdev, linux-kernel; +Cc: mst, jbrouer, Jason Wang, Jesper Dangaard Brouer
In-Reply-To: <1514541604-12728-1-git-send-email-jasowang@redhat.com>

This patch implements XDP transmission for TAP. Since we can't create
new queues for TAP during XDP set, the existing ptr_ring is reused for
queuing XDP buffers. To distinguish an xdp_buff from an sk_buff,
TUN_XDP_FLAG (0x1ULL) is encoded into the lowest bit of the xdp_buff
pointer during ptr_ring_produce, and is decoded during consuming. XDP
metadata is stored in the headroom of the packet, which should work in
most cases since drivers usually reserve enough headroom. Very minor
changes were done for vhost_net: it just needs to peek the length
depending on the type of pointer.

Tests were done on two Intel E5-2630 2.40GHz machines connected back to
back through two 82599ES. Traffic was generated through MoonGen, and
testpmd(rxonly) in the guest reports 2.97Mpps when xdp_redirect_map is
doing redirection from ixgbe to TAP.

Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 drivers/net/tun.c      | 205 ++++++++++++++++++++++++++++++++++++++++---------
 drivers/vhost/net.c    |  13 +++-
 include/linux/if_tun.h |  17 ++++
 3 files changed, 197 insertions(+), 38 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 2c89efe..be6d993 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -240,6 +240,24 @@ struct tun_struct {
 	struct tun_steering_prog __rcu *steering_prog;
 };
 
+bool tun_is_xdp_buff(void *ptr)
+{
+	return (unsigned long)ptr & TUN_XDP_FLAG;
+}
+EXPORT_SYMBOL(tun_is_xdp_buff);
+
+void *tun_xdp_to_ptr(void *ptr)
+{
+	return (void *)((unsigned long)ptr | TUN_XDP_FLAG);
+}
+EXPORT_SYMBOL(tun_xdp_to_ptr);
+
+void *tun_ptr_to_xdp(void *ptr)
+{
+	return (void *)((unsigned long)ptr & ~TUN_XDP_FLAG);
+}
+EXPORT_SYMBOL(tun_ptr_to_xdp);
+
 static int tun_napi_receive(struct napi_struct *napi, int budget)
 {
 	struct tun_file *tfile = container_of(napi, struct tun_file, napi);
@@ -630,12 +648,25 @@ static struct tun_struct *tun_enable_queue(struct tun_file *tfile)
 	return tun;
 }
 
+static void tun_ptr_free(void *ptr)
+{
+	if (!ptr)
+		return;
+	if (tun_is_xdp_buff(ptr)) {
+		struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+
+		put_page(virt_to_head_page(xdp->data));
+	} else {
+		__skb_array_destroy_skb(ptr);
+	}
+}
+
 static void tun_queue_purge(struct tun_file *tfile)
 {
-	struct sk_buff *skb;
+	void *ptr;
 
-	while ((skb = ptr_ring_consume(&tfile->tx_ring)) != NULL)
-		kfree_skb(skb);
+	while ((ptr = ptr_ring_consume(&tfile->tx_ring)) != NULL)
+		tun_ptr_free(ptr);
 
 	skb_queue_purge(&tfile->sk.sk_write_queue);
 	skb_queue_purge(&tfile->sk.sk_error_queue);
@@ -688,8 +719,7 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
 				unregister_netdevice(tun->dev);
 		}
 		if (tun)
-			ptr_ring_cleanup(&tfile->tx_ring,
-					 __skb_array_destroy_skb);
+			ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free);
 		sock_put(&tfile->sk);
 	}
 }
@@ -1201,6 +1231,54 @@ static const struct net_device_ops tun_netdev_ops = {
 	.ndo_get_stats64	= tun_net_get_stats64,
 };
 
+static int tun_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp)
+{
+	struct tun_struct *tun = netdev_priv(dev);
+	struct xdp_buff *buff = xdp->data_hard_start;
+	int headroom = xdp->data - xdp->data_hard_start;
+	struct tun_file *tfile;
+	u32 numqueues;
+	int ret = 0;
+
+	/* Assure headroom is available and buff is properly aligned */
+	if (unlikely(headroom < sizeof(*xdp) || tun_is_xdp_buff(xdp)))
+		return -ENOSPC;
+
+	*buff = *xdp;
+
+	rcu_read_lock();
+
+	numqueues = READ_ONCE(tun->numqueues);
+	if (!numqueues) {
+		ret = -ENOSPC;
+		goto out;
+	}
+	tfile = rcu_dereference(tun->tfiles[smp_processor_id() %
+					    numqueues]);
+	/* Encode the XDP flag into lowest bit for consumer to differ
+	 * XDP buffer from sk_buff.
+	 */
+	if (ptr_ring_produce(&tfile->tx_ring, tun_xdp_to_ptr(buff))) {
+		this_cpu_inc(tun->pcpu_stats->tx_dropped);
+		ret = -ENOSPC;
+	}
+
+out:
+	rcu_read_unlock();
+	return ret;
+}
+
+static void tun_xdp_flush(struct net_device *dev)
+{
+	struct tun_struct *tun = netdev_priv(dev);
+	struct tun_file *tfile = tun->tfiles[0];
+
+	/* Notify and wake up reader process */
+	if (tfile->flags & TUN_FASYNC)
+		kill_fasync(&tfile->fasync, SIGIO, POLL_IN);
+	tfile->socket.sk->sk_data_ready(tfile->socket.sk);
+}
+
 static const struct net_device_ops tap_netdev_ops = {
 	.ndo_uninit		= tun_net_uninit,
 	.ndo_open		= tun_net_open,
@@ -1218,6 +1296,8 @@ static const struct net_device_ops tap_netdev_ops = {
 	.ndo_set_rx_headroom	= tun_set_headroom,
 	.ndo_get_stats64	= tun_net_get_stats64,
 	.ndo_bpf		= tun_xdp,
+	.ndo_xdp_xmit		= tun_xdp_xmit,
+	.ndo_xdp_flush		= tun_xdp_flush,
 };
 
 static void tun_flow_init(struct tun_struct *tun)
@@ -1841,6 +1921,40 @@ static ssize_t tun_chr_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	return result;
 }
 
+static ssize_t tun_put_user_xdp(struct tun_struct *tun,
+				struct tun_file *tfile,
+				struct xdp_buff *xdp,
+				struct iov_iter *iter)
+{
+	int vnet_hdr_sz = 0;
+	size_t size = xdp->data_end - xdp->data;
+	struct tun_pcpu_stats *stats;
+	size_t ret;
+
+	if (tun->flags & IFF_VNET_HDR) {
+		struct virtio_net_hdr gso = { 0 };
+
+		vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
+		if (unlikely(iov_iter_count(iter) < vnet_hdr_sz))
+			return -EINVAL;
+		if (unlikely(copy_to_iter(&gso, sizeof(gso), iter) !=
+			     sizeof(gso)))
+			return -EFAULT;
+		iov_iter_advance(iter, vnet_hdr_sz - sizeof(gso));
+	}
+
+	ret = copy_to_iter(xdp->data, size, iter) + vnet_hdr_sz;
+
+	stats = get_cpu_ptr(tun->pcpu_stats);
+	u64_stats_update_begin(&stats->syncp);
+	stats->tx_packets++;
+	stats->tx_bytes += ret;
+	u64_stats_update_end(&stats->syncp);
+	put_cpu_ptr(tun->pcpu_stats);
+
+	return ret;
+}
+
 /* Put packet to the user space buffer */
 static ssize_t tun_put_user(struct tun_struct *tun,
 			    struct tun_file *tfile,
@@ -1938,15 +2052,14 @@ static ssize_t tun_put_user(struct tun_struct *tun,
 	return total;
 }
 
-static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
-				     int *err)
+static void *tun_ring_recv(struct tun_file *tfile, int noblock, int *err)
 {
 	DECLARE_WAITQUEUE(wait, current);
-	struct sk_buff *skb = NULL;
+	void *ptr = NULL;
 	int error = 0;
 
-	skb = ptr_ring_consume(&tfile->tx_ring);
-	if (skb)
+	ptr = ptr_ring_consume(&tfile->tx_ring);
+	if (ptr)
 		goto out;
 	if (noblock) {
 		error = -EAGAIN;
@@ -1957,8 +2070,8 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
 	current->state = TASK_INTERRUPTIBLE;
 
 	while (1) {
-		skb = ptr_ring_consume(&tfile->tx_ring);
-		if (skb)
+		ptr = ptr_ring_consume(&tfile->tx_ring);
+		if (ptr)
 			break;
 		if (signal_pending(current)) {
 			error = -ERESTARTSYS;
@@ -1977,36 +2090,42 @@ static struct sk_buff *tun_ring_recv(struct tun_file *tfile, int noblock,
 
 out:
 	*err = error;
-	return skb;
+	return ptr;
 }
 
 static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile,
 			   struct iov_iter *to,
-			   int noblock, struct sk_buff *skb)
+			   int noblock, void *ptr)
 {
 	ssize_t ret;
 	int err;
 
 	tun_debug(KERN_INFO, tun, "tun_do_read\n");
 
-	if (!iov_iter_count(to)) {
-		if (skb)
-			kfree_skb(skb);
-		return 0;
-	}
+	if (!iov_iter_count(to))
+		tun_ptr_free(ptr);
 
-	if (!skb) {
+	if (!ptr) {
 		/* Read frames from ring */
-		skb = tun_ring_recv(tfile, noblock, &err);
-		if (!skb)
+		ptr = tun_ring_recv(tfile, noblock, &err);
+		if (!ptr)
 			return err;
 	}
 
-	ret = tun_put_user(tun, tfile, skb, to);
-	if (unlikely(ret < 0))
-		kfree_skb(skb);
-	else
-		consume_skb(skb);
+	if (tun_is_xdp_buff(ptr)) {
+		struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+
+		ret = tun_put_user_xdp(tun, tfile, xdp, to);
+		put_page(virt_to_head_page(xdp->data));
+	} else {
+		struct sk_buff *skb = ptr;
+
+		ret = tun_put_user(tun, tfile, skb, to);
+		if (unlikely(ret < 0))
+			kfree_skb(skb);
+		else
+			consume_skb(skb);
+	}
 
 	return ret;
 }
@@ -2143,12 +2262,12 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
 {
 	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
 	struct tun_struct *tun = tun_get(tfile);
-	struct sk_buff *skb = m->msg_control;
+	void *ptr = m->msg_control;
 	int ret;
 
 	if (!tun) {
 		ret = -EBADFD;
-		goto out_free_skb;
+		goto out_free;
 	}
 
 	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
@@ -2160,7 +2279,7 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
 					 SOL_PACKET, TUN_TX_TIMESTAMP);
 		goto out;
 	}
-	ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, skb);
+	ret = tun_do_read(tun, tfile, &m->msg_iter, flags & MSG_DONTWAIT, ptr);
 	if (ret > (ssize_t)total_len) {
 		m->msg_flags |= MSG_TRUNC;
 		ret = flags & MSG_TRUNC ? ret : total_len;
@@ -2171,12 +2290,25 @@ static int tun_recvmsg(struct socket *sock, struct msghdr *m, size_t total_len,
 
 out_put_tun:
 	tun_put(tun);
-out_free_skb:
-	if (skb)
-		kfree_skb(skb);
+out_free:
+	tun_ptr_free(ptr);
 	return ret;
 }
 
+static int tun_ptr_peek_len(void *ptr)
+{
+	if (likely(ptr)) {
+		if (tun_is_xdp_buff(ptr)) {
+			struct xdp_buff *xdp = tun_ptr_to_xdp(ptr);
+
+			return xdp->data_end - xdp->data;
+		}
+		return __skb_array_len_with_tag(ptr);
+	} else {
+		return 0;
+	}
+}
+
 static int tun_peek_len(struct socket *sock)
 {
 	struct tun_file *tfile = container_of(sock, struct tun_file, socket);
@@ -2187,7 +2319,7 @@ static int tun_peek_len(struct socket *sock)
 	if (!tun)
 		return 0;
 
-	ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, __skb_array_len_with_tag);
+	ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, tun_ptr_peek_len);
 	tun_put(tun);
 
 	return ret;
@@ -3110,7 +3242,7 @@ static int tun_queue_resize(struct tun_struct *tun)
 
 	ret = ptr_ring_resize_multiple(rings, n,
 				       dev->tx_queue_len, GFP_KERNEL,
-				       __skb_array_destroy_skb);
+				       tun_ptr_free);
 
 	kfree(rings);
 	return ret;
@@ -3191,8 +3323,7 @@ struct socket *tun_get_socket(struct file *file)
 	struct tun_file *tfile;
 	if (file->f_op != &tun_fops)
 		return ERR_PTR(-EINVAL);
-	tfile = file->private_data;
-	if (!tfile)
+	tfile = file->private_data;	if (!tfile)
 		return ERR_PTR(-EBADFD);
 	return &tfile->socket;
 }
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index c316555..a5a1db6 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -175,6 +175,17 @@ static void vhost_net_buf_unproduce(struct vhost_net_virtqueue *nvq)
 	}
 }
 
+/* Length of a queued rx entry: XDP data span, else skb_array tagged len. */
+static int vhost_net_buf_peek_len(void *ptr)
+{
+	struct xdp_buff *xdp;
+
+	if (!tun_is_xdp_buff(ptr))
+		return __skb_array_len_with_tag(ptr);
+	xdp = tun_ptr_to_xdp(ptr);
+	return xdp->data_end - xdp->data;
+}
+
 static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
 {
 	struct vhost_net_buf *rxq = &nvq->rxq;
@@ -186,7 +197,7 @@ static int vhost_net_buf_peek(struct vhost_net_virtqueue *nvq)
 		return 0;
 
 out:
-	return __skb_array_len_with_tag(vhost_net_buf_get_ptr(rxq));
+	return vhost_net_buf_peek_len(vhost_net_buf_get_ptr(rxq));
 }
 
 static void vhost_net_buf_init(struct vhost_net_buf *rxq)
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index bdee9b8..1cafdc2 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -17,9 +17,14 @@
 
 #include <uapi/linux/if_tun.h>
 
+#define TUN_XDP_FLAG 0x1ULL
+
 #if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
 struct socket *tun_get_socket(struct file *);
 struct ptr_ring *tun_get_tx_ring(struct file *file);
+bool tun_is_xdp_buff(void *ptr);
+void *tun_xdp_to_ptr(void *ptr);
+void *tun_ptr_to_xdp(void *ptr);
 #else
 #include <linux/err.h>
 #include <linux/errno.h>
@@ -33,5 +38,17 @@ static inline struct ptr_ring *tun_get_tx_ring(struct file *f)
 {
 	return ERR_PTR(-EINVAL);
 }
+static inline bool tun_is_xdp_buff(void *ptr)
+{
+	return false;
+}
+static inline void *tun_xdp_to_ptr(void *ptr)
+{
+	return NULL;
+}
+static inline void *tun_ptr_to_xdp(void *ptr)
+{
+	return NULL;
+}
 #endif /* CONFIG_TUN */
 #endif /* __IF_TUN_H */
-- 
2.7.4

^ permalink raw reply related

* Re: [PATCH][next] wcn36xx: remove redundant assignment to msg_body.min_ch_time
From: Colin Ian King @ 2017-12-29 10:52 UTC (permalink / raw)
  To: Loic Poulain, Bjorn Andersson
  Cc: Eugene Krasnikov, Kalle Valo, wcn36xx, linux-wireless, netdev,
	kernel-janitors, linux-kernel
In-Reply-To: <CAMZdPi8hYToYoPSMggPY-tVpsJNL2W57uzB+d0nSLdfNtPK6Cg@mail.gmail.com>

On 29/12/17 07:44, Loic Poulain wrote:
> Hi Colin, Bjorn,
> 
> On 26 December 2017 at 21:13, Bjorn Andersson
> <bjorn.andersson@linaro.org> wrote:
>> On Tue 19 Dec 09:04 PST 2017, Colin King wrote:
>>
>>> From: Colin Ian King <colin.king@canonical.com>
>>>
>>> msg_body.min_ch_time is being assigned twice; remove the redundant
>>> first assignment.
>>>
>>> Detected by CoverityScan, CID#1463042 ("Unused Value")
>>>
>>
>> Happy to see Coverity working for us :)
>>
>>
>> This should have had a:
>>
>> Fixes: 2f3bef4b247e ("wcn36xx: Add hardware scan offload support")
>>
>>> Signed-off-by: Colin Ian King <colin.king@canonical.com>
>>> ---
>>>  drivers/net/wireless/ath/wcn36xx/smd.c | 1 -
>>>  1 file changed, 1 deletion(-)
>>>
>>> diff --git a/drivers/net/wireless/ath/wcn36xx/smd.c b/drivers/net/wireless/ath/wcn36xx/smd.c
>>> index 2914618a0335..bab2eca5fcac 100644
>>> --- a/drivers/net/wireless/ath/wcn36xx/smd.c
>>> +++ b/drivers/net/wireless/ath/wcn36xx/smd.c
>>> @@ -625,7 +625,6 @@ int wcn36xx_smd_start_hw_scan(struct wcn36xx *wcn, struct ieee80211_vif *vif,
>>>       INIT_HAL_MSG(msg_body, WCN36XX_HAL_START_SCAN_OFFLOAD_REQ);
>>>
>>>       msg_body.scan_type = WCN36XX_HAL_SCAN_TYPE_ACTIVE;
>>> -     msg_body.min_ch_time = 30;
>>>       msg_body.min_ch_time = 100;
>>
>> But I strongly suspect the second line is supposed to be max_ch_time.
>>
>> @Loic, do you agree?
> 
> You're absolutely right.
> Colin could you please update your patch accordingly?

Resent as "wcn36xx: fix incorrect assignment to msg_body.min_ch_time"

> 
> Regards,
> Loic
> --
> To unsubscribe from this list: send the line "unsubscribe kernel-janitors" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox