Netdev List
 help / color / mirror / Atom feed
* [RFC V3 PATCH 12/26] net/netpolicy: NET device hotplug
From: kan.liang @ 2016-09-12 14:55 UTC (permalink / raw)
  To: davem, linux-kernel, netdev
  Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
	kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
	decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
	hannes, stephen, alexei.starovoitov, jesse.brandeburg, andi,
	Kan Liang
In-Reply-To: <1473692159-4017-1-git-send-email-kan.liang@intel.com>

From: Kan Liang <kan.liang@intel.com>

Support NET device up/down/namechange in the NET policy code.

Signed-off-by: Kan Liang <kan.liang@intel.com>
---
 net/core/netpolicy.c | 66 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 58 insertions(+), 8 deletions(-)

diff --git a/net/core/netpolicy.c b/net/core/netpolicy.c
index f56beca..271ecc3 100644
--- a/net/core/netpolicy.c
+++ b/net/core/netpolicy.c
@@ -688,6 +688,9 @@ static const struct file_operations proc_net_policy_operations = {
 
 static int netpolicy_proc_dev_init(struct net *net, struct net_device *dev)
 {
+	if (dev->proc_dev)
+		proc_remove(dev->proc_dev);
+
 	dev->proc_dev = proc_net_mkdir(net, dev->name, net->proc_netpolicy);
 	if (!dev->proc_dev)
 		return -ENOMEM;
@@ -754,6 +757,19 @@ void uninit_netpolicy(struct net_device *dev)
 	spin_unlock(&dev->np_lock);
 }
 
+static void netpolicy_dev_init(struct net *net,
+			       struct net_device *dev)
+{
+	if (!init_netpolicy(dev)) {
+#ifdef CONFIG_PROC_FS
+		if (netpolicy_proc_dev_init(net, dev))
+			uninit_netpolicy(dev);
+		else
+#endif /* CONFIG_PROC_FS */
+		pr_info("NETPOLICY: Init net policy for %s\n", dev->name);
+	}
+}
+
 static int __net_init netpolicy_net_init(struct net *net)
 {
 	struct net_device *dev, *aux;
@@ -767,14 +783,7 @@ static int __net_init netpolicy_net_init(struct net *net)
 
 	rtnl_lock();
 	for_each_netdev_safe(net, dev, aux) {
-		if (!init_netpolicy(dev)) {
-#ifdef CONFIG_PROC_FS
-			if (netpolicy_proc_dev_init(net, dev))
-				uninit_netpolicy(dev);
-			else
-#endif /* CONFIG_PROC_FS */
-			pr_info("NETPOLICY: Init net policy for %s\n", dev->name);
-		}
+		netpolicy_dev_init(net, dev);
 	}
 	rtnl_unlock();
 
@@ -799,17 +808,58 @@ static struct pernet_operations netpolicy_net_ops = {
 	.exit = netpolicy_net_exit,
 };
 
+static int netpolicy_notify(struct notifier_block *this,
+			    unsigned long event,
+			    void *ptr)
+{
+	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+
+	switch (event) {
+	case NETDEV_CHANGENAME:
+#ifdef CONFIG_PROC_FS
+		if (dev->proc_dev) {
+			proc_remove(dev->proc_dev);
+			if ((netpolicy_proc_dev_init(dev_net(dev), dev) < 0) &&
+			    dev->proc_dev) {
+				proc_remove(dev->proc_dev);
+				dev->proc_dev = NULL;
+			}
+		}
+#endif
+		break;
+	case NETDEV_UP:
+		netpolicy_dev_init(dev_net(dev), dev);
+		break;
+	case NETDEV_GOING_DOWN:
+		uninit_netpolicy(dev);
+#ifdef CONFIG_PROC_FS
+		proc_remove(dev->proc_dev);
+		dev->proc_dev = NULL;
+#endif
+		break;
+	}
+
+	return NOTIFY_OK;
+}
+
+static struct notifier_block netpolicy_dev_notf = {
+	.notifier_call = netpolicy_notify,
+};
+
 static int __init netpolicy_init(void)
 {
 	int ret;
 
 	ret = register_pernet_subsys(&netpolicy_net_ops);
+	if (!ret)
+		register_netdevice_notifier(&netpolicy_dev_notf);
 
 	return ret;
 }
 
 static void __exit netpolicy_exit(void)
 {
+	unregister_netdevice_notifier(&netpolicy_dev_notf);
 	unregister_pernet_subsys(&netpolicy_net_ops);
 }
 
-- 
2.5.5

^ permalink raw reply related

* [RFC V3 PATCH 14/26] net/netpolicy: handle channel changes
From: kan.liang @ 2016-09-12 14:55 UTC (permalink / raw)
  To: davem, linux-kernel, netdev
  Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
	kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
	decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
	hannes, stephen, alexei.starovoitov, jesse.brandeburg, andi,
	Kan Liang
In-Reply-To: <1473692159-4017-1-git-send-email-kan.liang@intel.com>

From: Kan Liang <kan.liang@intel.com>

Users can use ethtool to set the channel number. This patch handles
channel changes by rebuilding the object list.

Signed-off-by: Kan Liang <kan.liang@intel.com>
---
 include/linux/netpolicy.h | 8 ++++++++
 net/core/ethtool.c        | 8 +++++++-
 net/core/netpolicy.c      | 1 +
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/include/linux/netpolicy.h b/include/linux/netpolicy.h
index f60331d..d6ba9f6 100644
--- a/include/linux/netpolicy.h
+++ b/include/linux/netpolicy.h
@@ -80,4 +80,12 @@ struct netpolicy_info {
 	struct list_head	obj_list[NETPOLICY_RXTX][NET_POLICY_MAX];
 };
 
+#ifdef CONFIG_NETPOLICY
+extern void update_netpolicy_sys_map(void);
+#else
+static inline void update_netpolicy_sys_map(void)
+{
+}
+#endif
+
 #endif /*__LINUX_NETPOLICY_H*/
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 9774898..e1f8bd0 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1703,6 +1703,7 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
 {
 	struct ethtool_channels channels, max;
 	u32 max_rx_in_use = 0;
+	int ret;
 
 	if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels)
 		return -EOPNOTSUPP;
@@ -1726,7 +1727,12 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
 	    (channels.combined_count + channels.rx_count) <= max_rx_in_use)
 	    return -EINVAL;
 
-	return dev->ethtool_ops->set_channels(dev, &channels);
+	ret = dev->ethtool_ops->set_channels(dev, &channels);
+#ifdef CONFIG_NETPOLICY
+	if (!ret)
+		update_netpolicy_sys_map();
+#endif
+	return ret;
 }
 
 static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr)
diff --git a/net/core/netpolicy.c b/net/core/netpolicy.c
index 3bf0a44..a739ac7 100644
--- a/net/core/netpolicy.c
+++ b/net/core/netpolicy.c
@@ -893,6 +893,7 @@ unlock:
 		}
 	}
 }
+EXPORT_SYMBOL(update_netpolicy_sys_map);
 
 static int netpolicy_cpu_callback(struct notifier_block *nfb,
 				  unsigned long action, void *hcpu)
-- 
2.5.5

^ permalink raw reply related

* [RFC V3 PATCH 16/26] net/netpolicy: introduce per socket netpolicy
From: kan.liang @ 2016-09-12 14:55 UTC (permalink / raw)
  To: davem, linux-kernel, netdev
  Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
	kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
	decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
	hannes, stephen, alexei.starovoitov, jesse.brandeburg, andi,
	Kan Liang
In-Reply-To: <1473692159-4017-1-git-send-email-kan.liang@intel.com>

From: Kan Liang <kan.liang@intel.com>

The network socket is the most basic unit which controls the network
traffic. A socket option is needed for users to set their own policy on
a socket to improve the network performance.

There are no existing socket options which can be reused. Of the existing
socket options, SO_MARK or maybe SO_PRIORITY is closest to NET policy's
requirement. But they cannot be reused for NET policy. SO_MARK can be
used for routing and packet filtering. But the NET policy doesn't intend
to change the routing. It only redirects the packet to the specific
device queue. Also, the target queue is assigned by NET policy subsystem
at run time. It should not be set in advance. SO_PRIORITY can set
protocol-defined priority for all packets on the socket. But the NET
policies don't have priority yet.

This patch introduces a new socket option, SO_NETPOLICY, to
set/get the net policy for a socket, so that the application can set its
own policy on the socket to improve the network performance.
The per socket net policy can also be inherited by a new socket.

The usage of SO_NETPOLICY socket option is as below.
setsockopt(sockfd,SOL_SOCKET,SO_NETPOLICY,&policy,sizeof(int))
getsockopt(sockfd,SOL_SOCKET,SO_NETPOLICY,&policy,sizeof(int))
The policy set by the SO_NETPOLICY socket option must be valid and
compatible with the current device policy. Otherwise, it will error out. The
socket policy will be set to NET_POLICY_INVALID.

Signed-off-by: Kan Liang <kan.liang@intel.com>
---
 arch/alpha/include/uapi/asm/socket.h   |  2 ++
 arch/avr32/include/uapi/asm/socket.h   |  2 ++
 arch/frv/include/uapi/asm/socket.h     |  2 ++
 arch/ia64/include/uapi/asm/socket.h    |  2 ++
 arch/m32r/include/uapi/asm/socket.h    |  2 ++
 arch/mips/include/uapi/asm/socket.h    |  2 ++
 arch/mn10300/include/uapi/asm/socket.h |  2 ++
 arch/parisc/include/uapi/asm/socket.h  |  2 ++
 arch/powerpc/include/uapi/asm/socket.h |  2 ++
 arch/s390/include/uapi/asm/socket.h    |  2 ++
 arch/sparc/include/uapi/asm/socket.h   |  2 ++
 arch/xtensa/include/uapi/asm/socket.h  |  2 ++
 include/net/request_sock.h             |  4 +++-
 include/net/sock.h                     |  9 +++++++++
 include/uapi/asm-generic/socket.h      |  2 ++
 net/core/sock.c                        | 28 ++++++++++++++++++++++++++++
 16 files changed, 66 insertions(+), 1 deletion(-)

diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 9e46d6e..06b2ef9 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -97,4 +97,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_NETPOLICY		54
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/avr32/include/uapi/asm/socket.h b/arch/avr32/include/uapi/asm/socket.h
index 1fd147f..24f85f0 100644
--- a/arch/avr32/include/uapi/asm/socket.h
+++ b/arch/avr32/include/uapi/asm/socket.h
@@ -90,4 +90,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_NETPOLICY		54
+
 #endif /* _UAPI__ASM_AVR32_SOCKET_H */
diff --git a/arch/frv/include/uapi/asm/socket.h b/arch/frv/include/uapi/asm/socket.h
index afbc98f0..82c8d44 100644
--- a/arch/frv/include/uapi/asm/socket.h
+++ b/arch/frv/include/uapi/asm/socket.h
@@ -90,5 +90,7 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_NETPOLICY		54
+
 #endif /* _ASM_SOCKET_H */
 
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index 0018fad..b99c1df 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -99,4 +99,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_NETPOLICY		54
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/uapi/asm/socket.h b/arch/m32r/include/uapi/asm/socket.h
index 5fe42fc..71a43ed 100644
--- a/arch/m32r/include/uapi/asm/socket.h
+++ b/arch/m32r/include/uapi/asm/socket.h
@@ -90,4 +90,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_NETPOLICY		54
+
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 2027240a..ce8b9ba 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -108,4 +108,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_NETPOLICY		54
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/mn10300/include/uapi/asm/socket.h b/arch/mn10300/include/uapi/asm/socket.h
index 5129f23..c041265 100644
--- a/arch/mn10300/include/uapi/asm/socket.h
+++ b/arch/mn10300/include/uapi/asm/socket.h
@@ -90,4 +90,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_NETPOLICY		54
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 9c935d7..2639dcd 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -89,4 +89,6 @@
 
 #define SO_CNX_ADVICE		0x402E
 
+#define SO_NETPOLICY		0x402F
+
 #endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index 1672e33..e04e3b6 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -97,4 +97,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_NETPOLICY		54
+
 #endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 41b51c2..d43b854 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -96,4 +96,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_NETPOLICY		54
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 31aede3..94a2cdf 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -86,6 +86,8 @@
 
 #define SO_CNX_ADVICE		0x0037
 
+#define SO_NETPOLICY		0x0038
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 81435d9..97f1691 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -101,4 +101,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_NETPOLICY		54
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 6ebe13e..1fa2d0e 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -101,7 +101,9 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
 	sk_tx_queue_clear(req_to_sk(req));
 	req->saved_syn = NULL;
 	atomic_set(&req->rsk_refcnt, 0);
-
+#ifdef CONFIG_NETPOLICY
+	memcpy(&req_to_sk(req)->sk_netpolicy, &sk_listener->sk_netpolicy, sizeof(sk_listener->sk_netpolicy));
+#endif
 	return req;
 }
 
diff --git a/include/net/sock.h b/include/net/sock.h
index c797c57..e1e9e3d 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -70,6 +70,7 @@
 #include <net/checksum.h>
 #include <net/tcp_states.h>
 #include <linux/net_tstamp.h>
+#include <linux/netpolicy.h>
 
 /*
  * This structure really needs to be cleaned up.
@@ -141,6 +142,7 @@ typedef __u64 __bitwise __addrpair;
  *		%SO_OOBINLINE settings, %SO_TIMESTAMPING settings
  *	@skc_incoming_cpu: record/match cpu processing incoming packets
  *	@skc_refcnt: reference count
+ *	@skc_netpolicy: per socket net policy
  *
  *	This is the minimal network layer representation of sockets, the header
  *	for struct sock and struct inet_timewait_sock.
@@ -200,6 +202,10 @@ struct sock_common {
 		struct sock	*skc_listener; /* request_sock */
 		struct inet_timewait_death_row *skc_tw_dr; /* inet_timewait_sock */
 	};
+
+#ifdef CONFIG_NETPOLICY
+	struct netpolicy_instance    skc_netpolicy;
+#endif
 	/*
 	 * fields between dontcopy_begin/dontcopy_end
 	 * are not copied in sock_copy()
@@ -339,6 +345,9 @@ struct sock {
 #define sk_incoming_cpu		__sk_common.skc_incoming_cpu
 #define sk_flags		__sk_common.skc_flags
 #define sk_rxhash		__sk_common.skc_rxhash
+#ifdef CONFIG_NETPOLICY
+#define sk_netpolicy		__sk_common.skc_netpolicy
+#endif
 
 	socket_lock_t		sk_lock;
 	struct sk_buff_head	sk_receive_queue;
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index 67d632f..d2a5aeb 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -92,4 +92,6 @@
 
 #define SO_CNX_ADVICE		53
 
+#define SO_NETPOLICY		54
+
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 51a7304..80d9f08 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1003,6 +1003,12 @@ set_rcvbuf:
 		if (val == 1)
 			dst_negative_advice(sk);
 		break;
+
+#ifdef CONFIG_NETPOLICY
+	case SO_NETPOLICY:
+		ret = netpolicy_register(&sk->sk_netpolicy, val);
+		break;
+#endif
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -1263,6 +1269,11 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = sk->sk_incoming_cpu;
 		break;
 
+#ifdef CONFIG_NETPOLICY
+	case SO_NETPOLICY:
+		v.val = sk->sk_netpolicy.policy;
+		break;
+#endif
 	default:
 		/* We implement the SO_SNDLOWAT etc to not be settable
 		 * (1003.1g 7).
@@ -1402,6 +1413,12 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 
 		sock_update_classid(&sk->sk_cgrp_data);
 		sock_update_netprioidx(&sk->sk_cgrp_data);
+
+#ifdef CONFIG_NETPOLICY
+		sk->sk_netpolicy.dev = NULL;
+		sk->sk_netpolicy.ptr = (void *)sk;
+		sk->sk_netpolicy.policy = NET_POLICY_INVALID;
+#endif
 	}
 
 	return sk;
@@ -1439,6 +1456,10 @@ static void __sk_destruct(struct rcu_head *head)
 	put_pid(sk->sk_peer_pid);
 	if (likely(sk->sk_net_refcnt))
 		put_net(sock_net(sk));
+#ifdef CONFIG_NETPOLICY
+	if (is_net_policy_valid(sk->sk_netpolicy.policy))
+		netpolicy_unregister(&sk->sk_netpolicy);
+#endif
 	sk_prot_free(sk->sk_prot_creator, sk);
 }
 
@@ -1575,6 +1596,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		if (sock_needs_netstamp(sk) &&
 		    newsk->sk_flags & SK_FLAGS_TIMESTAMP)
 			net_enable_timestamp();
+
+#ifdef CONFIG_NETPOLICY
+		newsk->sk_netpolicy.ptr = (void *)newsk;
+		if (is_net_policy_valid(newsk->sk_netpolicy.policy))
+			netpolicy_register(&newsk->sk_netpolicy, newsk->sk_netpolicy.policy);
+
+#endif
 	}
 out:
 	return newsk;
-- 
2.5.5

^ permalink raw reply related

* [RFC V3 PATCH 17/26] net/netpolicy: introduce netpolicy_pick_queue
From: kan.liang @ 2016-09-12 14:55 UTC (permalink / raw)
  To: davem, linux-kernel, netdev
  Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
	kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
	decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
	hannes, stephen, alexei.starovoitov, jesse.brandeburg, andi,
	Kan Liang
In-Reply-To: <1473692159-4017-1-git-send-email-kan.liang@intel.com>

From: Kan Liang <kan.liang@intel.com>

To achieve better network performance, the key step is to distribute the
packets to dedicated queues according to policy and system run time
status.

This patch provides an interface which can return the proper dedicated
queue for a socket/task. Then the packets of the socket/task will be
redirected to the dedicated queue for better network performance.

For selecting the proper queue, it currently checks the CPU load and
ref number. The object which has the lowest CPU load and ref number will
be chosen.

The selected object will be stored in a hashtable, so the whole object
list does not need to be traversed every time.

Signed-off-by: Kan Liang <kan.liang@intel.com>
---
 include/linux/netpolicy.h |  12 ++++
 include/linux/sched.h     |   3 +
 kernel/sched/fair.c       |   8 +--
 net/core/netpolicy.c      | 179 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 198 insertions(+), 4 deletions(-)

diff --git a/include/linux/netpolicy.h b/include/linux/netpolicy.h
index ee33978..e06b74c 100644
--- a/include/linux/netpolicy.h
+++ b/include/linux/netpolicy.h
@@ -85,8 +85,15 @@ struct netpolicy_instance {
 	struct net_device	*dev;
 	enum netpolicy_name	policy; /* required policy */
 	void			*ptr;   /* pointers */
+	struct task_struct	*task;
 };
 
+struct netpolicy_cpu_load {
+	unsigned long		load;
+	struct netpolicy_object	*obj;
+};
+#define LOAD_TOLERANCE	5
+
 /* check if policy is valid */
 static inline int is_net_policy_valid(enum netpolicy_name policy)
 {
@@ -98,6 +105,7 @@ extern void update_netpolicy_sys_map(void);
 extern int netpolicy_register(struct netpolicy_instance *instance,
 			      enum netpolicy_name policy);
 extern void netpolicy_unregister(struct netpolicy_instance *instance);
+extern int netpolicy_pick_queue(struct netpolicy_instance *instance, bool is_rx);
 #else
 static inline void update_netpolicy_sys_map(void)
 {
@@ -112,6 +120,10 @@ static inline void netpolicy_unregister(struct netpolicy_instance *instance)
 {
 }
 
+static inline int netpolicy_pick_queue(struct netpolicy_instance *instance, bool is_rx)
+{
+	return 0;
+}
 #endif
 
 #endif /*__LINUX_NETPOLICY_H*/
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 62c68e5..3b716a3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3481,4 +3481,7 @@ void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
 void cpufreq_remove_update_util_hook(int cpu);
 #endif /* CONFIG_CPU_FREQ */
 
+extern unsigned long weighted_cpuload(const int cpu);
+extern unsigned long capacity_of(int cpu);
+
 #endif
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 039de34..a579ba2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1257,10 +1257,10 @@ bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
 	       group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4;
 }
 
-static unsigned long weighted_cpuload(const int cpu);
+unsigned long weighted_cpuload(const int cpu);
 static unsigned long source_load(int cpu, int type);
 static unsigned long target_load(int cpu, int type);
-static unsigned long capacity_of(int cpu);
+unsigned long capacity_of(int cpu);
 static long effective_load(struct task_group *tg, int cpu, long wl, long wg);
 
 /* Cached statistics for all CPUs within a node */
@@ -4752,7 +4752,7 @@ static void cpu_load_update(struct rq *this_rq, unsigned long this_load,
 }
 
 /* Used instead of source_load when we know the type == 0 */
-static unsigned long weighted_cpuload(const int cpu)
+unsigned long weighted_cpuload(const int cpu)
 {
 	return cfs_rq_runnable_load_avg(&cpu_rq(cpu)->cfs);
 }
@@ -4902,7 +4902,7 @@ static unsigned long target_load(int cpu, int type)
 	return max(rq->cpu_load[type-1], total);
 }
 
-static unsigned long capacity_of(int cpu)
+unsigned long capacity_of(int cpu)
 {
 	return cpu_rq(cpu)->cpu_capacity;
 }
diff --git a/net/core/netpolicy.c b/net/core/netpolicy.c
index 503ebd1..e82e0d3 100644
--- a/net/core/netpolicy.c
+++ b/net/core/netpolicy.c
@@ -40,6 +40,7 @@
 #include <linux/ctype.h>
 #include <linux/cpu.h>
 #include <linux/hashtable.h>
+#include <linux/sched.h>
 
 struct netpolicy_record {
 	struct hlist_node	hash_node;
@@ -293,6 +294,184 @@ static void netpolicy_record_clear_dev_node(struct net_device *dev)
 	spin_unlock_bh(&np_hashtable_lock);
 }
 
+static struct netpolicy_object *get_avail_object(struct net_device *dev,
+						 enum netpolicy_name policy,
+						 struct netpolicy_instance *instance,
+						 bool is_rx)
+{
+	int avail_cpu_num = cpumask_weight(tsk_cpus_allowed(instance->task));
+	int dir = is_rx ? NETPOLICY_RX : NETPOLICY_TX;
+	struct netpolicy_object *tmp, *obj = NULL;
+	unsigned long load = 0, min_load = -1;
+	struct netpolicy_cpu_load *cpu_load;
+	int i = 0, val = -1;
+
+	/* Check if net policy is supported */
+	if (!dev || !dev->netpolicy)
+		goto exit;
+
+	/* The system should have queues which support the request policy. */
+	if ((policy != dev->netpolicy->cur_policy) &&
+	    (dev->netpolicy->cur_policy != NET_POLICY_MIX))
+		goto exit;
+
+	if (!avail_cpu_num)
+		goto exit;
+
+	cpu_load = kcalloc(avail_cpu_num, sizeof(*cpu_load), GFP_KERNEL);
+	if (!cpu_load)
+		goto exit;
+
+	spin_lock_bh(&dev->np_ob_list_lock);
+
+	/* find the lowest load and remove obvious high load objects */
+	list_for_each_entry(tmp, &dev->netpolicy->obj_list[dir][policy], list) {
+		if (!cpumask_test_cpu(tmp->cpu, tsk_cpus_allowed(instance->task)))
+			continue;
+
+#ifdef CONFIG_SMP
+		/* normalized load */
+		load = weighted_cpuload(tmp->cpu) * 100 / capacity_of(tmp->cpu);
+
+		if ((min_load != -1) &&
+		    load > (min_load + LOAD_TOLERANCE))
+			continue;
+#endif
+		cpu_load[i].load = load;
+		cpu_load[i].obj = tmp;
+		if ((min_load == -1) ||
+		    (load < min_load))
+			min_load = load;
+		i++;
+	}
+	avail_cpu_num = i;
+	spin_unlock_bh(&dev->np_ob_list_lock);
+
+	for (i = 0; i < avail_cpu_num; i++) {
+		if (cpu_load[i].load > (min_load + LOAD_TOLERANCE))
+			continue;
+
+		tmp = cpu_load[i].obj;
+		if ((val > atomic_read(&tmp->refcnt)) ||
+		    (val == -1)) {
+			val = atomic_read(&tmp->refcnt);
+			obj = tmp;
+		}
+	}
+
+	if (!obj)
+		goto free_load;
+
+	atomic_inc(&obj->refcnt);
+
+free_load:
+	kfree(cpu_load);
+exit:
+	return obj;
+}
+
+static int get_avail_queue(struct netpolicy_instance *instance, bool is_rx)
+{
+	struct netpolicy_record *old_record, *new_record;
+	struct net_device *dev = instance->dev;
+	unsigned long ptr_id = (uintptr_t)instance->ptr;
+	int queue = -1;
+
+	spin_lock_bh(&np_hashtable_lock);
+	old_record = netpolicy_record_search(ptr_id);
+	if (!old_record) {
+		pr_warn("NETPOLICY: doesn't registered. Remove net policy settings!\n");
+		instance->policy = NET_POLICY_INVALID;
+		goto err;
+	}
+
+	if (is_rx && old_record->rx_obj) {
+		queue = old_record->rx_obj->queue;
+	} else if (!is_rx && old_record->tx_obj) {
+		queue = old_record->tx_obj->queue;
+	} else {
+		new_record = kzalloc(sizeof(*new_record), GFP_KERNEL);
+		if (!new_record)
+			goto err;
+		memcpy(new_record, old_record, sizeof(*new_record));
+
+		if (is_rx) {
+			new_record->rx_obj = get_avail_object(dev, new_record->policy,
+							      instance, is_rx);
+			if (!new_record->dev)
+				new_record->dev = dev;
+			if (!new_record->rx_obj) {
+				kfree(new_record);
+				goto err;
+			}
+			queue = new_record->rx_obj->queue;
+		} else {
+			new_record->tx_obj = get_avail_object(dev, new_record->policy,
+							      instance, is_rx);
+			if (!new_record->dev)
+				new_record->dev = dev;
+			if (!new_record->tx_obj) {
+				kfree(new_record);
+				goto err;
+			}
+			queue = new_record->tx_obj->queue;
+		}
+		/* update record */
+		hlist_replace_rcu(&old_record->hash_node, &new_record->hash_node);
+		kfree(old_record);
+	}
+err:
+	spin_unlock_bh(&np_hashtable_lock);
+	return queue;
+}
+
+static inline bool policy_validate(struct netpolicy_instance *instance)
+{
+	struct net_device *dev = instance->dev;
+	enum netpolicy_name cur_policy;
+
+	cur_policy = dev->netpolicy->cur_policy;
+	if ((instance->policy == NET_POLICY_NONE) ||
+	    (cur_policy == NET_POLICY_NONE))
+		return false;
+
+	if (((cur_policy != NET_POLICY_MIX) && (cur_policy != instance->policy)) ||
+	    ((cur_policy == NET_POLICY_MIX) && (instance->policy == NET_POLICY_CPU))) {
+		pr_warn("NETPOLICY: %s current device policy %s doesn't support required policy %s! Remove net policy settings!\n",
+			dev->name, policy_name[cur_policy],
+			policy_name[instance->policy]);
+		return false;
+	}
+	return true;
+}
+
+/**
+ * netpolicy_pick_queue() - Find proper queue
+ * @instance:	NET policy per socket/task instance info
+ * @is_rx:	RX queue or TX queue
+ *
+ * This function intends to find the proper queue according to policy.
+ * For selecting the proper queue, currently it picks the least referenced
+ * object among those whose CPU load is close to the lowest in the list.
+ * The selected object will be stored in a hashtable, so it does not need to
+ * go through the whole object list every time.
+ *
+ * Return: negative on failure, otherwise the assigned queue
+ */
+int netpolicy_pick_queue(struct netpolicy_instance *instance, bool is_rx)
+{
+	struct net_device *dev = instance->dev;
+
+	if (!dev || !dev->netpolicy)
+		return -EINVAL;
+
+	if (!policy_validate(instance))
+		return -EINVAL;
+
+	return get_avail_queue(instance, is_rx);
+}
+EXPORT_SYMBOL(netpolicy_pick_queue);
+
 /**
  * netpolicy_register() - Register per socket/task policy request
  * @instance:	NET policy per socket/task instance info
-- 
2.5.5

^ permalink raw reply related

* [RFC V3 PATCH 19/26] net/netpolicy: tc bpf extension to pick Tx queue
From: kan.liang @ 2016-09-12 14:55 UTC (permalink / raw)
  To: davem, linux-kernel, netdev
  Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
	kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
	decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
	hannes, stephen, alexei.starovoitov, jesse.brandeburg, andi,
	Kan Liang
In-Reply-To: <1473692159-4017-1-git-send-email-kan.liang@intel.com>

From: Kan Liang <kan.liang@intel.com>

This patch extends the netpolicy to support tc bpf when selecting Tx
queue. It implements a bpf classifier for clsact qdisc. The classifier
will pick up the proper queue from net policy subsystem. This queue
selection from tc is not compatible with XPS. So XPS will be invalid.

Currently, tc bpf extension only supports the queue selection on egress.
To enable the extension, the following command must be applied.
 # ./tc qdisc add dev $DEVNAME clsact
 # ./tc filter add dev $DEVNAME egress bpf obj netpolicy_kern.o

Signed-off-by: Kan Liang <kan.liang@intel.com>
---
 include/uapi/linux/bpf.h  |  8 ++++++++
 net/core/dev.c            |  4 ++--
 net/core/filter.c         | 36 ++++++++++++++++++++++++++++++++++++
 samples/bpf/Makefile      |  1 +
 samples/bpf/bpf_helpers.h |  2 ++
 5 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f896dfa..9c7d847 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -398,6 +398,14 @@ enum bpf_func_id {
 	 */
 	BPF_FUNC_skb_change_tail,
 
+	/**
+	 * bpf_netpolicy(skb)
+	 * Netpolicy tc extension. Search for proper Tx queue
+	 * @skb: pointer to skb
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_netpolicy,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index b9a8044..82304ce 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3285,8 +3285,8 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 #ifdef CONFIG_NETPOLICY
 			struct netpolicy_instance *instance;
 
-			queue_index = -1;
-			if (dev->netpolicy && sk) {
+			queue_index = sk_tx_queue_get(sk);
+			if ((queue_index < 0) && dev->netpolicy && sk) {
 				instance = netpolicy_find_instance(sk);
 				if (instance) {
 					if (!instance->dev)
diff --git a/net/core/filter.c b/net/core/filter.c
index a83766b..ce32288 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2351,6 +2351,38 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
 	.arg3_type	= ARG_CONST_STACK_SIZE,
 };
 
+#ifdef CONFIG_NETPOLICY
+static u64 bpf_netpolicy(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct sk_buff *skb = (struct sk_buff *) (unsigned long) r1;
+	struct netpolicy_instance *instance;
+	struct net_device *dev = skb->dev;
+	struct sock *sk = skb->sk;
+	int queue_index;
+
+	if (dev->netpolicy && sk) {
+		instance = netpolicy_find_instance(sk);
+		if (instance) {
+			if (!instance->dev)
+				instance->dev = dev;
+			queue_index = netpolicy_pick_queue(instance, false);
+			if ((queue_index >= 0) && sk_fullsock(sk) &&
+			    rcu_access_pointer(sk->sk_dst_cache))
+				sk_tx_queue_set(sk, queue_index);
+		}
+	}
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_netpolicy_proto = {
+	.func		= bpf_netpolicy,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+#endif
+
 static const struct bpf_func_proto *
 bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
 {
@@ -2515,6 +2547,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_get_smp_processor_id_proto;
 	case BPF_FUNC_skb_under_cgroup:
 		return &bpf_skb_under_cgroup_proto;
+#ifdef CONFIG_NETPOLICY
+	case BPF_FUNC_netpolicy:
+		return &bpf_netpolicy_proto;
+#endif
 	default:
 		return sk_filter_func_proto(func_id);
 	}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 12b7304..4aedbb9 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -85,6 +85,7 @@ always += xdp2_kern.o
 always += test_current_task_under_cgroup_kern.o
 always += trace_event_kern.o
 always += sampleip_kern.o
+always += netpolicy_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 90f44bd..b295bbc 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -88,6 +88,8 @@ static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flag
 	(void *) BPF_FUNC_l4_csum_replace;
 static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
 	(void *) BPF_FUNC_skb_under_cgroup;
+static int (*bpf_netpolicy)(void *ctx) =
+	(void *) BPF_FUNC_netpolicy;
 
 #if defined(__x86_64__)
 
-- 
2.5.5

^ permalink raw reply related

* [RFC V3 PATCH 21/26] net/netpolicy: introduce per task net policy
From: kan.liang @ 2016-09-12 14:55 UTC (permalink / raw)
  To: davem, linux-kernel, netdev
  Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
	kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
	decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
	hannes, stephen, alexei.starovoitov, jesse.brandeburg, andi,
	Kan Liang
In-Reply-To: <1473692159-4017-1-git-send-email-kan.liang@intel.com>

From: Kan Liang <kan.liang@intel.com>

Usually, an application as a whole has specific requirements. Applying the
net policy to all sockets one by one in the application is too complex.
This patch introduces per task net policy to address this case.
Once the per task net policy is applied, all the sockets in the
application will apply the same net policy. Also, per task net policy
can be inherited by all children.

The usage of PR_SET_NETPOLICY option is as below.
prctl(PR_SET_NETPOLICY, POLICY_NAME, NULL, NULL, NULL).
It applies per task policy. The policy name must be valid and compatible
with the current device policy. Otherwise, it will error out. The task
policy will be set to NET_POLICY_INVALID.

Signed-off-by: Kan Liang <kan.liang@intel.com>
---
 include/linux/init_task.h  |  9 +++++++++
 include/linux/sched.h      |  5 +++++
 include/net/sock.h         | 12 +++++++++++-
 include/uapi/linux/prctl.h |  4 ++++
 kernel/exit.c              |  4 ++++
 kernel/fork.c              |  6 ++++++
 kernel/sys.c               | 31 +++++++++++++++++++++++++++++++
 net/core/netpolicy.c       | 35 +++++++++++++++++++++++++++++++++++
 net/core/sock.c            | 10 +++++++++-
 net/ipv4/af_inet.c         |  7 +++++--
 10 files changed, 119 insertions(+), 4 deletions(-)

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index f8834f8..133d1cb 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -183,6 +183,14 @@ extern struct task_group root_task_group;
 # define INIT_KASAN(tsk)
 #endif
 
+#ifdef CONFIG_NETPOLICY
+#define INIT_NETPOLICY(tsk)						\
+	.task_netpolicy.policy = NET_POLICY_INVALID,			\
+	.task_netpolicy.dev = NULL,					\
+	.task_netpolicy.ptr = (void *)&tsk,
+#else
+#define INIT_NETPOLICY(tsk)
+#endif
 /*
  *  INIT_TASK is used to set up the first task table, touch at
  * your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -260,6 +268,7 @@ extern struct task_group root_task_group;
 	INIT_VTIME(tsk)							\
 	INIT_NUMA_BALANCING(tsk)					\
 	INIT_KASAN(tsk)							\
+	INIT_NETPOLICY(tsk)						\
 }
 
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3b716a3..1c8c674 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -62,6 +62,8 @@ struct sched_param {
 
 #include <asm/processor.h>
 
+#include <linux/netpolicy.h>
+
 #define SCHED_ATTR_SIZE_VER0	48	/* sizeof first published struct */
 
 /*
@@ -1923,6 +1925,9 @@ struct task_struct {
 #ifdef CONFIG_MMU
 	struct task_struct *oom_reaper_list;
 #endif
+#ifdef CONFIG_NETPOLICY
+	struct netpolicy_instance task_netpolicy;
+#endif
 /* CPU-specific state of this task */
 	struct thread_struct thread;
 /*
diff --git a/include/net/sock.h b/include/net/sock.h
index ca97f35..867dc84 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1484,6 +1484,7 @@ void sock_edemux(struct sk_buff *skb);
 #define sock_edemux(skb) sock_efree(skb)
 #endif
 
+void sock_setnetpolicy(struct socket *sock);
 int sock_setsockopt(struct socket *sock, int level, int op,
 		    char __user *optval, unsigned int optlen);
 
@@ -2280,10 +2281,19 @@ extern int sysctl_optmem_max;
 extern __u32 sysctl_wmem_default;
 extern __u32 sysctl_rmem_default;
 
-/* Return netpolicy instance information from socket. */
+/* Return netpolicy instance information from either task or socket.
+ * If both task and socket have netpolicy instance information,
+ * using task's and unregistering socket's. Because task policy is
+ * dominant policy
+ */
 static inline struct netpolicy_instance *netpolicy_find_instance(struct sock *sk)
 {
 #ifdef CONFIG_NETPOLICY
+	if (is_net_policy_valid(current->task_netpolicy.policy)) {
+		if (is_net_policy_valid(sk->sk_netpolicy.policy))
+			netpolicy_unregister(&sk->sk_netpolicy);
+		return &current->task_netpolicy;
+	}
 	if (is_net_policy_valid(sk->sk_netpolicy.policy))
 		return &sk->sk_netpolicy;
 #endif
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index a8d0759..bc182d2 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -197,4 +197,8 @@ struct prctl_mm_map {
 # define PR_CAP_AMBIENT_LOWER		3
 # define PR_CAP_AMBIENT_CLEAR_ALL	4
 
+/* Control net policy */
+#define PR_SET_NETPOLICY		48
+#define PR_GET_NETPOLICY		49
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/exit.c b/kernel/exit.c
index 2f974ae..37841da 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -858,6 +858,10 @@ void do_exit(long code)
 	if (unlikely(current->pi_state_cache))
 		kfree(current->pi_state_cache);
 #endif
+#ifdef CONFIG_NETPOLICY
+	if (is_net_policy_valid(current->task_netpolicy.policy))
+		netpolicy_unregister(&current->task_netpolicy);
+#endif
 	/*
 	 * Make sure we are holding no locks:
 	 */
diff --git a/kernel/fork.c b/kernel/fork.c
index 52e725d..fd61b7d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1451,6 +1451,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->sequential_io_avg	= 0;
 #endif
 
+#ifdef CONFIG_NETPOLICY
+	p->task_netpolicy.ptr = (void *)p;
+	if (is_net_policy_valid(p->task_netpolicy.policy))
+		netpolicy_register(&p->task_netpolicy, p->task_netpolicy.policy);
+#endif
+
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	retval = sched_fork(clone_flags, p);
 	if (retval)
diff --git a/kernel/sys.c b/kernel/sys.c
index 89d5be4..b481a64 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2072,6 +2072,31 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
 }
 #endif
 
+#ifdef CONFIG_NETPOLICY
+static int prctl_set_netpolicy(struct task_struct *me, int policy)
+{
+	return netpolicy_register(&me->task_netpolicy, policy);
+}
+
+static int prctl_get_netpolicy(struct task_struct *me, unsigned long adr)
+{
+	return put_user(me->task_netpolicy.policy, (int __user *)adr);
+}
+
+#else /* CONFIG_NETPOLICY */
+
+static int prctl_set_netpolicy(struct task_struct *me, int policy)
+{
+	return -EINVAL;
+}
+
+static int prctl_get_netpolicy(struct task_struct *me, unsigned long adr)
+{
+	return -EINVAL;
+}
+
+#endif /* CONFIG_NETPOLICY */
+
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 		unsigned long, arg4, unsigned long, arg5)
 {
@@ -2270,6 +2295,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 	case PR_GET_FP_MODE:
 		error = GET_FP_MODE(me);
 		break;
+	case PR_SET_NETPOLICY:
+		error = prctl_set_netpolicy(me, arg2);
+		break;
+	case PR_GET_NETPOLICY:
+		error = prctl_get_netpolicy(me, arg2);
+		break;
 	default:
 		error = -EINVAL;
 		break;
diff --git a/net/core/netpolicy.c b/net/core/netpolicy.c
index 252cbee..60a6d69 100644
--- a/net/core/netpolicy.c
+++ b/net/core/netpolicy.c
@@ -24,6 +24,35 @@
  *	  is too difficult for users.
  * 	So, it is a big challenge to get good network performance.
  *
+ * NET policy supports four policies per device, and three policies per task
+ * and per socket. For using NET policy, the device policy must be set in
+ * advance. The task policy or socket policy must be compatible with device
+ * policy.
+ *
+ * BULK policy		This policy is designed for high throughput. It can be
+ *			applied to either device policy or task/socket policy.
+ *			If it is applied to device policy, the only compatible
+ *			task/socket policy is BULK policy itself.
+ * CPU policy		This policy is designed for high throughput and lower
+ *			CPU utilization. It can be applied to either device
+ *			policy or task/socket policy. If it is applied to
+ *			device policy, the only compatible task/socket policy
+ *			is CPU policy itself.
+ * LATENCY policy	This policy is designed for low latency. It can be
+ *			applied to either device policy or task/socket policy.
+ *			If it is applied to device policy, the only compatible
+ *			task/socket policy is LATENCY policy itself.
+ * MIX policy		This policy can only be applied to device policy. It
+ *			is compatible with BULK and LATENCY policy. This
+ *			policy is designed for the case which miscellaneous
+ *			types of workload running on the device.
+ *
+ * The device policy changes the system configuration and reorganize the
+ * resource on the device, but it does not change the packets behavior.
+ * The task policy and socket policy redirect the packets to get good
+ * performance. If both task policy and socket policy are set in the same
+ * task, task policy will be applied. The task policy can also be inherited by
+ * children.
  */
 #include <linux/module.h>
 #include <linux/kernel.h>
@@ -444,6 +473,12 @@ static inline bool policy_validate(struct netpolicy_instance *instance)
 			policy_name[instance->policy]);
 		return false;
 	}
+
+	/* task policy is dominant policy */
+	if (is_net_policy_valid(current->task_netpolicy.policy) &&
+	    (current->task_netpolicy.policy != instance->policy))
+		return false;
+
 	return true;
 }
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 80d9f08..1726a3c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1006,7 +1006,13 @@ set_rcvbuf:
 
 #ifdef CONFIG_NETPOLICY
 	case SO_NETPOLICY:
-		ret = netpolicy_register(&sk->sk_netpolicy, val);
+		if (is_net_policy_valid(current->task_netpolicy.policy) &&
+		    (current->task_netpolicy.policy != val)) {
+			printk_ratelimited(KERN_WARNING "NETPOLICY: new policy is not compatible with task netpolicy\n");
+			ret = -EINVAL;
+		} else {
+			ret = netpolicy_register(&sk->sk_netpolicy, val);
+		}
 		break;
 #endif
 	default:
@@ -1599,6 +1605,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 
 #ifdef CONFIG_NETPOLICY
 		newsk->sk_netpolicy.ptr = (void *)newsk;
+		if (is_net_policy_valid(current->task_netpolicy.policy))
+			newsk->sk_netpolicy.policy = NET_POLICY_INVALID;
 		if (is_net_policy_valid(newsk->sk_netpolicy.policy))
 			netpolicy_register(&newsk->sk_netpolicy, newsk->sk_netpolicy.policy);
 
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 209edc4..71bee44 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -766,8 +766,11 @@ static void sock_netpolicy_manage_flow(struct sock *sk, struct msghdr *msg)
 	if (!instance)
 		return;
 
-	if (!instance->dev)
-		return;
+	if (!instance->dev) {
+		if (!sk->sk_netpolicy.dev)
+			return;
+		instance->dev = sk->sk_netpolicy.dev;
+	}
 
 	flow = &instance->flow;
 	/* TODO: need to change here and add more protocol support */
-- 
2.5.5

^ permalink raw reply related

* [RFC V3 PATCH 24/26] net/netpolicy: optimize for queue pair
From: kan.liang @ 2016-09-12 14:55 UTC (permalink / raw)
  To: davem, linux-kernel, netdev
  Cc: jeffrey.t.kirsher, mingo, peterz, kuznet, jmorris, yoshfuji,
	kaber, akpm, keescook, viro, gorcunov, john.stultz, aduyck, ben,
	decot, fw, alexander.duyck, daniel, tom, rdunlap, xiyou.wangcong,
	hannes, stephen, alexei.starovoitov, jesse.brandeburg, andi,
	Kan Liang
In-Reply-To: <1473692159-4017-1-git-send-email-kan.liang@intel.com>

From: Kan Liang <kan.liang@intel.com>

Some drivers, like the i40e driver, do not support separate Tx and Rx queues
as channels. Use the Rx queue to stand for the channels if queue_pair is
set by the driver.

Signed-off-by: Kan Liang <kan.liang@intel.com>
---
 include/linux/netpolicy.h | 1 +
 net/core/netpolicy.c      | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/include/linux/netpolicy.h b/include/linux/netpolicy.h
index 88f4f60..9b03b4d 100644
--- a/include/linux/netpolicy.h
+++ b/include/linux/netpolicy.h
@@ -83,6 +83,7 @@ struct netpolicy_info {
 	unsigned long avail_policy[BITS_TO_LONGS(NET_POLICY_MAX)];
 	bool irq_affinity;
 	bool has_mix_policy;
+	bool queue_pair;
 	/* cpu and queue mapping information */
 	struct netpolicy_sys_info	sys_info;
 	/* List of policy objects 0 rx 1 tx */
diff --git a/net/core/netpolicy.c b/net/core/netpolicy.c
index 2f55a14..84503a4 100644
--- a/net/core/netpolicy.c
+++ b/net/core/netpolicy.c
@@ -538,6 +538,9 @@ int netpolicy_pick_queue(struct netpolicy_instance *instance, bool is_rx)
 	if (!policy_validate(instance))
 		return -EINVAL;
 
+	if (dev->netpolicy->queue_pair)
+		is_rx = true;
+
 	/* fast path */
 	rcu_read_lock();
 	version = rcu_dereference(dev->netpolicy->sys_info.version)->major;
-- 
2.5.5

^ permalink raw reply related

* Re: [PATCH v3 3/9] ARM: sun8i: dt: Add DT bindings documentation for Allwinner sun8i-emac
From: LABBE Corentin @ 2016-09-12 15:01 UTC (permalink / raw)
  To: Andrew Lunn
  Cc: robh+dt, mark.rutland, maxime.ripard, wens, linux, davem, netdev,
	linux-kernel, linux-arm-kernel, devicetree
In-Reply-To: <20160909140413.GD30871@lunn.ch>

On Fri, Sep 09, 2016 at 04:04:13PM +0200, Andrew Lunn wrote:
> > +The device node referenced by "phy" or "phy-handle" should be a child node
> > +of this node. See phy.txt for the generic PHY bindings.
> 
> I've not looked at the code yet, but is this really true? Generally
> there is not this limitation. You can point to any Ethernet phy
> anywhere, so long as it is on an MDIO bus.
> 
> > +
> > +Optional properties:
> > +- allwinner,tx-delay: TX clock delay chain value. Range value is 0-0x07. Default is 0)
> > +- allwinner,rx-delay: RX clock delay chain value. Range value is 0-0x1F. Default is 0)
> > +
> > +The TX/RX clock delay chain settings are board specific.
> > +
> > +Optional properties for "allwinner,sun8i-h3-emac":
> > +- allwinner,leds-active-low: EPHY LEDs are active low
> > +
> > +Example:
> > +
> > +emac: ethernet@01c0b000 {
> > +	compatible = "allwinner,sun8i-h3-emac";
> > +	syscon = <&syscon>;
> > +	reg = <0x01c0b000 0x104>;
> > +	reg-names = "emac";
> > +	interrupts = <GIC_SPI 82 IRQ_TYPE_LEVEL_HIGH>;
> > +	resets = <&ccu RST_BUS_EMAC>, <<&ccu RST_BUS_EPHY>;
> > +	reset-names = "ahb", "ephy";
> > +	clocks = <&ccu CLK_BUS_EMAC>, <&ccu CLK_BUS_EPHY>;
> > +	clock-names = "ahb", "ephy";
> > +	#address-cells = <1>;
> > +	#size-cells = <0>;
> > +
> > +	phy = <&phy1>;
> 
> ethernet.txt say:
> 
> - phy: the same as "phy-handle" property, not recommended for new bindings.
> 
> This is a new binding, please don't support it.
> 
> > +	phy-mode = "mii";
> > +	allwinner,leds-active-low;
> > +
> > +	phy1: ethernet-phy@1 {
> > +		reg = <1>;
> > +	};
> 
> It is normal to place these phy nodes inside an container node called
> mdio.
> 

Hello

Since the MDIO bus is a part of the sun8i-emac, do I really need to create such an mdio node?
All the examples I found are mdio buses with a separate driver (other drivers have the phy directly in the [eg]mac node).

Anyway, I tried the following patch to address your comments, but it breaks the PHY finding (Could not attach to PHY).

Regards

-->8--
--- a/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
+++ b/arch/arm/boot/dts/sun8i-h3-orangepi-pc.dts
@@ -166,14 +166,18 @@
        status = "okay";
 };
 
+&mdio {
+       reg = <1>;
+       phy1: ethernet-phy@1 {
+               reg = <1>;
+       };
+};
+
 &emac {
-       phy = <&phy1>;
+       phy-handle = <&phy1>;
        phy-mode = "mii";
        allwinner,leds-active-low;
        status = "okay";
-       phy1: ethernet-phy@1 {
-               reg = <1>;
-       };
 };
--- a/arch/arm/boot/dts/sun8i-h3.dtsi
+++ b/arch/arm/boot/dts/sun8i-h3.dtsi
@@ -474,6 +474,11 @@
                        #address-cells = <1>;
                        #size-cells = <0>;
                        status = "disabled";
+
+                       mdio: mdio@0 {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+                       };
                };
 
                crypto: crypto@1c15000 {
--- a/drivers/net/ethernet/allwinner/sun8i-emac.c
+++ b/drivers/net/ethernet/allwinner/sun8i-emac.c
@@ -2122,7 +2122,7 @@ static int sun8i_emac_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       priv->phy_node = of_parse_phandle(node, "phy", 0);
+       priv->phy_node = of_parse_phandle(node, "phy-handle", 0);
        if (!priv->phy_node) {
                netdev_err(ndev, "No associated PHY\n");
                return -ENODEV;

 
 &crypto {

^ permalink raw reply

* Re: enable/disable temporary IPv6 per prefix
From: Hannes Frederic Sowa @ 2016-09-12 15:15 UTC (permalink / raw)
  To: Oliver Mangold, netdev
In-Reply-To: <74afa986-01e5-729e-8847-2181f576893f@gmail.com>

On 11.09.2016 09:29, Oliver Mangold wrote:
> Hi,
> 
> I have a question as a relatively new user to IPv6. I am wondering if it
> is currently possible to enable/disable the usage of temporary addresses
> on a per-prefix basis. My current understanding is that the feature is
> enabled by the 'use_tempaddr' sysctl attribute, which is
> per-interface. What I would like to do is disable temp addresses for ULA
> prefixes. Did I miss something and this can already be done, or is it a
> feature planned for the future, maybe? RFC4941 seems to agree that this
> is a valid use case:
> 
> 
> Additionally, sites might wish to selectively enable or disable the use
> of temporary addresses for some prefixes.  For example, a site might
> wish to disable temporary address generation for "Unique local" [ULA]
> prefixes while still generating temporary addresses for all other global
> prefixes.  Another site might wish to enable temporary address
> generation only for the prefixes 2001::/16 and 2002::/16, while
> disabling it for all other prefixes. To support this behavior,
> implementations SHOULD provide a way to enable and disable generation of
> temporary addresses for specific prefix subranges.  This per-prefix
> setting SHOULD override the global settings on the node with respect to
> the specified prefix subranges.  Note that the pre-prefix setting can be
> applied at any granularity, and not necessarily on a per-subnet basis.

We don't support such a feature (yet). I think NetworkManager also
doesn't implement it so far.

Bye,
Hannes

^ permalink raw reply

* Re: [PATCH v3 3/9] ARM: sun8i: dt: Add DT bindings documentation for Allwinner sun8i-emac
From: Andrew Lunn @ 2016-09-12 15:15 UTC (permalink / raw)
  To: LABBE Corentin
  Cc: robh+dt, mark.rutland, maxime.ripard, wens, linux, davem, netdev,
	linux-kernel, linux-arm-kernel, devicetree
In-Reply-To: <20160912150125.GA15570@Red>

> Hello
> 

> Since the MDIO bus is a part of the sun8i-emac, does I really need
> to create such a mdio node ?

It is good practice. Part of the issue is that there are no written
guidelines, so different drivers do different things. I'm trying to
push all new drivers to have an MDIO node.

> Anyway I try the following patch to solve your comments, but it
> breaks the PHY finding(Could not attach to PHY).

> --- a/drivers/net/ethernet/allwinner/sun8i-emac.c
> +++ b/drivers/net/ethernet/allwinner/sun8i-emac.c
> @@ -2122,7 +2122,7 @@ static int sun8i_emac_probe(struct platform_device *pdev)
>                 return -EINVAL;
>         }
>  
> -       priv->phy_node = of_parse_phandle(node, "phy", 0);
> +       priv->phy_node = of_parse_phandle(node, "phy-handle", 0);
>         if (!priv->phy_node) {
>                 netdev_err(ndev, "No associated PHY\n");
>                 return -ENODEV;
> 
>  
>  &crypto {
> 


I don't see a change here for of_mdiobus_register(). You need to pass
the mdio node.

    Andrew

^ permalink raw reply

* [PATCH iproute2 net-next v1 2/7] tipc: add link monitor set threshold
From: Parthasarathy Bhuvaragan @ 2016-09-12 15:17 UTC (permalink / raw)
  To: netdev; +Cc: jon.maloy, tipc-discussion
In-Reply-To: <1473693441-14254-1-git-send-email-parthasarathy.bhuvaragan@ericsson.com>

The command sets the activation threshold for the new
cluster ring supervision.
A sample usage is shown below:
$ tipc link monitor set threshold 4

$ tipc link monitor set -h
Usage: tipc monitor set PPROPERTY

PROPERTIES
 threshold SIZE - Set activation threshold for monitor

Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Tested-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
---
 tipc/link.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 68 insertions(+), 1 deletion(-)

diff --git a/tipc/link.c b/tipc/link.c
index 8bdc98224d39..3469cd302469 100644
--- a/tipc/link.c
+++ b/tipc/link.c
@@ -489,6 +489,71 @@ static int cmd_link_set(struct nlmsghdr *nlh, const struct cmd *cmd,
 	return run_cmd(nlh, cmd, cmds, cmdl, NULL);
 }
 
+static int cmd_link_mon_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd,
+				 struct cmdl *cmdl, void *data)
+{
+	int size;
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlattr *attrs;
+
+	if (cmdl->argc != cmdl->optind + 1) {
+		fprintf(stderr, "error, missing value\n");
+		return -EINVAL;
+	}
+	size = atoi(shift_cmdl(cmdl));
+
+	if (!(nlh = msg_init(buf, TIPC_NL_MON_SET))) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+	attrs = mnl_attr_nest_start(nlh, TIPC_NLA_MON);
+
+	mnl_attr_put_u32(nlh, TIPC_NLA_MON_ACTIVATION_THRESHOLD, size);
+
+	mnl_attr_nest_end(nlh, attrs);
+
+	return msg_doit(nlh, NULL, NULL);
+}
+
+static void cmd_link_mon_set_help(struct cmdl *cmdl)
+{
+	fprintf(stderr, "Usage: %s monitor set PPROPERTY\n\n"
+		"PROPERTIES\n"
+		" threshold SIZE	- Set monitor activation threshold\n",
+		cmdl->argv[0]);
+}
+
+static int cmd_link_mon_set(struct nlmsghdr *nlh, const struct cmd *cmd,
+			    struct cmdl *cmdl, void *data)
+{
+	const struct cmd cmds[] = {
+		{ "threshold",	cmd_link_mon_set_prop,	NULL },
+		{ NULL }
+	};
+
+	return run_cmd(nlh, cmd, cmds, cmdl, NULL);
+}
+
+static void cmd_link_mon_help(struct cmdl *cmdl)
+{
+	fprintf(stderr,
+		"Usage: %s montior COMMAND [ARGS] ...\n\n"
+		"COMMANDS\n"
+		" set                  - Set monitor properties\n",
+		cmdl->argv[0]);
+}
+
+static int cmd_link_mon(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl,
+			void *data)
+{
+	const struct cmd cmds[] = {
+		{ "set",	cmd_link_mon_set,	cmd_link_mon_set_help },
+		{ NULL }
+	};
+
+	return run_cmd(nlh, cmd, cmds, cmdl, NULL);
+}
+
 void cmd_link_help(struct cmdl *cmdl)
 {
 	fprintf(stderr,
@@ -498,7 +563,8 @@ void cmd_link_help(struct cmdl *cmdl)
 		" list                  - List links\n"
 		" get                   - Get various link properties\n"
 		" set                   - Set various link properties\n"
-		" statistics            - Show or reset statistics\n",
+		" statistics            - Show or reset statistics\n"
+		" monitor               - Show or set link supervision\n",
 		cmdl->argv[0]);
 }
 
@@ -510,6 +576,7 @@ int cmd_link(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl,
 		{ "list",	cmd_link_list,	NULL },
 		{ "set",	cmd_link_set,	cmd_link_set_help },
 		{ "statistics", cmd_link_stat,	cmd_link_stat_help },
+		{ "monitor",	cmd_link_mon,	cmd_link_mon_help },
 		{ NULL }
 	};
 
-- 
2.1.4


------------------------------------------------------------------------------

^ permalink raw reply related

* [PATCH iproute2 net-next v1 3/7] tipc: add link monitor get threshold
From: Parthasarathy Bhuvaragan @ 2016-09-12 15:17 UTC (permalink / raw)
  To: netdev; +Cc: jon.maloy, tipc-discussion
In-Reply-To: <1473693441-14254-1-git-send-email-parthasarathy.bhuvaragan@ericsson.com>

The command prints the monitor activation threshold.
A sample usage is shown below:
$ tipc link monitor get threshold
32

$ tipc link monitor get -h
Usage: tipc monitor get PPROPERTY

PROPERTIES
 threshold      - Get monitor activation threshold

Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Tested-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
---
 tipc/link.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/tipc/link.c b/tipc/link.c
index 3469cd302469..3f0c32106772 100644
--- a/tipc/link.c
+++ b/tipc/link.c
@@ -534,12 +534,65 @@ static int cmd_link_mon_set(struct nlmsghdr *nlh, const struct cmd *cmd,
 	return run_cmd(nlh, cmd, cmds, cmdl, NULL);
 }
 
+static void cmd_link_mon_get_help(struct cmdl *cmdl)
+{
+	fprintf(stderr, "Usage: %s monitor get PPROPERTY \n\n"
+		"PROPERTIES\n"
+		" threshold 	- Get monitor activation threshold\n",
+		cmdl->argv[0]);
+}
+
+static int link_mon_get_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+	struct nlattr *info[TIPC_NLA_MAX + 1] = {};
+	struct nlattr *attrs[TIPC_NLA_MON_MAX + 1] = {};
+
+	mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, info);
+	if (!info[TIPC_NLA_MON])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(info[TIPC_NLA_MON], parse_attrs, attrs);
+	if (!attrs[TIPC_NLA_MON_ACTIVATION_THRESHOLD])
+		return MNL_CB_ERROR;
+
+	printf("%u\n",
+	       mnl_attr_get_u32(attrs[TIPC_NLA_MON_ACTIVATION_THRESHOLD]));
+
+	return MNL_CB_OK;
+}
+
+static int cmd_link_mon_get_prop(struct nlmsghdr *nlh, const struct cmd *cmd,
+				 struct cmdl *cmdl, void *data)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+
+	if (!(nlh = msg_init(buf, TIPC_NL_MON_GET))) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	return msg_doit(nlh,	link_mon_get_cb,	NULL);
+}
+
+static int cmd_link_mon_get(struct nlmsghdr *nlh, const struct cmd *cmd,
+			    struct cmdl *cmdl, void *data)
+{
+	const struct cmd cmds[] = {
+		{ "threshold",	cmd_link_mon_get_prop,	NULL},
+		{ NULL }
+	};
+
+	return run_cmd(nlh, cmd, cmds, cmdl, NULL);
+}
+
 static void cmd_link_mon_help(struct cmdl *cmdl)
 {
 	fprintf(stderr,
 		"Usage: %s montior COMMAND [ARGS] ...\n\n"
 		"COMMANDS\n"
-		" set                  - Set monitor properties\n",
+		" set			- Set monitor properties\n"
+		" get			- Get monitor properties\n",
 		cmdl->argv[0]);
 }
 
@@ -548,6 +601,7 @@ static int cmd_link_mon(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl
 {
 	const struct cmd cmds[] = {
 		{ "set",	cmd_link_mon_set,	cmd_link_mon_set_help },
+		{ "get",	cmd_link_mon_get,	cmd_link_mon_get_help },
 		{ NULL }
 	};
 
-- 
2.1.4


------------------------------------------------------------------------------

^ permalink raw reply related

* [PATCH iproute2 net-next v1 5/7] tipc: refactor bearer to facilitate link monitor
From: Parthasarathy Bhuvaragan @ 2016-09-12 15:17 UTC (permalink / raw)
  To: netdev; +Cc: jon.maloy, tipc-discussion
In-Reply-To: <1473693441-14254-1-git-send-email-parthasarathy.bhuvaragan@ericsson.com>

In this commit, we:
1. Export print_bearer_media()
2. Move the bearer name handling from nl_add_bearer_name() into
   a new function cmd_get_unique_bearer_name().

These exported functions will be used by the link monitor in
subsequent commits.

Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
---
 tipc/bearer.c | 75 +++++++++++++++++++++++++++++++++++------------------------
 tipc/bearer.h |  4 ++++
 2 files changed, 48 insertions(+), 31 deletions(-)

diff --git a/tipc/bearer.c b/tipc/bearer.c
index 8729dad4a060..810344f672af 100644
--- a/tipc/bearer.c
+++ b/tipc/bearer.c
@@ -45,7 +45,7 @@ static void _print_bearer_opts(void)
 		" window                - Bearer link window\n");
 }
 
-static void _print_bearer_media(void)
+void print_bearer_media(void)
 {
 	fprintf(stderr,
 		"\nMEDIA\n"
@@ -192,14 +192,28 @@ static int nl_add_udp_enable_opts(struct nlmsghdr *nlh, struct opt *opts,
 }
 
 static int nl_add_bearer_name(struct nlmsghdr *nlh, const struct cmd *cmd,
-			   struct cmdl *cmdl, struct opt *opts,
-			   struct tipc_sup_media sup_media[])
+			      struct cmdl *cmdl, struct opt *opts,
+			      const struct tipc_sup_media *sup_media)
+{
+	char bname[TIPC_MAX_BEARER_NAME];
+	int err;
+
+	if ((err = cmd_get_unique_bearer_name(cmd, cmdl, opts, bname, sup_media)))
+		return err;
+
+	mnl_attr_put_strz(nlh, TIPC_NLA_BEARER_NAME, bname);
+	return 0;
+}
+
+int cmd_get_unique_bearer_name(const struct cmd *cmd, struct cmdl *cmdl,
+			       struct opt *opts, char *bname,
+			       const struct tipc_sup_media *sup_media)
 {
-	char id[TIPC_MAX_BEARER_NAME];
 	char *media;
 	char *identifier;
 	struct opt *opt;
-	struct tipc_sup_media *entry;
+	const struct tipc_sup_media *entry;
+
 
 	if (!(opt = get_opt(opts, "media"))) {
 		if (help_flag)
@@ -219,13 +233,12 @@ static int nl_add_bearer_name(struct nlmsghdr *nlh, const struct cmd *cmd,
 				(entry->help)(cmdl, media);
 			else
 				fprintf(stderr, "error, missing bearer %s\n",
-						entry->identifier);
+					entry->identifier);
 			return -EINVAL;
 		}
 
 		identifier = opt->val;
-		snprintf(id, sizeof(id), "%s:%s", media, identifier);
-		mnl_attr_put_strz(nlh, TIPC_NLA_BEARER_NAME, id);
+		snprintf(bname, TIPC_MAX_BEARER_NAME, "%s:%s", media, identifier);
 
 		return 0;
 	}
@@ -270,13 +283,13 @@ static int udp_bearer_add(struct nlmsghdr *nlh, struct opt *opts,
 
 		if ((err = getaddrinfo(ip, remport, &hints, &addr))) {
 			fprintf(stderr, "UDP address error: %s\n",
-					gai_strerror(err));
+				gai_strerror(err));
 			freeaddrinfo(addr);
 			return err;
 		}
 
 		mnl_attr_put(nlh, TIPC_NLA_UDP_REMOTE, addr->ai_addrlen,
-				addr->ai_addr);
+			     addr->ai_addr);
 		freeaddrinfo(addr);
 	} else {
 		fprintf(stderr, "error, missing remoteip\n");
@@ -302,7 +315,7 @@ static int cmd_bearer_add_media(struct nlmsghdr *nlh, const struct cmd *cmd,
 		{ "media",		OPT_KEYVAL,	NULL },
 		{ NULL }
 	};
-	struct tipc_sup_media sup_media[] = {
+	const struct tipc_sup_media sup_media[] = {
 		{ "udp",	"name",		cmd_bearer_add_udp_help},
 		{ NULL, },
 	};
@@ -366,7 +379,7 @@ static void cmd_bearer_enable_help(struct cmdl *cmdl)
 		" domain DOMAIN         - Discovery domain\n"
 		" priority PRIORITY     - Bearer priority\n",
 		cmdl->argv[0]);
-	_print_bearer_media();
+	print_bearer_media();
 }
 
 static int cmd_bearer_enable(struct nlmsghdr *nlh, const struct cmd *cmd,
@@ -389,9 +402,9 @@ static int cmd_bearer_enable(struct nlmsghdr *nlh, const struct cmd *cmd,
 		{ NULL }
 	};
 	struct tipc_sup_media sup_media[] = {
-		{ "udp",	"name",		cmd_bearer_enable_udp_help},
-		{ "eth",	"device",	cmd_bearer_enable_l2_help },
-		{ "ib",		"device",	cmd_bearer_enable_l2_help },
+		{ "udp",        "name",         cmd_bearer_enable_udp_help},
+		{ "eth",        "device",       cmd_bearer_enable_l2_help },
+		{ "ib",         "device",       cmd_bearer_enable_l2_help },
 		{ NULL, },
 	};
 
@@ -449,7 +462,7 @@ static void cmd_bearer_disable_help(struct cmdl *cmdl)
 {
 	fprintf(stderr, "Usage: %s bearer disable media MEDIA ARGS...\n",
 		cmdl->argv[0]);
-	_print_bearer_media();
+	print_bearer_media();
 }
 
 static int cmd_bearer_disable(struct nlmsghdr *nlh, const struct cmd *cmd,
@@ -465,9 +478,9 @@ static int cmd_bearer_disable(struct nlmsghdr *nlh, const struct cmd *cmd,
 		{ NULL }
 	};
 	struct tipc_sup_media sup_media[] = {
-		{ "udp",	"name",		cmd_bearer_disable_udp_help},
-		{ "eth",	"device",	cmd_bearer_disable_l2_help },
-		{ "ib",		"device",	cmd_bearer_disable_l2_help },
+		{ "udp",        "name",         cmd_bearer_disable_udp_help},
+		{ "eth",        "device",       cmd_bearer_disable_l2_help },
+		{ "ib",         "device",       cmd_bearer_disable_l2_help },
 		{ NULL, },
 	};
 
@@ -497,7 +510,7 @@ static void cmd_bearer_set_help(struct cmdl *cmdl)
 	fprintf(stderr, "Usage: %s bearer set OPTION media MEDIA ARGS...\n",
 		cmdl->argv[0]);
 	_print_bearer_opts();
-	_print_bearer_media();
+	print_bearer_media();
 }
 
 static void cmd_bearer_set_udp_help(struct cmdl *cmdl, char *media)
@@ -516,7 +529,7 @@ static void cmd_bearer_set_l2_help(struct cmdl *cmdl, char *media)
 }
 
 static int cmd_bearer_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd,
-			 struct cmdl *cmdl, void *data)
+			       struct cmdl *cmdl, void *data)
 {
 	int err;
 	int val;
@@ -531,9 +544,9 @@ static int cmd_bearer_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd,
 		{ NULL }
 	};
 	struct tipc_sup_media sup_media[] = {
-		{ "udp",	"name",		cmd_bearer_set_udp_help},
-		{ "eth",	"device",	cmd_bearer_set_l2_help },
-		{ "ib",		"device",	cmd_bearer_set_l2_help },
+		{ "udp",        "name",         cmd_bearer_set_udp_help},
+		{ "eth",        "device",       cmd_bearer_set_l2_help },
+		{ "ib",         "device",       cmd_bearer_set_l2_help },
 		{ NULL, },
 	};
 
@@ -592,7 +605,7 @@ static void cmd_bearer_get_help(struct cmdl *cmdl)
 	fprintf(stderr, "Usage: %s bearer get [OPTION] media MEDIA ARGS...\n",
 		cmdl->argv[0]);
 	_print_bearer_opts();
-	_print_bearer_media();
+	print_bearer_media();
 }
 
 static void cmd_bearer_get_udp_help(struct cmdl *cmdl, char *media)
@@ -639,7 +652,7 @@ static int bearer_dump_udp_cb(const struct nlmsghdr *nlh, void *data)
 		struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *) addr;
 
 		if (!inet_ntop(AF_INET6, &ipv6->sin6_addr, straddr,
-					sizeof(straddr))) {
+			       sizeof(straddr))) {
 			fprintf(stderr, "error, parsing IPv6 addr\n");
 			return MNL_CB_ERROR;
 		}
@@ -705,7 +718,7 @@ static int bearer_get_udp_cb(const struct nlmsghdr *nlh, void *data)
 		switch (cb_data->prop) {
 		case UDP_PROP_IP:
 			if (!inet_ntop(AF_INET6, &ipv6->sin6_addr, straddr,
-						sizeof(straddr))) {
+				       sizeof(straddr))) {
 				fprintf(stderr, "error, parsing IPv6 addr\n");
 				return MNL_CB_ERROR;
 			}
@@ -769,7 +782,7 @@ static int cmd_bearer_get_media(struct nlmsghdr *nlh, const struct cmd *cmd,
 		{ NULL }
 	};
 	struct tipc_sup_media sup_media[] = {
-		{ "udp",	"name",		cmd_bearer_get_udp_help},
+		{ "udp",        "name",         cmd_bearer_get_udp_help},
 		{ NULL, },
 	};
 
@@ -844,9 +857,9 @@ static int cmd_bearer_get_prop(struct nlmsghdr *nlh, const struct cmd *cmd,
 		{ NULL }
 	};
 	struct tipc_sup_media sup_media[] = {
-		{ "udp",	"name",		cmd_bearer_get_udp_help},
-		{ "eth",	"device",	cmd_bearer_get_l2_help },
-		{ "ib",		"device",	cmd_bearer_get_l2_help },
+		{ "udp",        "name",         cmd_bearer_get_udp_help},
+		{ "eth",        "device",       cmd_bearer_get_l2_help },
+		{ "ib",         "device",       cmd_bearer_get_l2_help },
 		{ NULL, },
 	};
 
diff --git a/tipc/bearer.h b/tipc/bearer.h
index 9459d65ebb5f..c0d099630b27 100644
--- a/tipc/bearer.h
+++ b/tipc/bearer.h
@@ -19,4 +19,8 @@ extern int help_flag;
 int cmd_bearer(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl, void *data);
 void cmd_bearer_help(struct cmdl *cmdl);
 
+void print_bearer_media(void);
+int cmd_get_unique_bearer_name(const struct cmd *cmd, struct cmdl *cmdl,
+			       struct opt *opts, char *bname,
+			       const struct tipc_sup_media *sup_media);
 #endif
-- 
2.1.4


------------------------------------------------------------------------------

^ permalink raw reply related

* [PATCH iproute2 net-next v1 6/7] tipc: add link monitor list
From: Parthasarathy Bhuvaragan @ 2016-09-12 15:17 UTC (permalink / raw)
  To: netdev; +Cc: jon.maloy, tipc-discussion
In-Reply-To: <1473693441-14254-1-git-send-email-parthasarathy.bhuvaragan@ericsson.com>

In this commit, we list the monitor attributes. By default it lists
the attributes for all bearers, otherwise the specified bearer.

A sample usage is shown below:
$ tipc link monitor list

bearer eth:data0
node          status monitored generation applied_node_status [non_applied_node:status]
1.1.1         up     direct    16         UU []
1.1.2         up     direct    16         UU []
1.1.3         up     direct    16         UU []

bearer eth:data1
node          status monitored generation applied_node_status [non_applied_node:status]
1.1.1         up     direct    2          UU []
1.1.2         up     direct    3          UU []
1.1.3         up     direct    3          UU []

$ tipc link monitor list media eth device data0

bearer eth:data0
node          status monitored generation applied_node_status [non_applied_node:status]
1.1.1         up     direct    16         UU []
1.1.2         up     direct    16         UU []
1.1.3         up     direct    16         UU []

$ tipc link monitor list -h
Usage: tipc monitor list [ media MEDIA ARGS...]

MEDIA
 udp                   - User Datagram Protocol
 ib                    - Infiniband
 eth                   - Ethernet

Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Tested-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
---
 tipc/link.c | 237 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 237 insertions(+)

diff --git a/tipc/link.c b/tipc/link.c
index df93409f2173..0b5c0491a35f 100644
--- a/tipc/link.c
+++ b/tipc/link.c
@@ -22,6 +22,7 @@
 #include "cmdl.h"
 #include "msg.h"
 #include "link.h"
+#include "bearer.h"
 
 static int link_list_cb(const struct nlmsghdr *nlh, void *data)
 {
@@ -558,6 +559,240 @@ static int cmd_link_mon_summary(struct nlmsghdr *nlh, const struct cmd *cmd,
 	return msg_dumpit(nlh, link_mon_summary_cb, NULL);
 }
 
+#define STATUS_WIDTH 7
+#define MAX_NODE_WIDTH 14 /* 255.4095.4095 */
+#define MAX_DOM_GEN_WIDTH 11 /* 65535 */
+#define DIRECTLY_MON_WIDTH 10
+
+#define APPL_NODE_STATUS_WIDTH 5
+
+static int map_get(uint64_t up_map, int i)
+{
+	return (up_map & (1 << i)) >> i;
+}
+
+/* print the applied members, since we know the members
+ * are listed in ascending order, we print only the state */
+static void link_mon_print_applied(uint16_t applied, uint64_t up_map)
+{
+	int i;
+	char state;
+
+	for (i = 0; i < applied; i++) {
+		/* print the delimiter for every -n- entry */
+		if (i && !(i % APPL_NODE_STATUS_WIDTH))
+			printf(",");
+
+		state = map_get(up_map, i) ? 'U' : 'D';
+		printf("%c", state);
+	}
+}
+
+/* print the non applied members, since we don't know
+ * the members, we print them along with the state */
+static void link_mon_print_non_applied(uint16_t applied, uint16_t member_cnt,
+				       uint64_t up_map,  uint32_t *members)
+{
+	int i;
+	char state;
+
+	printf(" [");
+	for (i = applied; i < member_cnt; i++) {
+		char addr_str[16];
+
+		/* print the delimiter for every entry */
+		if (i != applied)
+			printf(",");
+
+		sprintf(addr_str, "%u.%u.%u:", tipc_zone(members[i]),
+			tipc_cluster(members[i]), tipc_node(members[i]));
+		state = map_get(up_map, i) ? 'U' : 'D';
+		printf("%s%c", addr_str, state);
+	}
+	printf("]");
+}
+
+static void link_mon_print_peer_state(const uint32_t addr, const char *status,
+				      const char *monitored,
+				      const uint32_t dom_gen)
+{
+	char addr_str[16];
+
+	sprintf(addr_str, "%u.%u.%u", tipc_zone(addr), tipc_cluster(addr),
+		tipc_node(addr));
+
+	printf("%-*s", MAX_NODE_WIDTH, addr_str);
+	printf("%-*s", STATUS_WIDTH, status);
+	printf("%-*s", DIRECTLY_MON_WIDTH, monitored);
+	printf("%-*u", MAX_DOM_GEN_WIDTH, dom_gen);
+}
+
+static int link_mon_peer_list_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+	struct nlattr *attrs[TIPC_NLA_MON_PEER_MAX + 1] = {};
+	struct nlattr *info[TIPC_NLA_MAX + 1] = {};
+	uint16_t member_cnt;
+	uint32_t applied;
+	uint32_t dom_gen;
+	uint64_t up_map;
+	char status[16];
+	char monitored[16];
+
+	mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, info);
+	if (!info[TIPC_NLA_MON_PEER])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(info[TIPC_NLA_MON_PEER], parse_attrs, attrs);
+
+	(attrs[TIPC_NLA_MON_PEER_LOCAL] || attrs[TIPC_NLA_MON_PEER_HEAD]) ?
+		strcpy(monitored, "direct") :
+		strcpy(monitored, "indirect");
+
+	attrs[TIPC_NLA_MON_PEER_UP] ?
+		strcpy(status, "up") :
+		strcpy(status, "down");
+
+	dom_gen = attrs[TIPC_NLA_MON_PEER_DOMGEN] ?
+		mnl_attr_get_u32(attrs[TIPC_NLA_MON_PEER_DOMGEN]) : 0;
+
+	link_mon_print_peer_state(mnl_attr_get_u32(attrs[TIPC_NLA_MON_PEER_ADDR]),
+				  status, monitored, dom_gen);
+
+	applied = mnl_attr_get_u32(attrs[TIPC_NLA_MON_PEER_APPLIED]);
+
+	if (!applied)
+		goto exit;
+
+	up_map = mnl_attr_get_u64(attrs[TIPC_NLA_MON_PEER_UPMAP]);
+
+	member_cnt = mnl_attr_get_payload_len(attrs[TIPC_NLA_MON_PEER_MEMBERS]);
+
+	/* each tipc address occupies 4 bytes of payload, hence compensate it */
+	member_cnt /= sizeof(uint32_t);
+
+	link_mon_print_applied(applied, up_map);
+
+	link_mon_print_non_applied(applied, member_cnt, up_map,
+				   mnl_attr_get_payload(attrs[TIPC_NLA_MON_PEER_MEMBERS]));
+
+exit:
+	printf("\n");
+
+	return MNL_CB_OK;
+}
+
+static int link_mon_peer_list(uint32_t mon_ref)
+{
+	struct nlmsghdr *nlh;
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	struct nlattr *nest;
+
+	if (!(nlh = msg_init(buf, TIPC_NL_MON_PEER_GET))) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	nest = mnl_attr_nest_start(nlh, TIPC_NLA_MON);
+	mnl_attr_put_u32(nlh, TIPC_NLA_MON_REF, mon_ref);
+	mnl_attr_nest_end(nlh, nest);
+
+	return msg_dumpit(nlh, link_mon_peer_list_cb, NULL);
+}
+
+static int link_mon_list_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+	struct nlattr *info[TIPC_NLA_MAX + 1] = {};
+	struct nlattr *attrs[TIPC_NLA_MON_MAX + 1] = {};
+	char *req_bearer = data;
+	const char *bname;
+	const char *title = "node          status monitored generation "
+			    "applied_node_status [non_applied_node:status]";
+
+	mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, info);
+	if (!info[TIPC_NLA_MON])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(info[TIPC_NLA_MON], parse_attrs, attrs);
+
+	bname = mnl_attr_get_str(attrs[TIPC_NLA_MON_BEARER_NAME]);
+
+	if (*req_bearer && (strcmp(req_bearer, bname) != 0))
+		return MNL_CB_OK;
+
+	printf("\nbearer %s\n", bname);
+	printf("%s\n", title);
+
+	if (mnl_attr_get_u32(attrs[TIPC_NLA_MON_PEERCNT]))
+		link_mon_peer_list(mnl_attr_get_u32(attrs[TIPC_NLA_MON_REF]));
+
+	return MNL_CB_OK;
+}
+
+static void cmd_link_mon_list_help(struct cmdl *cmdl)
+{
+	fprintf(stderr, "Usage: %s monitor list [ media MEDIA ARGS...] \n\n",
+		cmdl->argv[0]);
+	print_bearer_media();
+}
+
+static void cmd_link_mon_list_l2_help(struct cmdl *cmdl, char *media)
+{
+	fprintf(stderr,
+		"Usage: %s monitor list media %s device DEVICE [OPTIONS]\n",
+		cmdl->argv[0], media);
+}
+
+static void cmd_link_mon_list_udp_help(struct cmdl *cmdl, char *media)
+{
+	fprintf(stderr,
+		"Usage: %s monitor list media udp name NAME \n\n",
+		cmdl->argv[0]);
+}
+
+static int cmd_link_mon_list(struct nlmsghdr *nlh, const struct cmd *cmd,
+			     struct cmdl *cmdl, void *data)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+	char bname[TIPC_MAX_BEARER_NAME] = {0};
+	struct opt opts[] = {
+		{ "media",	OPT_KEYVAL,	NULL },
+		{ "device",	OPT_KEYVAL,	NULL },
+		{ "name",	OPT_KEYVAL,	NULL },
+		{ NULL }
+	};
+	struct tipc_sup_media sup_media[] = {
+		{ "udp",        "name",         cmd_link_mon_list_udp_help},
+		{ "eth",        "device",       cmd_link_mon_list_l2_help },
+		{ "ib",         "device",       cmd_link_mon_list_l2_help },
+		{ NULL, },
+	};
+
+	int err;
+
+	if (parse_opts(opts, cmdl) < 0)
+		return -EINVAL;
+
+	if (get_opt(opts, "media")) {
+		if ((err = cmd_get_unique_bearer_name(cmd, cmdl, opts, bname,
+						      sup_media)))
+			return err;
+	}
+
+	if (help_flag) {
+		cmd->help(cmdl);
+		return -EINVAL;
+	}
+
+	if (!(nlh = msg_init(buf, TIPC_NL_MON_GET))) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	return msg_dumpit(nlh, link_mon_list_cb, bname);
+}
+
 static void cmd_link_mon_set_help(struct cmdl *cmdl)
 {
 	fprintf(stderr, "Usage: %s monitor set PPROPERTY\n\n"
@@ -636,6 +871,7 @@ static void cmd_link_mon_help(struct cmdl *cmdl)
 		"COMMANDS\n"
 		" set			- Set monitor properties\n"
 		" get			- Get monitor properties\n"
+		" list			- List all cluster members\n"
 		" summary		- Show local node monitor summary\n",
 		cmdl->argv[0]);
 }
@@ -646,6 +882,7 @@ static int cmd_link_mon(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl
 	const struct cmd cmds[] = {
 		{ "set",	cmd_link_mon_set,	cmd_link_mon_set_help },
 		{ "get",	cmd_link_mon_get,	cmd_link_mon_get_help },
+		{ "list",	cmd_link_mon_list,	cmd_link_mon_list_help },
 		{ "summary",	cmd_link_mon_summary,	NULL },
 		{ NULL }
 	};
-- 
2.1.4


------------------------------------------------------------------------------

^ permalink raw reply related

* [PATCH iproute2 net-next v1 7/7] tipc: update man page for link monitor
From: Parthasarathy Bhuvaragan @ 2016-09-12 15:17 UTC (permalink / raw)
  To: netdev; +Cc: jon.maloy, tipc-discussion
In-Reply-To: <1473693441-14254-1-git-send-email-parthasarathy.bhuvaragan@ericsson.com>

Add description for the new link monitor commands.

Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
---
 man/man8/tipc-link.8 | 104 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)

diff --git a/man/man8/tipc-link.8 b/man/man8/tipc-link.8
index 2ee03a0bd96e..fee283e5cfff 100644
--- a/man/man8/tipc-link.8
+++ b/man/man8/tipc-link.8
@@ -39,6 +39,29 @@ tipc-link \- show links or modify link properties
 .B tipc link list
 .br
 
+.ti -8
+.B tipc link monitor set
+.RB "{ " "threshold" " } "
+
+.ti -8
+.B tipc link monitor get
+.RB "{ " "threshold" " } "
+
+.ti -8
+.B tipc link monitor summary
+.br
+
+.ti -8
+.B tipc link monitor list
+.br
+.RB "[ " "media " " { " eth " | " ib " } " device
+.IR "DEVICE" " ]"
+.RB "|"
+.br
+.RB "[ " "media udp name"
+.IR NAME " ]"
+.br
+
 .SH OPTIONS
 Options (flags) that can be passed anywhere in the command chain.
 .TP
@@ -204,6 +227,87 @@ The link window controls how many unacknowledged messages a link endpoint can
 have in its transmit queue before TIPC's congestion control mechanism is
 activated.
 
+.SS Monitor properties
+
+.TP
+.B threshold
+.br
+The threshold specifies the cluster size exceeding which the link monitoring
+algorithm will switch from "full-mesh" to "overlapping-ring".
+If set to 0 the overlapping-ring monitoring is always on and if set to a
+value larger than anticipated cluster size the overlapping-ring is disabled.
+The default value is 32.
+
+.SS Monitor information
+
+.TP
+.B table_generation
+.br
+Represents the event count in a node's local monitoring list. It steps every
+time something changes in the local monitor list, including changes in the
+local domain.
+
+.TP
+.B cluster_size
+.br
+Represents the current count of cluster members.
+
+.TP
+.B algorithm
+.br
+The current supervision algorithm used for neighbour monitoring for the bearer.
+Possible values are full-mesh or overlapping-ring.
+
+.TP
+.B status
+.br
+The node status derived by the local node.
+Possible status are up or down.
+
+.TP
+.B monitored
+.br
+Represents the type of monitoring chosen by the local node.
+Possible values are direct or indirect.
+
+.TP
+.B generation
+.br
+Represents the domain generation which is the event count in a node's local
+domain. Every time something changes (peer add/remove/up/down) the domain
+generation is stepped and a new version of node record is sent to inform
+the neighbors about this change. The domain generation helps the receiver
+of a domain record to know if it should ignore or process the record.
+
+.TP
+.B applied_node_status
+.br
+The node status reported by the peer node for the succeeding peers in
+the node list. The Node list is a circular list of ascending addresses
+starting with the local node.
+Possible status are: U or D. The status U implies up and D down.
+
+.TP
+.B [non_applied_node:status]
+.br
+Represents the nodes and their status as reported by the peer node.
+These nodes were not applied to the monitoring list for this peer node.
+They are usually transient and occur during the cluster startup phase
+or network reconfiguration.
+Possible status are: U or D. The status U implies up and D down.
+
+.SH EXAMPLES
+.PP
+tipc link monitor list
+.RS 4
+Shows the link monitoring information for cluster members on all bearers.
+.RE
+.PP
+tipc link monitor summary
+.RS 4
+The monitor summary command prints the basic attributes.
+.RE
+
 .SH EXIT STATUS
 Exit status is 0 if command was successful or a positive integer upon failure.
 
-- 
2.1.4


------------------------------------------------------------------------------

^ permalink raw reply related

* Re: [PATCH net-next 7/7] cxgb4: add support for drop and redirect actions
From: John Fastabend @ 2016-09-12 15:17 UTC (permalink / raw)
  To: Jiri Pirko, Rahul Lakkireddy, Jamal Hadi Salim
  Cc: netdev, davem, hariprasad, leedom, nirranjan, indranil
In-Reply-To: <20160912085253.GF2021@nanopsycho>

On 16-09-12 01:52 AM, Jiri Pirko wrote:
> Mon, Sep 12, 2016 at 10:12:40AM CEST, rahul.lakkireddy@chelsio.com wrote:
>> Add support for dropping matched packets in hardware.  Also add support
>> for re-directing matched packets to a specified port in hardware.
>>
>> Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
>> Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
>> ---

[...]

>>
>> +/* Fill ch_filter_specification with parsed action. */
>> +static int fill_action_fields(struct adapter *adap,
>> +			      struct ch_filter_specification *fs,
>> +			      struct tc_cls_u32_offload *cls)
>> +{
>> +	const struct tc_action *a;
>> +	struct tcf_exts *exts;
>> +	LIST_HEAD(actions);
>> +	unsigned int num_actions = 0;
>> +	bool found = false;
>> +
>> +	exts = cls->knode.exts;
>> +	if (tc_no_actions(exts))
>> +		return -EINVAL;
>> +
>> +	tcf_exts_to_list(exts, &actions);
>> +	list_for_each_entry(a, &actions, list) {
>> +		/* Don't allow more than one action per rule. */
>> +		if (num_actions)
>> +			return -EINVAL;
> 
> 
> Looking at this, unrelated to this patch, we really need some advanced
> reporting to user about what went wrong. Otherwise he's playing a
> guessing game.
> 

+1 my recommendation to new users has been to annotate or read the
kernel source when they get errors which is obviously a failure on
our part to build usable error messages.

Note it's not even really related to hardware offload; it's bad even
with just the software use case, and the hardware offloads make it a
bit more mysterious when an error is returned.

Maybe Jamal can add it to his tc-workshop at netdev conference so
we can get some consensus about how to do this.

.John

^ permalink raw reply

* [PATCH iproute2 net-next v1 0/7] tipc: updates for neighbour monitor
From: Parthasarathy Bhuvaragan @ 2016-09-12 15:17 UTC (permalink / raw)
  To: netdev; +Cc: tipc-discussion, jon.maloy, maloy

We add configuration support for the new link monitoring attributes.

Parthasarathy Bhuvaragan (7):
  tipc: remove dead code
  tipc: add link monitor set threshold
  tipc: add link monitor get threshold
  tipc: add link monitor summary
  tipc: refractor bearer to facilitate link monitor
  tipc: add link monitor list
  tipc: update man page for link monitor

 man/man8/tipc-link.8 | 104 +++++++++++++
 tipc/bearer.c        |  75 ++++++----
 tipc/bearer.h        |   4 +
 tipc/link.c          | 408 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 556 insertions(+), 35 deletions(-)

-- 
2.1.4

^ permalink raw reply

* [PATCH iproute2 net-next v1 1/7] tipc: remove dead code
From: Parthasarathy Bhuvaragan @ 2016-09-12 15:17 UTC (permalink / raw)
  To: netdev; +Cc: tipc-discussion, jon.maloy, maloy
In-Reply-To: <1473693441-14254-1-git-send-email-parthasarathy.bhuvaragan@ericsson.com>

remove dead code and a newline.

Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
---
 tipc/link.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tipc/link.c b/tipc/link.c
index 061b1c534389..8bdc98224d39 100644
--- a/tipc/link.c
+++ b/tipc/link.c
@@ -90,7 +90,6 @@ static int link_get_cb(const struct nlmsghdr *nlh, void *data)
 	return MNL_CB_OK;
 }
 
-
 static int cmd_link_get_prop(struct nlmsghdr *nlh, const struct cmd *cmd,
 			     struct cmdl *cmdl, void *data)
 {
@@ -475,8 +474,6 @@ static int cmd_link_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd,
 	mnl_attr_nest_end(nlh, attrs);
 
 	return msg_doit(nlh, link_get_cb, &prop);
-
-	return 0;
 }
 
 static int cmd_link_set(struct nlmsghdr *nlh, const struct cmd *cmd,
-- 
2.1.4

^ permalink raw reply related

* [PATCH iproute2 net-next v1 4/7] tipc: add link monitor summary
From: Parthasarathy Bhuvaragan @ 2016-09-12 15:17 UTC (permalink / raw)
  To: netdev; +Cc: tipc-discussion, jon.maloy, maloy
In-Reply-To: <1473693441-14254-1-git-send-email-parthasarathy.bhuvaragan@ericsson.com>

The monitor summary command prints the basic attributes
specific to the local node.
A sample usage is shown below:
$ tipc link monitor summary
bearer eth:data0
    table_generation 15
    cluster_size 8
    algorithm overlapping-ring

bearer eth:data1
    table_generation 15
    cluster_size 8
    algorithm overlapping-ring

$ tipc link monitor summary -h
Usage: tipc monitor summary

Acked-by: Jon Maloy <jon.maloy@ericsson.com>
Tested-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
---
 tipc/link.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/tipc/link.c b/tipc/link.c
index 3f0c32106772..df93409f2173 100644
--- a/tipc/link.c
+++ b/tipc/link.c
@@ -515,6 +515,49 @@ static int cmd_link_mon_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd,
 	return msg_doit(nlh, NULL, NULL);
 }
 
+static int link_mon_summary_cb(const struct nlmsghdr *nlh, void *data)
+{
+	struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh);
+	struct nlattr *info[TIPC_NLA_MAX + 1] = {};
+	struct nlattr *attrs[TIPC_NLA_MON_MAX + 1] = {};
+
+	mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, info);
+	if (!info[TIPC_NLA_MON])
+		return MNL_CB_ERROR;
+
+	mnl_attr_parse_nested(info[TIPC_NLA_MON], parse_attrs, attrs);
+
+	printf("\nbearer %s\n",
+		mnl_attr_get_str(attrs[TIPC_NLA_MON_BEARER_NAME]));
+
+	printf("    table_generation %u\n",
+	       mnl_attr_get_u32(attrs[TIPC_NLA_MON_LISTGEN]));
+	printf("    cluster_size %u\n",
+		mnl_attr_get_u32(attrs[TIPC_NLA_MON_PEERCNT]));
+	printf("    algorithm %s\n",
+		attrs[TIPC_NLA_MON_ACTIVE] ? "overlapping-ring" : "full-mesh");
+
+	return MNL_CB_OK;
+}
+
+static int cmd_link_mon_summary(struct nlmsghdr *nlh, const struct cmd *cmd,
+				struct cmdl *cmdl, void *data)
+{
+	char buf[MNL_SOCKET_BUFFER_SIZE];
+
+	if (help_flag) {
+		fprintf(stderr,	"Usage: %s monitor summary\n", cmdl->argv[0]);
+		return -EINVAL;
+	}
+
+	if (!(nlh = msg_init(buf, TIPC_NL_MON_GET))) {
+		fprintf(stderr, "error, message initialisation failed\n");
+		return -1;
+	}
+
+	return msg_dumpit(nlh, link_mon_summary_cb, NULL);
+}
+
 static void cmd_link_mon_set_help(struct cmdl *cmdl)
 {
 	fprintf(stderr, "Usage: %s monitor set PPROPERTY\n\n"
@@ -592,7 +635,8 @@ static void cmd_link_mon_help(struct cmdl *cmdl)
 		"Usage: %s montior COMMAND [ARGS] ...\n\n"
 		"COMMANDS\n"
 		" set			- Set monitor properties\n"
-		" get			- Get monitor properties\n",
+		" get			- Get monitor properties\n"
+		" summary		- Show local node monitor summary\n",
 		cmdl->argv[0]);
 }
 
@@ -602,6 +646,7 @@ static int cmd_link_mon(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl
 	const struct cmd cmds[] = {
 		{ "set",	cmd_link_mon_set,	cmd_link_mon_set_help },
 		{ "get",	cmd_link_mon_get,	cmd_link_mon_get_help },
+		{ "summary",	cmd_link_mon_summary,	NULL },
 		{ NULL }
 	};
 
-- 
2.1.4

^ permalink raw reply related

* Re: [PATCH RFC 1/6] spinlock: Add library function to allocate spinlock buckets array
From: Greg @ 2016-09-12 15:17 UTC (permalink / raw)
  To: Tom Herbert; +Cc: davem, netdev, kernel-team, tgraf
In-Reply-To: <1473463197-3076903-2-git-send-email-tom@herbertland.com>

On Fri, 2016-09-09 at 16:19 -0700, Tom Herbert wrote:
> Add two new library functions alloc_bucket_spinlocks and
> free_bucket_spinlocks. These are used to allocate and free an array
> of spinlocks that are useful as locks for hash buckets. The interface
> specifies the maximum number of spinlocks in the array as well
> as a CPU multiplier to derive the number of spinlocks to allocate.
> The number to allocate is rounded up to a power of two to make
> the array amenable to hash lookup.
> 
> Signed-off-by: Tom Herbert <tom@herbertland.com>

I like this idea!!

Reviewed by Greg Rose <grose@lightfleet.com>

> ---
>  include/linux/spinlock.h |  6 +++++
>  lib/Makefile             |  2 +-
>  lib/bucket_locks.c       | 63 ++++++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 70 insertions(+), 1 deletion(-)
>  create mode 100644 lib/bucket_locks.c
> 
> diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
> index 47dd0ce..4ebdfbf 100644
> --- a/include/linux/spinlock.h
> +++ b/include/linux/spinlock.h
> @@ -416,4 +416,10 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
>  #define atomic_dec_and_lock(atomic, lock) \
>  		__cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
>  
> +int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
> +			   unsigned int max_size, unsigned int cpu_mult,
> +			   gfp_t gfp);
> +
> +void free_bucket_spinlocks(spinlock_t *locks);
> +
>  #endif /* __LINUX_SPINLOCK_H */
> diff --git a/lib/Makefile b/lib/Makefile
> index cfa68eb..a1dedf1 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -37,7 +37,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
>  	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
>  	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
>  	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \
> -	 once.o
> +	 once.o bucket_locks.o
>  obj-y += string_helpers.o
>  obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
>  obj-y += hexdump.o
> diff --git a/lib/bucket_locks.c b/lib/bucket_locks.c
> new file mode 100644
> index 0000000..bb9bf11
> --- /dev/null
> +++ b/lib/bucket_locks.c
> @@ -0,0 +1,63 @@
> +#include <linux/kernel.h>
> +#include <linux/slab.h>
> +#include <linux/vmalloc.h>
> +#include <linux/mm.h>
> +#include <linux/export.h>
> +
> +/* Allocate an array of spinlocks to be accessed by a hash. Two arguments
> + * indicate the number of elements to allocate in the array. max_size
> + * gives the maximum number of elements to allocate. cpu_mult gives
> + * the number of locks per CPU to allocate. The size is rounded up
> + * to a power of 2 to be suitable as a hash table.
> + */
> +int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask,
> +			   unsigned int max_size, unsigned int cpu_mult,
> +			   gfp_t gfp)
> +{
> +	unsigned int i, size;
> +#if defined(CONFIG_PROVE_LOCKING)
> +	unsigned int nr_pcpus = 2;
> +#else
> +	unsigned int nr_pcpus = num_possible_cpus();
> +#endif
> +	spinlock_t *tlocks = NULL;
> +
> +	if (cpu_mult) {
> +		nr_pcpus = min_t(unsigned int, nr_pcpus, 64UL);
> +		size = min_t(unsigned int, nr_pcpus * cpu_mult, max_size);
> +	} else {
> +		size = max_size;
> +	}
> +	size = roundup_pow_of_two(size);
> +
> +	if (!size)
> +		return -EINVAL;
> +
> +	if (sizeof(spinlock_t) != 0) {
> +#ifdef CONFIG_NUMA
> +		if (size * sizeof(spinlock_t) > PAGE_SIZE &&
> +		    gfp == GFP_KERNEL)
> +			tlocks = vmalloc(size * sizeof(spinlock_t));
> +#endif
> +		if (gfp != GFP_KERNEL)
> +			gfp |= __GFP_NOWARN | __GFP_NORETRY;
> +
> +		if (!tlocks)
> +			tlocks = kmalloc_array(size, sizeof(spinlock_t), gfp);
> +		if (!tlocks)
> +			return -ENOMEM;
> +		for (i = 0; i < size; i++)
> +			spin_lock_init(&tlocks[i]);
> +	}
> +	*locks = tlocks;
> +	*locks_mask = size - 1;
> +
> +	return 0;
> +}
> +EXPORT_SYMBOL(alloc_bucket_spinlocks);
> +
> +void free_bucket_spinlocks(spinlock_t *locks)
> +{
> +	kvfree(locks);
> +}
> +EXPORT_SYMBOL(free_bucket_spinlocks);

^ permalink raw reply

* Re: [PATCH net] net_sched: act_mirred: full rcu conversion
From: John Fastabend @ 2016-09-12 15:34 UTC (permalink / raw)
  To: Cong Wang
  Cc: Eric Dumazet, David Miller, Linux Kernel Network Developers,
	Jamal Hadi Salim, Hadar Hen Zion, Amir Vadai
In-Reply-To: <CAM_iQpXycdn9_LW_qYzUe5T97ABOj3KrivjCB5ho9wbKeFR0MQ@mail.gmail.com>

On 16-09-11 11:12 PM, Cong Wang wrote:
> On Fri, Sep 9, 2016 at 8:52 AM, John Fastabend <john.fastabend@gmail.com> wrote:
>> On 16-09-08 10:26 PM, Cong Wang wrote:
>>> On Thu, Sep 8, 2016 at 8:51 AM, Eric Dumazet <eric.dumazet@gmail.com> wrote:
>>>> On Thu, 2016-09-08 at 08:47 -0700, John Fastabend wrote:
>>>>
>>>>> Works for me. FWIW I find this plenty straightforward and don't really
>>>>> see the need to make the hash table itself rcu friendly.
>>>>>
>>>>> Acked-by: John Fastabend <john.r.fastabend@intel.com>
>>>>>
>>>>
>>>> Yes, it seems this hash table is used in control path, with RTNL held
>>>> anyway.
>>>
>>> Seriously? You never read hashtable in fast path?? I think you need
>>> to wake up.
>>>
>>
>> But the actions use refcnt'ing and should never be decremented to zero
>> as long as they can still be referenced by an active filter. If each
>> action handles its parameters like mirred/gact then I don't see why its
>> necessary.
> 
> This is correct, by "read" I meant "dereference", the tc actions
> are now permanently stored in hashtable directly, so "reading"
> a tc action is reading from hashtable.
> 
> Sorry if this wasn't clear.
> 

OK, but with the current code there is no need to protect the hash table
with RCU semantics. The ref counting ensures the hash table entries
are always available and any 'replace' commands on actions are handled
internally in the action itself with rcu_assign_pointer replacing old
params with new params. The fast path readers will always read a
consistent set of parameters in this scheme.

So no need for an rcu hash table. The reference count though should
likely be atomic because not all increments/decrements are protected by
RTNL lock.

.John

^ permalink raw reply

* Re: [PATCH 08/15] ixgbe: use IS_ENABLED() instead of checking for built-in or module
From: Greg @ 2016-09-12 15:36 UTC (permalink / raw)
  To: Javier Martinez Canillas
  Cc: linux-kernel, netdev, intel-wired-lan, Jeff Kirsher
In-Reply-To: <1473689026-6983-9-git-send-email-javier@osg.samsung.com>

On Mon, 2016-09-12 at 10:03 -0400, Javier Martinez Canillas wrote:
> The IS_ENABLED() macro checks if a Kconfig symbol has been enabled either
> built-in or as a module, use that macro instead of open coding the same.
> 
> Using the macro makes the code more readable by helping abstract away some
> of the Kconfig built-in and module enable details.
> 
> Signed-off-by: Javier Martinez Canillas <javier@osg.samsung.com>
> ---
> 
>  drivers/net/ethernet/intel/ixgbe/ixgbe.h | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> index 33c025055011..b06e32d0d22a 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> @@ -45,10 +45,10 @@
>  #include "ixgbe_type.h"
>  #include "ixgbe_common.h"
>  #include "ixgbe_dcb.h"
> -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
> +#if IS_ENABLED(CONFIG_FCOE)

I was wondering what happens if CONFIG_FCOE_MODULE is defined but not
CONFIG_FCOE but then couldn't even find CONFIG_FCOE_MODULE in any
Kconfigs anywhere in the current tree.

Looks good to me.

      * Reviewed-by: Greg Rose <grose@lightfleet.com>

>  #define IXGBE_FCOE
>  #include "ixgbe_fcoe.h"
> -#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */
> +#endif /* IS_ENABLED(CONFIG_FCOE) */
>  #ifdef CONFIG_IXGBE_DCA
>  #include <linux/dca.h>
>  #endif

^ permalink raw reply

* About enet_out clk on i.MX28, i.MX6 and i.MX7
From: Uwe Kleine-König @ 2016-09-12 15:36 UTC (permalink / raw)
  To: linux-arm-kernel, netdev
  Cc: Lothar Waßmann, Lauri Hintsala, Shawn Guo, Fabio Estevam,
	kernel

Hello,

to operate the MDIO bus a clk is required. On some i.MX SoCs it can be
configured whether that clk is provided by the CPU or not (i.e. something
else provides it).

The devicetree abstraction for that is (e.g. on i.MX28):

	mac0: ethernet@800f0000 {
		compatible = "fsl,imx28-fec";
		...
		clocks = <&clks 57>, <&clks 57>, <&clks 64>;
		clock-names = "ipg", "ahb", "enet_out";
		...
	};

and the driver does:

	/* enet_out is optional, depends on board */
	fep->clk_enet_out = devm_clk_get(&pdev->dev, "enet_out");
	if (IS_ERR(fep->clk_enet_out))
		fep->clk_enet_out = NULL;

. IMHO this is clumsy and wrong. See for example
arch/arm/boot/dts/imx28-m28evk.dts which has:

	ethernet@800f0000 {
		...
		clocks = <&clks 57>, <&clks 57>;
		clock-names = "ipg", "ahb";
	}

to get rid of this entry. Moreover enet_out isn't a clock for the fec
instance but for the mdio bus, so the better binding would be:

	mac0: ethernet@800f0000 {
		compatible = "fsl,imx28-fec";

		clocks = <&clks 57>, <&clks 57>;
		clock-names = "ipg", "ahb";

		mdio {
			clocks = <&clks 64>;
			...
		};
	};

This better matches reality and is easier to overwrite per board without
repeating stuff from imx28.dtsi as it is now.

What do you think? Compatibility isn't a big concern: the fec driver
could just keep handling "enet_out" as is and learn about the optional
mdio/clocks.
This would handle old dtbs just fine.

Slightly related: Some machines (bluegiga,apx4devkit, karo,tx28) still
enable enet_out clk in arch/arm/mach-mxs/mach-mxs.c. I think this can be
dropped, right? (Unless recent kernels should still handle dtbs that
don't have enet_out in the fec node. That affects dtbs built between

	v3.6-rc1~144^2~7^2~11 = 3143bbb42b3d ("ARM: mxs: convert apx4devkit board to device tree")
and

	v3.10-rc1~63^2~10^2~20 = f231a9fe7f80 ("ARM: dts: mxs: add enet_out clock to devicetree")
.)

Best regards
Uwe

-- 
Pengutronix e.K.                           | Uwe Kleine-König            |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |

^ permalink raw reply

* Re: [RFC V3 PATCH 00/26] Kernel NET policy
From: Florian Westphal @ 2016-09-12 15:38 UTC (permalink / raw)
  To: kan.liang
  Cc: davem, linux-kernel, netdev, jeffrey.t.kirsher, mingo, peterz,
	kuznet, jmorris, yoshfuji, kaber, akpm, keescook, viro, gorcunov,
	john.stultz, aduyck, ben, decot, fw, alexander.duyck, daniel, tom,
	rdunlap, xiyou.wangcong, hannes, stephen, alexei.starovoitov,
	jesse.brandeburg, andi
In-Reply-To: <1473692159-4017-1-git-send-email-kan.liang@intel.com>

kan.liang@intel.com <kan.liang@intel.com> wrote:
> From: Kan Liang <kan.liang@intel.com>
> 
> It is a big challenge to get good network performance. First, the network
> performance is not good with default system settings. Second, it is too

[..]

I ask to be dropped from the CC list of further submissions of this series;
I've said all I have to say about this ('do it in userspace') and
it's very unlikely I will change my opinion.

Thanks.

^ permalink raw reply

* Re: [RFC V3 PATCH 00/26] Kernel NET policy
From: Eric Dumazet @ 2016-09-12 15:52 UTC (permalink / raw)
  To: kan.liang
  Cc: davem, linux-kernel, netdev, jeffrey.t.kirsher, mingo, peterz,
	kuznet, jmorris, yoshfuji, kaber, akpm, keescook, viro, gorcunov,
	john.stultz, aduyck, ben, decot, fw, alexander.duyck, daniel, tom,
	rdunlap, xiyou.wangcong, hannes, stephen, alexei.starovoitov,
	jesse.brandeburg, andi
In-Reply-To: <1473692159-4017-1-git-send-email-kan.liang@intel.com>

On Mon, 2016-09-12 at 07:55 -0700, kan.liang@intel.com wrote:
> From: Kan Liang <kan.liang@intel.com>
> 

> 
>  Documentation/networking/netpolicy.txt |  157 ++++


I find this patch series very suspect, as
Documentation/networking/scaling.txt is untouched.

I highly recommend you present your ideas at next netdev conference.

I really doubt the mailing lists are the best place to present your
work, given the huge amount of code/layers you want to add to the Linux
kernel.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox