From: kan.liang@intel.com
To: davem@davemloft.net, linux-kernel@vger.kernel.org,
netdev@vger.kernel.org
Cc: mingo@redhat.com, peterz@infradead.org, kuznet@ms2.inr.ac.ru,
jmorris@namei.org, yoshfuji@linux-ipv6.org, kaber@trash.net,
akpm@linux-foundation.org, keescook@chromium.org,
viro@zeniv.linux.org.uk, gorcunov@openvz.org,
john.stultz@linaro.org, aduyck@mirantis.com, ben@decadent.org.uk,
decot@googlers.com, fw@strlen.de, alexander.duyck@gmail.com,
daniel@iogearbox.net, tom@herbertland.com, rdunlap@infradead.org,
xiyou.wangcong@gmail.com, hannes@stressinduktion.org,
jesse.brandeburg@intel.com, andi@firstfloor.org,
Kan Liang <kan.liang@intel.com>
Subject: [RFC V2 PATCH 20/25] net/netpolicy: introduce per task net policy
Date: Thu, 4 Aug 2016 15:36:24 -0400 [thread overview]
Message-ID: <1470339389-8542-21-git-send-email-kan.liang@intel.com> (raw)
In-Reply-To: <1470339389-8542-1-git-send-email-kan.liang@intel.com>
From: Kan Liang <kan.liang@intel.com>
Usually, an application as a whole has a specific requirement. Applying the
net policy to all sockets in the application one by one is too complex.
This patch introduces per task net policy to address this case.
Once the per task net policy is applied, all the sockets in the
application will apply the same net policy. Also, per task net policy
can be inherited by all children.
The usage of PR_SET_NETPOLICY option is as below.
prctl(PR_SET_NETPOLICY, POLICY_NAME, NULL, NULL, NULL).
It applies the per task policy. The policy name must be valid and compatible
with the current device policy. Otherwise, it will error out. The task
policy will be set to NET_POLICY_INVALID.
Signed-off-by: Kan Liang <kan.liang@intel.com>
---
include/linux/init_task.h | 9 +++++++++
include/linux/sched.h | 5 +++++
include/net/sock.h | 12 +++++++++++-
include/uapi/linux/prctl.h | 4 ++++
kernel/exit.c | 4 ++++
kernel/fork.c | 6 ++++++
kernel/sys.c | 31 +++++++++++++++++++++++++++++++
net/core/netpolicy.c | 35 +++++++++++++++++++++++++++++++++++
net/core/sock.c | 10 +++++++++-
net/ipv4/af_inet.c | 7 +++++--
10 files changed, 119 insertions(+), 4 deletions(-)
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index f8834f8..133d1cb 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -183,6 +183,14 @@ extern struct task_group root_task_group;
# define INIT_KASAN(tsk)
#endif
+#ifdef CONFIG_NETPOLICY
+#define INIT_NETPOLICY(tsk) \
+ .task_netpolicy.policy = NET_POLICY_INVALID, \
+ .task_netpolicy.dev = NULL, \
+ .task_netpolicy.ptr = (void *)&tsk,
+#else
+#define INIT_NETPOLICY(tsk)
+#endif
/*
* INIT_TASK is used to set up the first task table, touch at
* your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -260,6 +268,7 @@ extern struct task_group root_task_group;
INIT_VTIME(tsk) \
INIT_NUMA_BALANCING(tsk) \
INIT_KASAN(tsk) \
+ INIT_NETPOLICY(tsk) \
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d99218a..2cfcdbd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -62,6 +62,8 @@ struct sched_param {
#include <asm/processor.h>
+#include <linux/netpolicy.h>
+
#define SCHED_ATTR_SIZE_VER0 48 /* sizeof first published struct */
/*
@@ -1919,6 +1921,9 @@ struct task_struct {
#ifdef CONFIG_MMU
struct task_struct *oom_reaper_list;
#endif
+#ifdef CONFIG_NETPOLICY
+ struct netpolicy_instance task_netpolicy;
+#endif
/* CPU-specific state of this task */
struct thread_struct thread;
/*
diff --git a/include/net/sock.h b/include/net/sock.h
index 6219434..e4f023c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1477,6 +1477,7 @@ void sock_edemux(struct sk_buff *skb);
#define sock_edemux(skb) sock_efree(skb)
#endif
+void sock_setnetpolicy(struct socket *sock);
int sock_setsockopt(struct socket *sock, int level, int op,
char __user *optval, unsigned int optlen);
@@ -2273,10 +2274,19 @@ extern int sysctl_optmem_max;
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;
-/* Return netpolicy instance information from socket. */
+/* Return netpolicy instance information from either task or socket.
+ * If both task and socket have netpolicy instance information,
+ * use the task's and unregister the socket's, because the task policy
+ * is the dominant policy.
+ */
static inline struct netpolicy_instance *netpolicy_find_instance(struct sock *sk)
{
#ifdef CONFIG_NETPOLICY
+ if (is_net_policy_valid(current->task_netpolicy.policy)) {
+ if (is_net_policy_valid(sk->sk_netpolicy.policy))
+ netpolicy_unregister(&sk->sk_netpolicy);
+ return &current->task_netpolicy;
+ }
if (is_net_policy_valid(sk->sk_netpolicy.policy))
return &sk->sk_netpolicy;
#endif
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index a8d0759..bc182d2 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -197,4 +197,8 @@ struct prctl_mm_map {
# define PR_CAP_AMBIENT_LOWER 3
# define PR_CAP_AMBIENT_CLEAR_ALL 4
+/* Control net policy */
+#define PR_SET_NETPOLICY 48
+#define PR_GET_NETPOLICY 49
+
#endif /* _LINUX_PRCTL_H */
diff --git a/kernel/exit.c b/kernel/exit.c
index 84ae830..4abd921 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -858,6 +858,10 @@ void do_exit(long code)
if (unlikely(current->pi_state_cache))
kfree(current->pi_state_cache);
#endif
+#ifdef CONFIG_NETPOLICY
+ if (is_net_policy_valid(current->task_netpolicy.policy))
+ netpolicy_unregister(&current->task_netpolicy);
+#endif
/*
* Make sure we are holding no locks:
*/
diff --git a/kernel/fork.c b/kernel/fork.c
index de21f25..03754ae 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1453,6 +1453,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->sequential_io_avg = 0;
#endif
+#ifdef CONFIG_NETPOLICY
+ p->task_netpolicy.ptr = (void *)p;
+ if (is_net_policy_valid(p->task_netpolicy.policy))
+ netpolicy_register(&p->task_netpolicy, p->task_netpolicy.policy);
+#endif
+
/* Perform scheduler related setup. Assign this task to a CPU. */
retval = sched_fork(clone_flags, p);
if (retval)
diff --git a/kernel/sys.c b/kernel/sys.c
index 89d5be4..b481a64 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2072,6 +2072,31 @@ static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
}
#endif
+#ifdef CONFIG_NETPOLICY
+static int prctl_set_netpolicy(struct task_struct *me, int policy)
+{
+ return netpolicy_register(&me->task_netpolicy, policy);
+}
+
+static int prctl_get_netpolicy(struct task_struct *me, unsigned long adr)
+{
+ return put_user(me->task_netpolicy.policy, (int __user *)adr);
+}
+
+#else /* CONFIG_NETPOLICY */
+
+static int prctl_set_netpolicy(struct task_struct *me, int policy)
+{
+ return -EINVAL;
+}
+
+static int prctl_get_netpolicy(struct task_struct *me, unsigned long adr)
+{
+ return -EINVAL;
+}
+
+#endif /* CONFIG_NETPOLICY */
+
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
unsigned long, arg4, unsigned long, arg5)
{
@@ -2270,6 +2295,12 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
case PR_GET_FP_MODE:
error = GET_FP_MODE(me);
break;
+ case PR_SET_NETPOLICY:
+ error = prctl_set_netpolicy(me, arg2);
+ break;
+ case PR_GET_NETPOLICY:
+ error = prctl_get_netpolicy(me, arg2);
+ break;
default:
error = -EINVAL;
break;
diff --git a/net/core/netpolicy.c b/net/core/netpolicy.c
index 89c65d9..4b844d8 100644
--- a/net/core/netpolicy.c
+++ b/net/core/netpolicy.c
@@ -24,6 +24,35 @@
* is too difficult for users.
* So, it is a big challenge to get good network performance.
*
+ * NET policy supports four policies per device, and three policies per task
+ * and per socket. For using NET policy, the device policy must be set in
+ * advance. The task policy or socket policy must be compatible with device
+ * policy.
+ *
+ * BULK policy This policy is designed for high throughput. It can be
+ * applied to either device policy or task/socket policy.
+ * If it is applied to device policy, the only compatible
+ * task/socket policy is BULK policy itself.
+ * CPU policy This policy is designed for high throughput and lower
+ * CPU utilization. It can be applied to either device
+ * policy or task/socket policy. If it is applied to
+ * device policy, the only compatible task/socket policy
+ * is CPU policy itself.
+ * LATENCY policy This policy is designed for low latency. It can be
+ * applied to either device policy or task/socket policy.
+ * If it is applied to device policy, the only compatible
+ * task/socket policy is LATENCY policy itself.
+ * MIX policy This policy can only be applied to device policy. It
+ * is compatible with BULK and LATENCY policy. This
+ * policy is designed for the case where miscellaneous
+ * types of workloads are running on the device.
+ *
+ * The device policy changes the system configuration and reorganizes the
+ * resources on the device, but it does not change the packet behavior.
+ * The task policy and socket policy redirect the packets to get good
+ * performance. If both task policy and socket policy are set in the same
+ * task, task policy will be applied. The task policy can also be inherited by
+ * children.
*/
#include <linux/module.h>
#include <linux/kernel.h>
@@ -399,6 +428,12 @@ static inline bool policy_validate(struct netpolicy_instance *instance)
policy_name[instance->policy]);
return false;
}
+
+ /* task policy is dominant policy */
+ if (is_net_policy_valid(current->task_netpolicy.policy) &&
+ (current->task_netpolicy.policy != instance->policy))
+ return false;
+
return true;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index 77f226b..117cff7 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1006,7 +1006,13 @@ set_rcvbuf:
#ifdef CONFIG_NETPOLICY
case SO_NETPOLICY:
- ret = netpolicy_register(&sk->sk_netpolicy, val);
+ if (is_net_policy_valid(current->task_netpolicy.policy) &&
+ (current->task_netpolicy.policy != val)) {
+ printk_ratelimited(KERN_WARNING "NETPOLICY: new policy is not compatible with task netpolicy\n");
+ ret = -EINVAL;
+ } else {
+ ret = netpolicy_register(&sk->sk_netpolicy, val);
+ }
break;
#endif
default:
@@ -1621,6 +1627,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
#ifdef CONFIG_NETPOLICY
newsk->sk_netpolicy.ptr = (void *)newsk;
+ if (is_net_policy_valid(current->task_netpolicy.policy))
+ newsk->sk_netpolicy.policy = NET_POLICY_INVALID;
if (is_net_policy_valid(newsk->sk_netpolicy.policy))
netpolicy_register(&newsk->sk_netpolicy, newsk->sk_netpolicy.policy);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f536da3..b26e606 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -771,8 +771,11 @@ static void sock_netpolicy_manage_flow(struct sock *sk, struct msghdr *msg)
if (!instance)
return;
- if (!instance->dev)
- return;
+ if (!instance->dev) {
+ if (!sk->sk_netpolicy.dev)
+ return;
+ instance->dev = sk->sk_netpolicy.dev;
+ }
flow = &instance->flow;
/* TODO: need to change here and add more protocol support */
--
2.5.5
next prev parent reply other threads:[~2016-08-04 19:36 UTC|newest]
Thread overview: 38+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-08-04 19:36 [RFC V2 PATCH 00/25] Kernel NET policy kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 01/25] net: introduce " kan.liang
2016-08-04 20:09 ` Randy Dunlap
2016-08-04 19:36 ` [RFC V2 PATCH 02/25] net/netpolicy: init " kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 03/25] net/netpolicy: get device queue irq information kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 04/25] net/netpolicy: get CPU information kan.liang
2016-08-05 11:00 ` Sergei Shtylyov
2016-08-04 19:36 ` [RFC V2 PATCH 05/25] net/netpolicy: create CPU and queue mapping kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 06/25] net/netpolicy: set and remove IRQ affinity kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 07/25] net/netpolicy: enable and disable NET policy kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 08/25] net/netpolicy: introduce NET policy object kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 09/25] net/netpolicy: set NET policy by policy name kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 10/25] net/netpolicy: add three new NET policies kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 11/25] net/netpolicy: add MIX policy kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 12/25] net/netpolicy: NET device hotplug kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 13/25] net/netpolicy: support CPU hotplug kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 14/25] net/netpolicy: handle channel changes kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 15/25] net/netpolicy: implement netpolicy register kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 16/25] net/netpolicy: introduce per socket netpolicy kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 17/25] net/netpolicy: introduce netpolicy_pick_queue kan.liang
2016-08-04 20:21 ` John Fastabend
2016-08-04 22:39 ` Daniel Borkmann
2016-08-04 22:54 ` Andi Kleen
2016-08-05 0:17 ` Daniel Borkmann
2016-08-05 14:41 ` Tom Herbert
2016-08-05 3:51 ` Tom Herbert
2016-08-05 13:55 ` Liang, Kan
2016-08-05 14:38 ` Tom Herbert
2016-08-04 19:36 ` [RFC V2 PATCH 18/25] net/netpolicy: set Tx queues according to policy kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 19/25] net/netpolicy: set Rx " kan.liang
2016-08-04 19:36 ` kan.liang [this message]
2016-08-04 19:36 ` [RFC V2 PATCH 21/25] net/netpolicy: set per task policy by proc kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 22/25] net/netpolicy: fast path for finding the queues kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 23/25] net/netpolicy: optimize for queue pair kan.liang
2016-08-04 19:36 ` [RFC V2 PATCH 24/25] net/netpolicy: limit the total record number kan.liang
2016-08-17 1:43 ` [lkp] [net/netpolicy] 19e7d15d66: EIP: [<c735077b>] netpolicy_unregister+0x23a/0x28a SS:ESP 0068:ceb19d94 kernel test robot
2016-08-04 19:36 ` [RFC V2 PATCH 25/25] Documentation/networking: Document NET policy kan.liang
-- strict thread matches above, loose matches on Subject: below --
2015-01-01 1:38 [RFC V2 PATCH 00/25] Kernel " kan.liang
2015-01-01 1:39 ` [RFC V2 PATCH 20/25] net/netpolicy: introduce per task net policy kan.liang
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1470339389-8542-21-git-send-email-kan.liang@intel.com \
--to=kan.liang@intel.com \
--cc=aduyck@mirantis.com \
--cc=akpm@linux-foundation.org \
--cc=alexander.duyck@gmail.com \
--cc=andi@firstfloor.org \
--cc=ben@decadent.org.uk \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=decot@googlers.com \
--cc=fw@strlen.de \
--cc=gorcunov@openvz.org \
--cc=hannes@stressinduktion.org \
--cc=jesse.brandeburg@intel.com \
--cc=jmorris@namei.org \
--cc=john.stultz@linaro.org \
--cc=kaber@trash.net \
--cc=keescook@chromium.org \
--cc=kuznet@ms2.inr.ac.ru \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=peterz@infradead.org \
--cc=rdunlap@infradead.org \
--cc=tom@herbertland.com \
--cc=viro@zeniv.linux.org.uk \
--cc=xiyou.wangcong@gmail.com \
--cc=yoshfuji@linux-ipv6.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).