netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: kan.liang@intel.com
To: davem@davemloft.net, linux-kernel@vger.kernel.org,
	netdev@vger.kernel.org
Cc: jeffrey.t.kirsher@intel.com, mingo@redhat.com,
	peterz@infradead.org, kuznet@ms2.inr.ac.ru, jmorris@namei.org,
	yoshfuji@linux-ipv6.org, kaber@trash.net,
	akpm@linux-foundation.org, keescook@chromium.org,
	viro@zeniv.linux.org.uk, gorcunov@openvz.org,
	john.stultz@linaro.org, aduyck@mirantis.com, ben@decadent.org.uk,
	decot@googlers.com, fw@strlen.de, alexander.duyck@gmail.com,
	daniel@iogearbox.net, tom@herbertland.com, rdunlap@infradead.org,
	xiyou.wangcong@gmail.com, hannes@stressinduktion.org,
	stephen@networkplumber.org, alexei.starovoitov@gmail.com,
	jesse.brandeburg@intel.com, andi@firstfloor.org,
	Kan Liang <kan.liang@intel.com>
Subject: [RFC V3 PATCH 19/26] net/netpolicy: tc bpf extension to pick Tx queue
Date: Mon, 12 Sep 2016 07:55:52 -0700	[thread overview]
Message-ID: <1473692159-4017-20-git-send-email-kan.liang@intel.com> (raw)
In-Reply-To: <1473692159-4017-1-git-send-email-kan.liang@intel.com>

From: Kan Liang <kan.liang@intel.com>

This patch extends netpolicy to support tc bpf when selecting the Tx
queue. It implements a bpf classifier for the clsact qdisc. The
classifier picks the proper queue from the net policy subsystem. This
queue selection from tc is not compatible with XPS, so XPS will be invalid.

Currently, the tc bpf extension only supports queue selection on egress.
To enable the extension, the following commands must be run:
 # ./tc qdisc add dev $DEVNAME clsact
 # ./tc filter add dev $DEVNAME egress bpf obj netpolicy_kern.o

Signed-off-by: Kan Liang <kan.liang@intel.com>
---
 include/uapi/linux/bpf.h  |  8 ++++++++
 net/core/dev.c            |  4 ++--
 net/core/filter.c         | 36 ++++++++++++++++++++++++++++++++++++
 samples/bpf/Makefile      |  1 +
 samples/bpf/bpf_helpers.h |  2 ++
 5 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f896dfa..9c7d847 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -398,6 +398,14 @@ enum bpf_func_id {
 	 */
 	BPF_FUNC_skb_change_tail,
 
+	/**
+	 * bpf_netpolicy(skb)
+	 * Netpolicy tc extension. Search for proper Tx queue
+	 * @skb: pointer to skb
+	 * Return: 0 on success or negative error
+	 */
+	BPF_FUNC_netpolicy,
+
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index b9a8044..82304ce 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3285,8 +3285,8 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
 #ifdef CONFIG_NETPOLICY
 			struct netpolicy_instance *instance;
 
-			queue_index = -1;
-			if (dev->netpolicy && sk) {
+			queue_index = sk_tx_queue_get(sk);
+			if ((queue_index < 0) && dev->netpolicy && sk) {
 				instance = netpolicy_find_instance(sk);
 				if (instance) {
 					if (!instance->dev)
diff --git a/net/core/filter.c b/net/core/filter.c
index a83766b..ce32288 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2351,6 +2351,38 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_opt_proto = {
 	.arg3_type	= ARG_CONST_STACK_SIZE,
 };
 
+#ifdef CONFIG_NETPOLICY
+static u64 bpf_netpolicy(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct sk_buff *skb = (struct sk_buff *) (unsigned long) r1;
+	struct netpolicy_instance *instance;
+	struct net_device *dev = skb->dev;
+	struct sock *sk = skb->sk;
+	int queue_index;
+
+	if (dev->netpolicy && sk) {
+		instance = netpolicy_find_instance(sk);
+		if (instance) {
+			if (!instance->dev)
+				instance->dev = dev;
+			queue_index = netpolicy_pick_queue(instance, false);
+			if ((queue_index >= 0) && sk_fullsock(sk) &&
+			    rcu_access_pointer(sk->sk_dst_cache))
+				sk_tx_queue_set(sk, queue_index);
+		}
+	}
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_netpolicy_proto = {
+	.func		= bpf_netpolicy,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+};
+#endif
+
 static const struct bpf_func_proto *
 bpf_get_skb_set_tunnel_proto(enum bpf_func_id which)
 {
@@ -2515,6 +2547,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_get_smp_processor_id_proto;
 	case BPF_FUNC_skb_under_cgroup:
 		return &bpf_skb_under_cgroup_proto;
+#ifdef CONFIG_NETPOLICY
+	case BPF_FUNC_netpolicy:
+		return &bpf_netpolicy_proto;
+#endif
 	default:
 		return sk_filter_func_proto(func_id);
 	}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 12b7304..4aedbb9 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -85,6 +85,7 @@ always += xdp2_kern.o
 always += test_current_task_under_cgroup_kern.o
 always += trace_event_kern.o
 always += sampleip_kern.o
+always += netpolicy_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index 90f44bd..b295bbc 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -88,6 +88,8 @@ static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flag
 	(void *) BPF_FUNC_l4_csum_replace;
 static int (*bpf_skb_under_cgroup)(void *ctx, void *map, int index) =
 	(void *) BPF_FUNC_skb_under_cgroup;
+static int (*bpf_netpolicy)(void *ctx) =
+	(void *) BPF_FUNC_netpolicy;
 
 #if defined(__x86_64__)
 
-- 
2.5.5

  parent reply	other threads:[~2016-09-12 14:56 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-12 14:55 [RFC V3 PATCH 00/26] Kernel NET policy kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 01/26] net: introduce " kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 02/26] net/netpolicy: init " kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 03/26] net/netpolicy: get device queue irq information kan.liang
2016-09-12 16:48   ` Sergei Shtylyov
2016-09-13 12:23     ` Liang, Kan
2016-09-13 13:14       ` Alexander Duyck
2016-09-13 13:22         ` Liang, Kan
2016-09-12 14:55 ` [RFC V3 PATCH 04/26] net/netpolicy: get CPU information kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 05/26] net/netpolicy: create CPU and queue mapping kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 06/26] net/netpolicy: set and remove IRQ affinity kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 07/26] net/netpolicy: enable and disable NET policy kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 08/26] net/netpolicy: introduce NET policy object kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 09/26] net/netpolicy: set NET policy by policy name kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 10/26] net/netpolicy: add three new NET policies kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 11/26] net/netpolicy: add MIX policy kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 12/26] net/netpolicy: NET device hotplug kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 13/26] net/netpolicy: support CPU hotplug kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 14/26] net/netpolicy: handle channel changes kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 15/26] net/netpolicy: implement netpolicy register kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 16/26] net/netpolicy: introduce per socket netpolicy kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 17/26] net/netpolicy: introduce netpolicy_pick_queue kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 18/26] net/netpolicy: set tx queues according to policy kan.liang
2016-09-12 20:23   ` Tom Herbert
2016-09-13 12:22     ` Liang, Kan
2016-09-12 14:55 ` kan.liang [this message]
2016-09-12 14:55 ` [RFC V3 PATCH 20/26] net/netpolicy: set Rx " kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 21/26] net/netpolicy: introduce per task net policy kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 22/26] net/netpolicy: set per task policy by proc kan.liang
2016-09-12 17:01   ` Sergei Shtylyov
2016-09-12 14:55 ` [RFC V3 PATCH 23/26] net/netpolicy: fast path for finding the queues kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 24/26] net/netpolicy: optimize for queue pair kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 25/26] net/netpolicy: limit the total record number kan.liang
2016-09-12 14:55 ` [RFC V3 PATCH 26/26] Documentation/networking: Document NET policy kan.liang
2016-09-12 15:38 ` [RFC V3 PATCH 00/26] Kernel " Florian Westphal
2016-09-12 17:21   ` Cong Wang
2016-09-12 15:52 ` Eric Dumazet
2016-09-19 20:39   ` Stephen Hemminger

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1473692159-4017-20-git-send-email-kan.liang@intel.com \
    --to=kan.liang@intel.com \
    --cc=aduyck@mirantis.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.duyck@gmail.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=andi@firstfloor.org \
    --cc=ben@decadent.org.uk \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=decot@googlers.com \
    --cc=fw@strlen.de \
    --cc=gorcunov@openvz.org \
    --cc=hannes@stressinduktion.org \
    --cc=jeffrey.t.kirsher@intel.com \
    --cc=jesse.brandeburg@intel.com \
    --cc=jmorris@namei.org \
    --cc=john.stultz@linaro.org \
    --cc=kaber@trash.net \
    --cc=keescook@chromium.org \
    --cc=kuznet@ms2.inr.ac.ru \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=peterz@infradead.org \
    --cc=rdunlap@infradead.org \
    --cc=stephen@networkplumber.org \
    --cc=tom@herbertland.com \
    --cc=viro@zeniv.linux.org.uk \
    --cc=xiyou.wangcong@gmail.com \
    --cc=yoshfuji@linux-ipv6.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).