netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [RFC NET_SCHED 00/02]: Flexible SFQ flow classification
@ 2007-05-30  9:40 Patrick McHardy
  2007-05-30  9:40 ` [RFC NET_SCHED 01/02]: sch_sfq: add support for external classifiers Patrick McHardy
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: Patrick McHardy @ 2007-05-30  9:40 UTC (permalink / raw)
  To: netdev; +Cc: Patrick McHardy, hadi

One good thing about ESFQ is the more flexible flow classification, but
I don't like the concept of having a set of selectable hash functions
very much.

These patches change SFQ to allow attaching external classifiers and add
a new "flow" classifier that allows to classify flows based on an arbitary
combination of pre-defined keys. Its probably not the fastest classifier
when used with multiple keys, but frankly, I don't think speed is very
important in most situations where the current SFQ implementation is used.

It currently does not support perturbation, I didn't want to move this into
the classifier, so I need to think about a way to handle it within SFQ.


Some examples:

# behave identical to internal SFQ hash
tc filter add ... flow baseclass x:1 classes 1024 \
		       keys src,dst,proto-src,proto-dst

# the same, but based on source address/port before NAT
tc filter add ... flow baseclass x:1 classes 1024 \
		       keys nfct-src,dst,nfct-proto-src,proto-dst

# classify based on UID
tc filter add ... flow baseclass x:1 classes 1024 \
		       keys sk-uid


and so on .. check out the iproute help text for the full set of supported
keys.


Comments welcome.


 include/linux/pkt_cls.h |   37 +++
 net/sched/Kconfig       |   11 
 net/sched/Makefile      |    1 
 net/sched/cls_flow.c    |  570 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/sched/sch_sfq.c     |   98 +++++++-
 5 files changed, 713 insertions(+), 4 deletions(-)

Patrick McHardy (2):
      [NET_SCHED]: sch_sfq: add support for external classifiers
      [NET_SCHED]: Add flow classifier

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [RFC NET_SCHED 01/02]: sch_sfq: add support for external classifiers
  2007-05-30  9:40 [RFC NET_SCHED 00/02]: Flexible SFQ flow classification Patrick McHardy
@ 2007-05-30  9:40 ` Patrick McHardy
  2007-05-30  9:40 ` [RFC NET_SCHED 02/02]: Add flow classifier Patrick McHardy
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 14+ messages in thread
From: Patrick McHardy @ 2007-05-30  9:40 UTC (permalink / raw)
  To: netdev; +Cc: Patrick McHardy, hadi

[NET_SCHED]: sch_sfq: add support for external classifiers

Add support for external classifiers to allow using different flow hash
functions similar to ESFQ. As long as no classifier is attached the
built-in hash is used as before.

Signed-off-by: Patrick McHardy <kaber@trash.net>

---
commit 666e402224e5ca36af0b5a07d424b9c092bce91e
tree f0e020de640693ba9601cc2e4b82daba7bf03689
parent c420bc9f09a0926b708c3edb27eacba434a4f4ba
author Patrick McHardy <kaber@trash.net> Wed, 30 May 2007 11:21:05 +0200
committer Patrick McHardy <kaber@trash.net> Wed, 30 May 2007 11:21:05 +0200

 net/sched/sch_sfq.c |   98 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 96dfdf7..42cd4fc 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -108,6 +108,7 @@ struct sfq_sched_data
 	int		limit;
 
 /* Variables */
+	struct tcf_proto *filter_list;
 	struct timer_list perturb_timer;
 	int		perturbation;
 	sfq_index	tail;		/* Index of current slot in round */
@@ -172,6 +173,42 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
 	return sfq_fold_hash(q, h, h2);
 }
 
+static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
+				 int *qerr)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+	struct tcf_result res;
+	int result;
+
+	if (TC_H_MAJ(skb->priority) == sch->handle &&
+	    TC_H_MIN(skb->priority) > 0 &&
+	    TC_H_MIN(skb->priority) <= SFQ_HASH_DIVISOR)
+		return TC_H_MIN(skb->priority);
+
+	if (!q->filter_list)
+		return sfq_hash(q, skb) + 1;
+
+	*qerr = NET_XMIT_BYPASS;
+	result = tc_classify(skb, q->filter_list, &res);
+	if (result >= 0) {
+#ifdef CONFIG_NET_CLS_ACT
+		switch (result) {
+		case TC_ACT_STOLEN:
+		case TC_ACT_QUEUED:
+			*qerr = NET_XMIT_SUCCESS;
+		case TC_ACT_SHOT:
+			return 0;
+		}
+#elif defined(CONFIG_NET_CLS_POLICE)
+		if (result == TC_POLICE_SHOT)
+			return 0;
+#endif
+		if (TC_H_MIN(res.classid) <= SFQ_HASH_DIVISOR)
+			return TC_H_MIN(res.classid);
+	}
+	return 0;
+}
+
 static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
@@ -262,8 +299,18 @@ static int
 sfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	unsigned hash = sfq_hash(q, skb);
+	unsigned int hash;
 	sfq_index x;
+	int ret;
+
+	hash = sfq_classify(skb, sch, &ret);
+	if (hash == 0) {
+		if (ret == NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
+	}
+	hash--;
 
 	x = q->ht[hash];
 	if (x == SFQ_DEPTH) {
@@ -298,8 +345,18 @@ static int
 sfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	unsigned hash = sfq_hash(q, skb);
+	unsigned int hash;
 	sfq_index x;
+	int ret;
+
+	hash = sfq_classify(skb, sch, &ret);
+	if (hash == 0) {
+		if (ret == NET_XMIT_BYPASS)
+			sch->qstats.drops++;
+		kfree_skb(skb);
+		return ret;
+	}
+	hash--;
 
 	x = q->ht[hash];
 	if (x == SFQ_DEPTH) {
@@ -456,6 +513,8 @@ static int sfq_init(struct Qdisc *sch, struct rtattr *opt)
 static void sfq_destroy(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
+
+	tcf_destroy_chain(q->filter_list);
 	del_timer(&q->perturb_timer);
 }
 
@@ -481,9 +540,40 @@ rtattr_failure:
 	return -1;
 }
 
+static int sfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
+			    struct rtattr **tca, unsigned long *arg)
+{
+	return -ENOSYS;
+}
+
+static unsigned long sfq_get(struct Qdisc *sch, u32 classid)
+{
+	return 0;
+}
+
+static struct tcf_proto **sfq_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+	struct sfq_sched_data *q = qdisc_priv(sch);
+
+	if (cl)
+		return NULL;
+	return &q->filter_list;
+}
+
+static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *walker)
+{
+	return;
+}
+
+static struct Qdisc_class_ops sfq_class_ops = {
+	.get		=	sfq_get,
+	.change		=	sfq_change_class,
+	.tcf_chain	=	sfq_find_tcf,
+	.walk		=	sfq_walk,
+};
+
 static struct Qdisc_ops sfq_qdisc_ops = {
-	.next		=	NULL,
-	.cl_ops		=	NULL,
+	.cl_ops		=	&sfq_class_ops,
 	.id		=	"sfq",
 	.priv_size	=	sizeof(struct sfq_sched_data),
 	.enqueue	=	sfq_enqueue,

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [RFC NET_SCHED 02/02]: Add flow classifier
  2007-05-30  9:40 [RFC NET_SCHED 00/02]: Flexible SFQ flow classification Patrick McHardy
  2007-05-30  9:40 ` [RFC NET_SCHED 01/02]: sch_sfq: add support for external classifiers Patrick McHardy
@ 2007-05-30  9:40 ` Patrick McHardy
  2007-05-30 11:18 ` [RFC NET_SCHED 00/02]: Flexible SFQ flow classification Andy Furniss
  2007-05-30 14:56 ` jamal
  3 siblings, 0 replies; 14+ messages in thread
From: Patrick McHardy @ 2007-05-30  9:40 UTC (permalink / raw)
  To: netdev; +Cc: Patrick McHardy, hadi

[NET_SCHED]: Add flow classifier

Signed-off-by: Patrick McHardy <kaber@trash.net>

---
commit e98a4dd4f6cef4d46f180dee2e91e7f359a854c3
tree f56ea6da8115c5ee9fbc30421f9c4e392a8c55a7
parent 666e402224e5ca36af0b5a07d424b9c092bce91e
author Patrick McHardy <kaber@trash.net> Wed, 30 May 2007 11:22:49 +0200
committer Patrick McHardy <kaber@trash.net> Wed, 30 May 2007 11:22:49 +0200

 include/linux/pkt_cls.h |   37 +++
 net/sched/Kconfig       |   11 +
 net/sched/Makefile      |    1 
 net/sched/cls_flow.c    |  570 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 619 insertions(+), 0 deletions(-)

diff --git a/include/linux/pkt_cls.h b/include/linux/pkt_cls.h
index c3f01b3..0137591 100644
--- a/include/linux/pkt_cls.h
+++ b/include/linux/pkt_cls.h
@@ -328,6 +328,43 @@ enum
 
 #define TCA_TCINDEX_MAX     (__TCA_TCINDEX_MAX - 1)
 
+/* Flow filter */
+
+enum
+{
+	FLOW_KEY_SRC,
+	FLOW_KEY_DST,
+	FLOW_KEY_PROTO_SRC,
+	FLOW_KEY_PROTO_DST,
+	FLOW_KEY_PRIORITY,
+	FLOW_KEY_MARK,
+	FLOW_KEY_NFCT,
+	FLOW_KEY_NFCT_SRC,
+	FLOW_KEY_NFCT_DST,
+	FLOW_KEY_NFCT_PROTO_SRC,
+	FLOW_KEY_NFCT_PROTO_DST,
+	FLOW_KEY_RTIIF,
+	FLOW_KEY_RTCLASSID,
+	FLOW_KEY_SKUID,
+	FLOW_KEY_SKGID,
+	__FLOW_KEY_MAX,
+};
+
+#define FLOW_KEY_MAX	(__FLOW_KEY_MAX - 1)
+
+enum
+{
+	TCA_FLOW_UNSPEC,
+	TCA_FLOW_KEYS,
+	TCA_FLOW_BASECLASS,
+	TCA_FLOW_CLASSES,
+	TCA_FLOW_ACT,
+	TCA_FLOW_POLICE,
+	__TCA_FLOW_MAX
+};
+
+#define TCA_FLOW_MAX	(__TCA_FLOW_MAX - 1)
+
 /* Basic filter */
 
 enum
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 475df84..6f33a79 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -302,6 +302,17 @@ config NET_CLS_RSVP6
 	  To compile this code as a module, choose M here: the
 	  module will be called cls_rsvp6.
 
+config NET_CLS_FLOW
+	tristate "Flow classifier"
+	select NET_CLS
+	---help---
+	  If you say Y here, you will be able to classify packets based on
+	  a configurable combination of packet keys. This is mostly useful
+	  in combination with SFQ.
+
+	  To compile this code as a module, choose M here: the
+	  module will be called cls_flow.
+
 config NET_EMATCH
 	bool "Extended Matches"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 020767a..54ad85a 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -35,6 +35,7 @@ obj-$(CONFIG_NET_CLS_RSVP)	+= cls_rsvp.o
 obj-$(CONFIG_NET_CLS_TCINDEX)	+= cls_tcindex.o
 obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
 obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
+obj-$(CONFIG_NET_CLS_FLOW)	+= cls_flow.o
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
new file mode 100644
index 0000000..ebb79ea
--- /dev/null
+++ b/net/sched/cls_flow.c
@@ -0,0 +1,570 @@
+/*
+ * net/sched/cls_flow.c		Generic flow classifier
+ *
+ * Copyright (c) 2007 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/jhash.h>
+#include <linux/pkt_cls.h>
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <net/pkt_cls.h>
+#include <net/ip.h>
+#include <net/route.h>
+#if defined(CONFIG_NF_CONNTRACK) || defined (CONFIG_NF_CONNTRACK_MODULE)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+
+struct flow_head
+{
+	struct list_head	filters;
+};
+
+struct flow_filter
+{
+	struct list_head	list;
+	struct tcf_exts		exts;
+	u32			handle;
+	u32			nkeys;
+	u32			keymask;
+	u32			baseclass;
+	u16			classes;
+};
+
+static struct tcf_ext_map flow_ext_map = {
+	.action	= TCA_FLOW_ACT,
+	.police	= TCA_FLOW_POLICE,
+};
+
+static inline u32 addr_fold(void *addr)
+{
+	unsigned long a = (unsigned long)addr;
+
+	return (a & 0xFFFFFFFF) ^ (BITS_PER_LONG > 32 ? a >> 32 : 0);
+}
+
+static u32 flow_get_src(const struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		return ip_hdr(skb)->saddr;
+	case __constant_htons(ETH_P_IPV6):
+		return ipv6_hdr(skb)->saddr.s6_addr[3];
+	default:
+		return addr_fold(skb->sk);
+	}
+}
+
+static u32 flow_get_dst(const struct sk_buff *skb)
+{
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		return ip_hdr(skb)->daddr;
+	case __constant_htons(ETH_P_IPV6):
+		return ipv6_hdr(skb)->daddr.s6_addr[3];
+	default:
+		return addr_fold(skb->dst) ^ skb->protocol;
+	}
+}
+
+static int has_ports(int protocol)
+{
+	switch (protocol) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_UDPLITE:
+	case IPPROTO_SCTP:
+	case IPPROTO_DCCP:
+	case IPPROTO_ESP:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static u32 flow_get_proto_src(const struct sk_buff *skb)
+{
+	u32 res;
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP): {
+		struct iphdr *iph = ip_hdr(skb);
+
+		res = iph->protocol;
+		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
+		    has_ports(iph->protocol))
+			res ^= *(u16 *)((void *)iph + iph->ihl * 4);
+		break;
+	}
+	case __constant_htons(ETH_P_IPV6): {
+		struct ipv6hdr *iph = ipv6_hdr(skb);
+
+		res = iph->nexthdr;
+		if (has_ports(iph->nexthdr))
+			res ^= *(u16 *)&iph[1];
+		break;
+	}
+	default:
+		res = addr_fold(skb->sk);
+	}
+
+	return res;
+}
+
+static u32 flow_get_proto_dst(const struct sk_buff *skb)
+{
+	u32 res;
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP): {
+		struct iphdr *iph = ip_hdr(skb);
+
+		res = iph->protocol;
+		if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
+		    has_ports(iph->protocol))
+			res ^= *(u16 *)((void *)iph + iph->ihl * 4 + 2);
+		break;
+	}
+	case __constant_htons(ETH_P_IPV6): {
+		struct ipv6hdr *iph = ipv6_hdr(skb);
+
+		res = iph->nexthdr;
+		if (has_ports(iph->nexthdr))
+			res ^= *(u16 *)((void *)&iph[1] + 2);
+		break;
+	}
+	default:
+		res = addr_fold(skb->dst) ^ skb->protocol;
+	}
+
+	return res;
+}
+
+static u32 flow_get_priority(const struct sk_buff *skb)
+{
+	return skb->priority;
+}
+
+static u32 flow_get_mark(const struct sk_buff *skb)
+{
+	return skb->mark;
+}
+
+static u32 flow_get_nfct(const struct sk_buff *skb)
+{
+#if defined(CONFIG_NF_CONNTRACK) || defined (CONFIG_NF_CONNTRACK_MODULE)
+	return addr_fold(skb->nfct);
+#else
+	return 0;
+#endif
+}
+
+#define CTTUPLE(skb, member)						\
+({									\
+	enum ip_conntrack_info ctinfo;					\
+	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);			\
+	if (ct == NULL)							\
+		goto fallback;						\
+	ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.member;			\
+})
+
+static u32 flow_get_nfct_src(const struct sk_buff *skb)
+{
+#if defined(CONFIG_NF_CONNTRACK) || defined (CONFIG_NF_CONNTRACK_MODULE)
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		return CTTUPLE(skb, src.u3.ip);
+	case __constant_htons(ETH_P_IPV6):
+		return CTTUPLE(skb, src.u3.ip6[3]);
+	}
+fallback:
+#endif
+	return flow_get_src(skb);
+}
+
+static u32 flow_get_nfct_dst(const struct sk_buff *skb)
+{
+#if defined(CONFIG_NF_CONNTRACK) || defined (CONFIG_NF_CONNTRACK_MODULE)
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		return CTTUPLE(skb, dst.u3.ip);
+	case __constant_htons(ETH_P_IPV6):
+		return CTTUPLE(skb, dst.u3.ip6[3]);
+	}
+fallback:
+#endif
+	return flow_get_dst(skb);
+}
+
+static u32 flow_get_nfct_proto_src(const struct sk_buff *skb)
+{
+#if defined(CONFIG_NF_CONNTRACK) || defined (CONFIG_NF_CONNTRACK_MODULE)
+	return CTTUPLE(skb, src.u.all);
+fallback:
+#endif
+	return flow_get_proto_src(skb);
+}
+
+static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb)
+{
+#if defined(CONFIG_NF_CONNTRACK) || defined (CONFIG_NF_CONNTRACK_MODULE)
+	return CTTUPLE(skb, dst.u.all);
+fallback:
+#endif
+	return flow_get_proto_dst(skb);
+}
+
+static u32 flow_get_rtclassid(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_CLS_ROUTE
+	if (skb->dst)
+		return skb->dst->tclassid;
+#endif
+	return 0;
+}
+
+static u32 flow_get_rtiif(const struct sk_buff *skb)
+{
+	if (skb->dst && skb->dst->ops->family == AF_INET)
+		return ((struct rtable *)skb->dst)->fl.iif;
+	return 0;
+}
+
+static u32 flow_get_skuid(const struct sk_buff *skb)
+{
+	if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
+		return skb->sk->sk_socket->file->f_uid;
+	return 0;
+}
+
+static u32 flow_get_skgid(const struct sk_buff *skb)
+{
+	if (skb->sk && skb->sk->sk_socket && skb->sk->sk_socket->file)
+		return skb->sk->sk_socket->file->f_gid;
+	return 0;
+}
+
+static u32 flow_key_get(const struct sk_buff *skb, int key)
+{
+	switch (key) {
+	case FLOW_KEY_SRC:
+		return flow_get_src(skb);
+	case FLOW_KEY_DST:
+		return flow_get_dst(skb);
+	case FLOW_KEY_PROTO_SRC:
+		return flow_get_proto_src(skb);
+	case FLOW_KEY_PROTO_DST:
+		return flow_get_proto_dst(skb);
+	case FLOW_KEY_PRIORITY:
+		return flow_get_priority(skb);
+	case FLOW_KEY_MARK:
+		return flow_get_mark(skb);
+	case FLOW_KEY_NFCT:
+		return flow_get_nfct(skb);
+	case FLOW_KEY_NFCT_SRC:
+		return flow_get_nfct_src(skb);
+	case FLOW_KEY_NFCT_DST:
+		return flow_get_nfct_dst(skb);
+	case FLOW_KEY_NFCT_PROTO_SRC:
+		return flow_get_nfct_proto_src(skb);
+	case FLOW_KEY_NFCT_PROTO_DST:
+		return flow_get_nfct_proto_dst(skb);
+	case FLOW_KEY_RTIIF:
+		return flow_get_rtiif(skb);
+	case FLOW_KEY_RTCLASSID:
+		return flow_get_rtclassid(skb);
+	case FLOW_KEY_SKUID:
+		return flow_get_skuid(skb);
+	case FLOW_KEY_SKGID:
+		return flow_get_skgid(skb);
+	default:
+		BUG();
+		return 0;
+	}
+}
+
+static int flow_classify(struct sk_buff *skb, struct tcf_proto *tp,
+			 struct tcf_result *res)
+{
+	struct flow_head *head = tp->root;
+	struct flow_filter *f;
+	u32 keymask;
+	u32 classid;
+	unsigned int n, key;
+	int r;
+
+	list_for_each_entry(f, &head->filters, list) {
+		u32 keys[f->nkeys];
+
+		keymask = f->keymask;
+
+		for (n = 0; n < f->nkeys; n++) {
+			key = ffs(keymask) - 1;
+			keymask &= ~(1 << key);
+			keys[n] = flow_key_get(skb, key);
+		}
+
+		classid = jhash2(keys, f->nkeys, 0);
+		if (f->classes)
+			classid %= f->classes;
+
+		res->class   = 0;
+		res->classid = TC_H_MAKE(f->baseclass, f->baseclass + classid);
+
+		r = tcf_exts_exec(skb, &f->exts, res);
+		if (r < 0)
+			continue;
+		return r;
+	}
+	return -1;
+}
+
+static int flow_change(struct tcf_proto *tp, unsigned long base,
+		       u32 handle, struct rtattr **tca,
+		       unsigned long *arg)
+{
+	struct flow_head *head = tp->root;
+	struct flow_filter *f;
+	struct rtattr *opt = tca[TCA_OPTIONS-1];
+	struct rtattr *tb[TCA_FLOW_MAX];
+	struct tcf_exts e;
+	unsigned int nkeys = 0;
+	u32 baseclass = 0;
+	u32 keymask = 0;
+	int err;
+
+	if (opt == NULL)
+		return -EINVAL;
+
+	err = rtattr_parse_nested(tb, TCA_FLOW_MAX, opt);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_FLOW_BASECLASS-1]) {
+		if (RTA_PAYLOAD(tb[TCA_FLOW_BASECLASS-1]) < sizeof(u32))
+			return -EINVAL;
+		baseclass = *(u32 *)RTA_DATA(tb[TCA_FLOW_BASECLASS-1]);
+		if (TC_H_MIN(baseclass) == 0)
+			return -EINVAL;
+	}
+
+	if (tb[TCA_FLOW_CLASSES-1] &&
+	    RTA_PAYLOAD(tb[TCA_FLOW_CLASSES-1]) < sizeof(u16))
+		return -EINVAL;
+
+	if (tb[TCA_FLOW_KEYS-1]) {
+		if (RTA_PAYLOAD(tb[TCA_FLOW_KEYS-1]) < sizeof(u32))
+			return -EINVAL;
+
+		keymask = *(u32 *)RTA_DATA(tb[TCA_FLOW_KEYS-1]);
+		if (fls(keymask) - 1 > FLOW_KEY_MAX)
+			return -EOPNOTSUPP;
+
+		nkeys = hweight32(keymask);
+		if (nkeys == 0)
+			return -EINVAL;
+	}
+
+	err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &flow_ext_map);
+	if (err < 0)
+		return err;
+
+	f = (struct flow_filter *)*arg;
+	if (f != NULL) {
+		err = -EINVAL;
+		if (f->handle != handle && handle)
+			goto errout;
+	} else {
+		err = -EINVAL;
+		if (!handle)
+			goto errout;
+		if (!tb[TCA_FLOW_KEYS-1])
+			goto errout;
+
+		if (TC_H_MAJ(baseclass) == 0)
+			baseclass = TC_H_MAKE(tp->q->handle, baseclass);
+		if (TC_H_MIN(baseclass) == 0)
+			baseclass = TC_H_MAKE(baseclass, 1);
+
+		err = -ENOBUFS;
+		f = kzalloc(sizeof(*f), GFP_KERNEL);
+		if (f == NULL)
+			goto errout;
+
+		f->handle = handle;
+	}
+
+	if (tb[TCA_FLOW_KEYS-1]) {
+		f->keymask = keymask;
+		f->nkeys   = nkeys;
+	}
+	if (baseclass)
+		f->baseclass = baseclass;
+	if (tb[TCA_FLOW_CLASSES-1])
+		f->classes = *(u16 *)RTA_DATA(tb[TCA_FLOW_CLASSES-1]);
+	tcf_exts_change(tp, &f->exts, &e);
+
+	tcf_tree_lock(tp);
+	if (*arg == 0)
+		list_add(&f->list, &head->filters);
+	tcf_tree_unlock(tp);
+
+	*arg = (unsigned long)f;
+	return 0;
+
+errout:
+	tcf_exts_destroy(tp, &e);
+	return err;
+}
+
+static void flow_destroy_filter(struct tcf_proto *tp, struct flow_filter *f)
+{
+	tcf_exts_destroy(tp, &f->exts);
+	kfree(f);
+}
+
+static int flow_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct flow_filter *f = (struct flow_filter *)arg;
+
+	tcf_tree_lock(tp);
+	list_del(&f->list);
+	tcf_tree_unlock(tp);
+	flow_destroy_filter(tp, f);
+	return 0;
+}
+
+static int flow_init(struct tcf_proto *tp)
+{
+	struct flow_head *head;
+
+	head = kzalloc(sizeof(*head), GFP_KERNEL);
+	if (head == NULL)
+		return -ENOBUFS;
+	INIT_LIST_HEAD(&head->filters);
+	tp->root = head;
+	return 0;
+}
+
+static void flow_destroy(struct tcf_proto *tp)
+{
+	struct flow_head *head = tp->root;
+	struct flow_filter *f, *next;
+
+	list_for_each_entry_safe(f, next, &head->filters, list) {
+		list_del(&f->list);
+		flow_destroy_filter(tp, f);
+	}
+	kfree(head);
+}
+
+static unsigned long flow_get(struct tcf_proto *tp, u32 handle)
+{
+	struct flow_head *head = tp->root;
+	struct flow_filter *f;
+
+	list_for_each_entry(f, &head->filters, list)
+		if (f->handle == handle)
+			return (unsigned long)f;
+	return 0;
+}
+
+static void flow_put(struct tcf_proto *tp, unsigned long f)
+{
+	return;
+}
+
+static int flow_dump(struct tcf_proto *tp, unsigned long fh,
+		     struct sk_buff *skb, struct tcmsg *t)
+{
+	struct flow_filter *f = (struct flow_filter *)fh;
+	unsigned char *b = skb_tail_pointer(skb);
+	struct rtattr *rta;
+
+	if (f == NULL)
+		return skb->len;
+
+	t->tcm_handle = f->handle;
+
+	rta = (struct rtattr *)b;
+	RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
+
+	RTA_PUT(skb, TCA_FLOW_KEYS, sizeof(u32), &f->keymask);
+	if (f->baseclass)
+		RTA_PUT(skb, TCA_FLOW_BASECLASS, sizeof(u32), &f->baseclass);
+	if (f->classes)
+		RTA_PUT(skb, TCA_FLOW_CLASSES, sizeof(u16), &f->classes);
+
+	if (tcf_exts_dump(skb, &f->exts, &flow_ext_map) < 0)
+		goto rtattr_failure;
+
+	rta->rta_len = skb_tail_pointer(skb) - b;
+
+	if (tcf_exts_dump_stats(skb, &f->exts, &flow_ext_map) < 0)
+		goto rtattr_failure;
+
+	return skb->len;
+
+rtattr_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct flow_head *head = tp->root;
+	struct flow_filter *f;
+
+	list_for_each_entry(f, &head->filters, list) {
+		if (arg->count < arg->skip)
+			goto skip;
+		if (arg->fn(tp, (unsigned long)f, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+skip:
+		arg->count++;
+	}
+}
+
+static struct tcf_proto_ops cls_flow_ops = {
+	.kind		= "flow",
+	.classify	= flow_classify,
+	.init		= flow_init,
+	.destroy	= flow_destroy,
+	.change		= flow_change,
+	.delete		= flow_delete,
+	.get		= flow_get,
+	.put		= flow_put,
+	.dump		= flow_dump,
+	.walk		= flow_walk,
+	.owner		= THIS_MODULE,
+};
+
+static int __init cls_flow_init(void)
+{
+	return register_tcf_proto_ops(&cls_flow_ops);
+}
+
+static void __exit cls_flow_exit(void)
+{
+	unregister_tcf_proto_ops(&cls_flow_ops);
+}
+
+module_init(cls_flow_init);
+module_exit(cls_flow_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_DESCRIPTION("TC flow classifier");

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [RFC NET_SCHED 00/02]: Flexible SFQ flow classification
  2007-05-30  9:40 [RFC NET_SCHED 00/02]: Flexible SFQ flow classification Patrick McHardy
  2007-05-30  9:40 ` [RFC NET_SCHED 01/02]: sch_sfq: add support for external classifiers Patrick McHardy
  2007-05-30  9:40 ` [RFC NET_SCHED 02/02]: Add flow classifier Patrick McHardy
@ 2007-05-30 11:18 ` Andy Furniss
  2007-05-30 15:32   ` Patrick McHardy
  2007-05-30 14:56 ` jamal
  3 siblings, 1 reply; 14+ messages in thread
From: Andy Furniss @ 2007-05-30 11:18 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netdev, hadi

Patrick McHardy wrote:
> One good thing about ESFQ is the more flexible flow classification, but
> I don't like the concept of having a set of selectable hash functions
> very much.
> 
> These patches change SFQ to allow attaching external classifiers and add
> a new "flow" classifier that allows to classify flows based on an arbitary
> combination of pre-defined keys. Its probably not the fastest classifier
> when used with multiple keys, but frankly, I don't think speed is very
> important in most situations where the current SFQ implementation is used.
> 
> It currently does not support perturbation, I didn't want to move this into
> the classifier, so I need to think about a way to handle it within SFQ.
>

Cool, but isn't this going to show the same collision problems that the
pre jhash esfq saw?

Andy.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC NET_SCHED 00/02]: Flexible SFQ flow classification
  2007-05-30  9:40 [RFC NET_SCHED 00/02]: Flexible SFQ flow classification Patrick McHardy
                   ` (2 preceding siblings ...)
  2007-05-30 11:18 ` [RFC NET_SCHED 00/02]: Flexible SFQ flow classification Andy Furniss
@ 2007-05-30 14:56 ` jamal
  2007-05-30 15:27   ` Patrick McHardy
  2007-08-09  4:12   ` Paul E. McKenney
  3 siblings, 2 replies; 14+ messages in thread
From: jamal @ 2007-05-30 14:56 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netdev


On Wed, 2007-30-05 at 11:40 +0200, Patrick McHardy wrote:
> One good thing about ESFQ is the more flexible flow classification, but
> I don't like the concept of having a set of selectable hash functions
> very much.
> 

In the spirit of SFQ it is probably ok to do that; 
i.e iirc,  the idea for justification behind sfq was to provide a
"computationaly feasible/reasonable" way to provide fair queueing with a
gazillion queues. 
Classification was considered damn expensive in those days (when MC68K
was sort of state of the art); it still is but maybe a much much lesser
issue now for what "gazillion" was in those days. So a simple hash on a
few fields with pertubation (the part that makes allows the "stochastic"
part in the name) was deemed the way to go and in order to provide
fairness (while avoiding packet reordering).
So if you want to keep that spirit it is ok to do what ESFQ does;
I think the assumptions will still be valid if you have a gazillion
queues in todays terms. A number like say 128K may make sense.
(Hey Paul M is still around, just too focused on the wrong things like
RCU;-> so we can ask his opinion)

> These patches change SFQ to allow attaching external classifiers and add
> a new "flow" classifier that allows to classify flows based on an arbitary
> combination of pre-defined keys. Its probably not the fastest classifier
> when used with multiple keys, but frankly, I don't think speed is very
> important in most situations where the current SFQ implementation is used.

The only one thing i noticed that changes the behavior is the use of
skb->prio as a selector. I think if you removed that it should be fine.
Another alternative is to create a brand new FQ qdisc and leave the
classification to the classifiers.

> It currently does not support perturbation, I didn't want to move this into
> the classifier, so I need to think about a way to handle it within SFQ.

It is kind of hard to put it back into the current approach because the
basic assumptions of ensuring no re-ordering and a "fast" classifier are
gone.

I am almost tempted to say go back and write a qdisc called FQ.

cheers,
jamal


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC NET_SCHED 00/02]: Flexible SFQ flow classification
  2007-05-30 14:56 ` jamal
@ 2007-05-30 15:27   ` Patrick McHardy
  2007-05-30 16:10     ` jamal
  2007-08-09  4:12   ` Paul E. McKenney
  1 sibling, 1 reply; 14+ messages in thread
From: Patrick McHardy @ 2007-05-30 15:27 UTC (permalink / raw)
  To: hadi; +Cc: netdev

jamal wrote:
> On Wed, 2007-30-05 at 11:40 +0200, Patrick McHardy wrote:
> 
>>One good thing about ESFQ is the more flexible flow classification, but
>>I don't like the concept of having a set of selectable hash functions
>>very much.
>>
> 
> 
> In the spirit of SFQ it is probably ok to do that; 
> [..]
> So if you want to keep that spirit it is ok to do what ESFQ does;
> I think the assumptions will still be valid if you have a gazillion
> queues in todays terms. A number like say 128K may make sense.


Sure. The thing I don't like about the predefined hash functions is
that its unflexible.

>>These patches change SFQ to allow attaching external classifiers and add
>>a new "flow" classifier that allows to classify flows based on an arbitary
>>combination of pre-defined keys. Its probably not the fastest classifier
>>when used with multiple keys, but frankly, I don't think speed is very
>>important in most situations where the current SFQ implementation is used.
> 
> 
> The only one thing i noticed that changes the behavior is the use of
> skb->prio as a selector. I think if you removed that it should be fine.


I don't think thats a problem, it needs to point to the correct major
to have any effect, which can only happen if it is set by the user.
I would prefer to keep it for consistency with other qdiscs.

> Another alternative is to create a brand new FQ qdisc and leave the
> classification to the classifiers.


I created a new classifier to leave classification to the classifiers ..
Not sure exactly why I would need a new qdisc to do that :)

>>It currently does not support perturbation, I didn't want to move this into
>>the classifier, so I need to think about a way to handle it within SFQ.
> 
> 
> It is kind of hard to put it back into the current approach because the
> basic assumptions of ensuring no re-ordering and a "fast" classifier are
> gone.


It doesn't affect performance in any way, but I agree that it doesn't
belong in a classifier. But it should be possible to do it in SFQ.

> I am almost tempted to say go back and write a qdisc called FQ.


Funny, last the this came up you suggested to do basically exactly
what this classifier does, which I thought made sense :)

http://www.mail-archive.com/netdev@vger.kernel.org/msg06801.html

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC NET_SCHED 00/02]: Flexible SFQ flow classification
  2007-05-30 11:18 ` [RFC NET_SCHED 00/02]: Flexible SFQ flow classification Andy Furniss
@ 2007-05-30 15:32   ` Patrick McHardy
  2007-05-30 16:57     ` Andy Furniss
  0 siblings, 1 reply; 14+ messages in thread
From: Patrick McHardy @ 2007-05-30 15:32 UTC (permalink / raw)
  To: lists; +Cc: netdev, hadi

Andy Furniss wrote:
> Patrick McHardy wrote:
> 
>> It currently does not support perturbation, I didn't want to move this
>> into
>> the classifier, so I need to think about a way to handle it within SFQ.
>>
> 
> Cool, but isn't this going to show the same collision problems that the
> pre jhash esfq saw?


Perturbation doesn't prevent collisions, it just distributes them
(hopefully evenly). My classifier uses jhash, but that won't prevent
collisions either. Anyways, I'm going to change SFQ so perturbation
can also be used with external classifiers.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC NET_SCHED 00/02]: Flexible SFQ flow classification
  2007-05-30 15:27   ` Patrick McHardy
@ 2007-05-30 16:10     ` jamal
  2007-05-30 16:23       ` Patrick McHardy
  0 siblings, 1 reply; 14+ messages in thread
From: jamal @ 2007-05-30 16:10 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netdev

On Wed, 2007-30-05 at 17:27 +0200, Patrick McHardy wrote:
> jamal wrote:

> Sure. The thing I don't like about the predefined hash functions is
> that its unflexible.
> 

agreed.

> > skb->prio as a selector. I think if you removed that it should be fine.
>
> I don't think thats a problem, it needs to point to the correct major
> to have any effect, which can only happen if it is set by the user.
> I would prefer to keep it for consistency with other qdiscs.
> 
> > Another alternative is to create a brand new FQ qdisc and leave the
> > classification to the classifiers.
>
> I created a new classifier to leave classification to the classifiers ..
> Not sure exactly why I would need a new qdisc to do that :)
> 

The caveat/difference with current SFQ is you have allowed the user to
define which queue is selected. It is/was dynamically selected based on
packet header now/before. Thats the main reason i said maybe you
separate the two components totaly. i.e while FQ is useful on its own
and can use other classifiers; a useful classifier IMO for SFQ is one
that rips out the sfq_hash or whatever other schemes used in ESFQ into a
classifier (I suppose then the user can select which hash is used). In
such a classifier you can restore the pertub into it.
I am not sure i made sense.

> > I am almost tempted to say go back and write a qdisc called FQ.
> 
> 
> Funny, last the this came up you suggested to do basically exactly
> what this classifier does, which I thought made sense :)
> 
> http://www.mail-archive.com/netdev@vger.kernel.org/msg06801.html

I am almost sensing i am contradicting myself in that thread;-> It is
hard to tell and i admit to being forgetful - but what i probably meant
is what i said above. 

cheers,
jamal


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC NET_SCHED 00/02]: Flexible SFQ flow classification
  2007-05-30 16:10     ` jamal
@ 2007-05-30 16:23       ` Patrick McHardy
  2007-05-30 16:34         ` jamal
  0 siblings, 1 reply; 14+ messages in thread
From: Patrick McHardy @ 2007-05-30 16:23 UTC (permalink / raw)
  To: hadi; +Cc: netdev

jamal wrote:
> On Wed, 2007-30-05 at 17:27 +0200, Patrick McHardy wrote:
> 
>>>Another alternative is to create a brand new FQ qdisc and leave the
>>>classification to the classifiers.
>>
>>I created a new classifier to leave classification to the classifiers ..
>>Not sure exactly why I would need a new qdisc to do that :)
>>
> 
> 
> The caveat/difference with current SFQ is you have allowed the user to
> define which queue is selected.


I think exposing SFQ's queues as classes is a good thing, it allows
you to do whatever classification you want. In fact I'm probably
going to add patch on top to also dump them to userspace. What
remains for SFQ to do is serve the queues evenly.

> It is/was dynamically selected based on
> packet header now/before. Thats the main reason i said maybe you
> separate the two components totaly. i.e while FQ is useful on its own
> and can use other classifiers; a useful classifier IMO for SFQ is one
> that rips out the sfq_hash or whatever other schemes used in ESFQ into a
> classifier (I suppose then the user can select which hash is used). In
> such a classifier you can restore the pertub into it.
> I am not sure i made sense.


My classifier seperates them entirely. The only thing it keeps
in SFQ is the old classifier for compatibility, besides that its
exactly what you say. It should be easily possible to remove it
entirely and use my classifier in a compatible configuration
automatically.

>>>I am almost tempted to say go back and write a qdisc called FQ.
>>
>>
>>Funny, last the this came up you suggested to do basically exactly
>>what this classifier does, which I thought made sense :)
>>
>>http://www.mail-archive.com/netdev@vger.kernel.org/msg06801.html
> 
> 
> I am almost sensing i am contradicting myself in that thread;-> It is
> hard to tell and i admit to being forgetful - but what i probably meant
> is what i said above. 


I'm sensing you didn't look at the patch :)

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC NET_SCHED 00/02]: Flexible SFQ flow classification
  2007-05-30 16:23       ` Patrick McHardy
@ 2007-05-30 16:34         ` jamal
  2007-05-30 16:55           ` Patrick McHardy
  0 siblings, 1 reply; 14+ messages in thread
From: jamal @ 2007-05-30 16:34 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netdev

On Wed, 2007-30-05 at 18:23 +0200, Patrick McHardy wrote:

> I think exposing SFQ's queues as classes is a good thing, it allows
> you to do whatever classification you want. In fact I'm probably
> going to add patch on top to also dump them to userspace. 

Yes, that would be useful.

> What
> remains for SFQ to do is serve the queues evenly.

And it does (yes, I looked at the patch;->).  

> My classifier seperates them entirely. The only thing it keeps
> in SFQ is the old classifier for compatibility, besides that its
> exactly what you say. It should be easily possible to remove it
> entirely and use my classifier in a compatible configuration
> automatically.
> 

If you removed it entirely (and had it as a separate classifier) IMO
that would be a better approach. Then what you have is a pure FQ qdisc.
In which case, you leave alone SFQ and have a new qdisc.
The "removed" hashing dynamic classifier would of course be better off
it allowed the user to select a hash algorithm such as the other ones
specified in ESFQ.

cheers,
jamal


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC NET_SCHED 00/02]: Flexible SFQ flow classification
  2007-05-30 16:34         ` jamal
@ 2007-05-30 16:55           ` Patrick McHardy
  2007-05-30 17:02             ` jamal
  0 siblings, 1 reply; 14+ messages in thread
From: Patrick McHardy @ 2007-05-30 16:55 UTC (permalink / raw)
  To: hadi; +Cc: netdev

jamal wrote:
> On Wed, 2007-30-05 at 18:23 +0200, Patrick McHardy wrote:
> 
>>My classifier seperates them entirely. The only thing it keeps
>>in SFQ is the old classifier for compatibility, besides that its
>>exactly what you say. It should be easily possible to remove it
>>entirely and use my classifier in a compatible configuration
>>automatically.
>>
> 
> 
> If you removed it entirely (and had it as a separate classifier) IMO
> that would be a better approach. Then what you have is a pure FQ qdisc.
> In which case, you leave alone SFQ and have a new qdisc.


I could do that, but I'm perfectly happy with the qdisc part of SFQ.
Without the classifier SFQ is of course simply a FQ qdisc, all it
cares about is serving queues equally.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC NET_SCHED 00/02]: Flexible SFQ flow classification
  2007-05-30 15:32   ` Patrick McHardy
@ 2007-05-30 16:57     ` Andy Furniss
  0 siblings, 0 replies; 14+ messages in thread
From: Andy Furniss @ 2007-05-30 16:57 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netdev, hadi

Patrick McHardy wrote:
  My classifier uses jhash,

Ahh that's OK - I thought it still used the old sfq hash, which collided 
alot with a low number of consecutive addresses.

Andy.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC NET_SCHED 00/02]: Flexible SFQ flow classification
  2007-05-30 16:55           ` Patrick McHardy
@ 2007-05-30 17:02             ` jamal
  0 siblings, 0 replies; 14+ messages in thread
From: jamal @ 2007-05-30 17:02 UTC (permalink / raw)
  To: Patrick McHardy; +Cc: netdev

On Wed, 2007-30-05 at 18:55 +0200, Patrick McHardy wrote:

> I could do that, but I'm perfectly happy with the qdisc part of SFQ.
> Without the classifier SFQ is of course simply a FQ qdisc, all it
> cares about is serving queues equally.

Sure. You are writting the code - your call.

cheers,
jamal



^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [RFC NET_SCHED 00/02]: Flexible SFQ flow classification
  2007-05-30 14:56 ` jamal
  2007-05-30 15:27   ` Patrick McHardy
@ 2007-08-09  4:12   ` Paul E. McKenney
  1 sibling, 0 replies; 14+ messages in thread
From: Paul E. McKenney @ 2007-08-09  4:12 UTC (permalink / raw)
  To: jamal; +Cc: Patrick McHardy, netdev

On Wed, May 30, 2007 at 10:56:27AM -0400, jamal wrote:
> 
> On Wed, 2007-30-05 at 11:40 +0200, Patrick McHardy wrote:
> > One good thing about ESFQ is the more flexible flow classification, but
> > I don't like the concept of having a set of selectable hash functions
> > very much.
> > 
> 
> In the spirit of SFQ it is probably ok to do that; 
> i.e iirc,  the idea for justification behind sfq was to provide a
> "computationaly feasible/reasonable" way to provide fair queueing with a
> gazillion queues. 
> Classification was considered damn expensive in those days (when MC68K
> was sort of state of the art); it still is but maybe a much much lesser
> issue now for what "gazillion" was in those days. So a simple hash on a
> few fields with pertubation (the part that makes allows the "stochastic"
> part in the name) was deemed the way to go and in order to provide
> fairness (while avoiding packet reordering).
> So if you want to keep that spirit it is ok to do what ESFQ does;
> I think the assumptions will still be valid if you have a gazillion
> queues in todays terms. A number like say 128K may make sense.
> (Hey Paul M is still around, just too focused on the wrong things like
> RCU;-> so we can ask his opinion)

Not only that, he is -really- slow about getting to some of his email
sometimes.  :-/  In my defense, if you CC me, I will spot it more quickly.

Yeah, the 1980s were indeed over a -long- time ago, and architectures
certainly have changed.  I wrote my first parallel kernel code about
a year after the SFQ paper was published, and not too many years after
that began my longstanding RCU habit.  And not only did I inhale at
the time, I am still inhaling deeply.  ;-)

In any case, if you can feasibly do an exact classification, then doing so
would most likely be better than using SFQ.  And with today's hardware,
exact classifications are much reasonable than they were back on my old
handful-of-MHz 68K-based Sun workstation.  If an exact classification is
unworkable, but you absolutely have to maintain perfect ordering, then
one approach is to let the SFQ drain before perturbing the hash function.

One way to do this is to have a pair of hash tables, and switch back and
forth between them on each perturbation.  Perturbation then goes as follows:

1.	Select a new hash perturbation value, and associate it with the
	currently-unused hash table (call it B).

2.	Swing pointers so that subsequent incoming packets go to B.

3.	Continue outputting from A until it is drained.  If the
	perturbation is lockless, you might need to ensure that a
	grace period passes during this drain operation.  (Yep, still
	inhaling!!!)

4.	Start outputting packets from B.

But there might be better approaches.

> > These patches change SFQ to allow attaching external classifiers and add
> > a new "flow" classifier that allows to classify flows based on an arbitary
> > combination of pre-defined keys. Its probably not the fastest classifier
> > when used with multiple keys, but frankly, I don't think speed is very
> > important in most situations where the current SFQ implementation is used.
> 
> The only one thing i noticed that changes the behavior is the use of
> skb->prio as a selector. I think if you removed that it should be fine.
> Another alternative is to create a brand new FQ qdisc and leave the
> classification to the classifiers.
> 
> > It currently does not support perturbation, I didn't want to move this into
> > the classifier, so I need to think about a way to handle it within SFQ.
> 
> It is kind of hard to put it back into the current approach because the
> basic assumptions of ensuring no re-ordering and a "fast" classifier are
> gone.
> 
> I am almost tempted to say go back and write a qdisc called FQ.

And this might well be a better approach.  ;-)

							Thanx, Paul

> cheers,
> jamal
> 
> -
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2007-08-09  4:12 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-05-30  9:40 [RFC NET_SCHED 00/02]: Flexible SFQ flow classification Patrick McHardy
2007-05-30  9:40 ` [RFC NET_SCHED 01/02]: sch_sfq: add support for external classifiers Patrick McHardy
2007-05-30  9:40 ` [RFC NET_SCHED 02/02]: Add flow classifier Patrick McHardy
2007-05-30 11:18 ` [RFC NET_SCHED 00/02]: Flexible SFQ flow classification Andy Furniss
2007-05-30 15:32   ` Patrick McHardy
2007-05-30 16:57     ` Andy Furniss
2007-05-30 14:56 ` jamal
2007-05-30 15:27   ` Patrick McHardy
2007-05-30 16:10     ` jamal
2007-05-30 16:23       ` Patrick McHardy
2007-05-30 16:34         ` jamal
2007-05-30 16:55           ` Patrick McHardy
2007-05-30 17:02             ` jamal
2007-08-09  4:12   ` Paul E. McKenney

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).