From: Andy Furniss <andy.furniss@dsl.pipex.com>
To: lartc@vger.kernel.org
Subject: Re: [LARTC] ESFQ?
Date: Tue, 11 Jan 2005 15:28:08 +0000 [thread overview]
Message-ID: <41E3F088.6060708@dsl.pipex.com> (raw)
In-Reply-To: <41DAB1B4.6030902@expertron.co.za>
[-- Attachment #1: Type: text/plain, Size: 1025 bytes --]
Justin Schoeman wrote:
> Woohoo - that would be great!
>
> -justin
>
> Andy Furniss wrote:
>
>> Justin Schoeman wrote:
>>
>>> Ouch... Is there any other way to do host-based fair sharing (well,
>>> other than actually classifying each host :-( )?
>>
>>
>>
>> I don't think it will take much to get it to work - though I haven't
>> tried :-) .
>>
>> I'll have a look at doing a 2.6.10 in the next few days.
Well I gave it a go (first patches I've made) and they work for me
though Thomas or Stephen may notice something :-) .
Hopefully they won't be needed in the future if Thomas gets esfq in
mainline.
They are based on Alexander Clouters patches at www.digriz.org.uk. I
only used the first iproute one.
I was hampered a bit because kernel.org have turned off the diff viewer.
The remove db iproute patch is from LFS, you may not need it if you have
Berkley DB installed ( search for db_185.h ).
If you don't have it *and* you don't use arpd then use the patch, it
just removes arpd from the build.
Andy.
[-- Attachment #2: esfq-iproute2-2.6.9-041019.patch --]
[-- Type: text/plain, Size: 5595 bytes --]
diff -urN iproute2-2.6.9.orig/include/linux/pkt_sched.h iproute2-2.6.9/include/linux/pkt_sched.h
--- iproute2-2.6.9.orig/include/linux/pkt_sched.h 2004-10-19 21:49:02.000000000 +0100
+++ iproute2-2.6.9/include/linux/pkt_sched.h 2005-01-11 11:46:45.395401296 +0000
@@ -126,6 +126,13 @@
/* SFQ section */
+enum
+{
+ TCA_SFQ_HASH_CLASSIC,
+ TCA_SFQ_HASH_DST,
+ TCA_SFQ_HASH_SRC,
+};
+
struct tc_sfq_qopt
{
unsigned quantum; /* Bytes per round allocated to flow */
@@ -133,6 +140,7 @@
__u32 limit; /* Maximal packets in queue */
unsigned divisor; /* Hash divisor */
unsigned flows; /* Maximal number of flows */
+ unsigned hash_kind; /* Hash function to use for flow identification */
};
/*
@@ -142,6 +150,8 @@
*
* The only reason for this is efficiency, it is possible
* to change these parameters in compile time.
+ *
+ * If you need to play with this values use esfq
*/
/* RED section */
diff -urN iproute2-2.6.9.orig/tc/Makefile iproute2-2.6.9/tc/Makefile
--- iproute2-2.6.9.orig/tc/Makefile 2004-10-19 21:49:02.000000000 +0100
+++ iproute2-2.6.9/tc/Makefile 2005-01-11 11:46:45.396401144 +0000
@@ -6,6 +6,7 @@
TCMODULES :=
TCMODULES += q_fifo.o
TCMODULES += q_sfq.o
+TCMODULES += q_esfq.o
TCMODULES += q_red.o
TCMODULES += q_prio.o
TCMODULES += q_tbf.o
diff -urN iproute2-2.6.9.orig/tc/q_esfq.c iproute2-2.6.9/tc/q_esfq.c
--- iproute2-2.6.9.orig/tc/q_esfq.c 1970-01-01 01:00:00.000000000 +0100
+++ iproute2-2.6.9/tc/q_esfq.c 2005-01-11 11:47:29.424707824 +0000
@@ -0,0 +1,168 @@
+/*
+ * q_esfq.c ESFQ.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes: Alexander Atanasov, <alex@ssi.bg>
+ * Added depth,limit,divisor,hash_kind options.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <math.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+static void explain(void)
+{
+ fprintf(stderr, "Usage: ... esfq [ perturb SECS ] [ quantum BYTES ] [ depth FLOWS ]\n\t[ divisor HASHBITS ] [ limit PKTS ] [ hash HASHTYPE]\n");
+ fprintf(stderr,"Where: \n");
+ fprintf(stderr,"HASHTYPE := { classic | src | dst }\n");
+}
+
+#define usage() return(-1)
+
+static int esfq_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+ int ok=0;
+ struct tc_sfq_qopt opt;
+
+ memset(&opt, 0, sizeof(opt));
+
+ opt.hash_kind= TCA_SFQ_HASH_CLASSIC;
+
+ while (argc > 0) {
+ if (strcmp(*argv, "quantum") == 0) {
+ NEXT_ARG();
+ if (get_size(&opt.quantum, *argv)) {
+ fprintf(stderr, "Illegal \"quantum\"\n");
+ return -1;
+ }
+ ok++;
+ } else if (strcmp(*argv, "perturb") == 0) {
+ NEXT_ARG();
+ if (get_integer(&opt.perturb_period, *argv, 0)) {
+ fprintf(stderr, "Illegal \"perturb\"\n");
+ return -1;
+ }
+ ok++;
+ } else if (strcmp(*argv, "depth") == 0) {
+ NEXT_ARG();
+ if (get_integer(&opt.flows, *argv, 0)) {
+ fprintf(stderr, "Illegal \"depth\"\n");
+ return -1;
+ }
+ ok++;
+ } else if (strcmp(*argv, "divisor") == 0) {
+ NEXT_ARG();
+ if (get_integer(&opt.divisor, *argv, 0)) {
+ fprintf(stderr, "Illegal \"divisor\"\n");
+ return -1;
+ }
+ if(opt.divisor >= 15) {
+ fprintf(stderr, "Illegal \"divisor\" must be < 15\n");
+ return -1;
+ }
+ opt.divisor=pow(2,opt.divisor);
+ ok++;
+ } else if (strcmp(*argv, "limit") == 0) {
+ NEXT_ARG();
+ if (get_integer(&opt.limit, *argv, 0)) {
+ fprintf(stderr, "Illegal \"limit\"\n");
+ return -1;
+ }
+ ok++;
+ } else if (strcmp(*argv, "hash") == 0) {
+ NEXT_ARG();
+ if(strcmp(*argv,"classic") == 0) {
+ opt.hash_kind= TCA_SFQ_HASH_CLASSIC;
+ } else
+ if(strcmp(*argv,"dst") == 0) {
+ opt.hash_kind= TCA_SFQ_HASH_DST;
+ } else
+ if(strcmp(*argv,"src") == 0) {
+ opt.hash_kind= TCA_SFQ_HASH_SRC;
+ } else {
+ fprintf(stderr, "Illegal \"hash\"\n");
+ explain();
+ return -1;
+ }
+ ok++;
+ } else if (strcmp(*argv, "help") == 0) {
+ explain();
+ return -1;
+ } else {
+ fprintf(stderr, "What is \"%s\"?\n", *argv);
+ explain();
+ return -1;
+ }
+ argc--; argv++;
+ }
+
+ if (ok)
+ addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt));
+ return 0;
+}
+
+static int esfq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+ struct tc_sfq_qopt *qopt;
+ SPRINT_BUF(b1);
+
+ if (opt == NULL)
+ return 0;
+
+ if (RTA_PAYLOAD(opt) < sizeof(*qopt))
+ return -1;
+ qopt = RTA_DATA(opt);
+ fprintf(f, "quantum %s ", sprint_size(qopt->quantum, b1));
+ if (show_details) {
+ fprintf(f, "limit %up flows %u/%u ",
+ qopt->limit, qopt->flows, qopt->divisor);
+ }
+ if (qopt->perturb_period)
+ fprintf(f, "perturb %dsec ", qopt->perturb_period);
+
+ fprintf(f,"hash: ");
+ switch(qopt->hash_kind)
+ {
+ case TCA_SFQ_HASH_CLASSIC:
+ fprintf(f,"classic");
+ break;
+ case TCA_SFQ_HASH_DST:
+ fprintf(f,"dst");
+ break;
+ case TCA_SFQ_HASH_SRC:
+ fprintf(f,"src");
+ break;
+ default:
+ fprintf(f,"Unknown");
+ }
+ return 0;
+}
+
+static int esfq_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats)
+{
+ return 0;
+}
+
+
+struct qdisc_util esfq_qdisc_util = {
+ .id = "esfq",
+ .parse_qopt = esfq_parse_opt,
+ .print_qopt = esfq_print_opt,
+ .print_xstats = esfq_print_xstats,
+};
[-- Attachment #3: esfq-kernel-2.6.10.patch --]
[-- Type: text/plain, Size: 16448 bytes --]
diff -urN linux-2.6.10.orig/include/linux/pkt_sched.h linux-2.6.10/include/linux/pkt_sched.h
--- linux-2.6.10.orig/include/linux/pkt_sched.h Fri Dec 24 21:35:23 2004
+++ linux-2.6.10/include/linux/pkt_sched.h Mon Jan 10 15:32:43 2005
@@ -129,6 +129,13 @@
/* SFQ section */
+enum
+{
+ TCA_SFQ_HASH_CLASSIC,
+ TCA_SFQ_HASH_DST,
+ TCA_SFQ_HASH_SRC,
+};
+
struct tc_sfq_qopt
{
unsigned quantum; /* Bytes per round allocated to flow */
@@ -136,6 +143,7 @@
__u32 limit; /* Maximal packets in queue */
unsigned divisor; /* Hash divisor */
unsigned flows; /* Maximal number of flows */
+ unsigned hash_kind; /* Hash function to use for flow identification */
};
/*
@@ -145,6 +153,8 @@
*
* The only reason for this is efficiency, it is possible
* to change these parameters in compile time.
+ *
+ * If you need to play with this values use esfq.
*/
/* RED section */
diff -urN linux-2.6.10.orig/net/sched/Kconfig linux-2.6.10/net/sched/Kconfig
--- linux-2.6.10.orig/net/sched/Kconfig Fri Dec 24 21:35:27 2004
+++ linux-2.6.10/net/sched/Kconfig Mon Jan 10 15:32:43 2005
@@ -154,6 +154,24 @@
To compile this code as a module, choose M here: the
module will be called sch_sfq.
+config NET_SCH_ESFQ
+ tristate "ESFQ queue"
+ depends on NET_SCHED
+ ---help---
+ Say Y here if you want to use the Enhanced Stochastic Fairness
+ Queueing (ESFQ) packet scheduling algorithm for some of your network
+ devices or as a leaf discipline for the CBQ scheduling algorithm (see
+ the top of <file:net/sched/sch_esfq.c> for details and references
+ about the SFQ algorithm).
+
+ This is an enchanced SFQ version which allows you to control the
+ hardcoded values in the SFQ scheduler: queue depth, hash table size,
+ queues limit. Also adds control to the hash function used to identify
+ packet flows. Hash by src or dst ip and original sfq hash.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_esfq.
+
config NET_SCH_TEQL
tristate "TEQL queue"
depends on NET_SCHED
diff -urN linux-2.6.10.orig/net/sched/Makefile linux-2.6.10/net/sched/Makefile
--- linux-2.6.10.orig/net/sched/Makefile Fri Dec 24 21:34:45 2004
+++ linux-2.6.10/net/sched/Makefile Mon Jan 10 15:32:43 2005
@@ -22,6 +22,7 @@
obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
+obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o
obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
diff -urN linux-2.6.10.orig/net/sched/sch_esfq.c linux-2.6.10/net/sched/sch_esfq.c
--- linux-2.6.10.orig/net/sched/sch_esfq.c Thu Jan 1 01:00:00 1970
+++ linux-2.6.10/net/sched/sch_esfq.c Mon Jan 10 22:40:09 2005
@@ -0,0 +1,585 @@
+/*
+ * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes: Alexander Atanasov, <alex@ssi.bg>
+ * Added dynamic depth,limit,divisor,hash_kind options.
+ * Added dst and src hashes.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+
+/* Stochastic Fairness Queuing algorithm.
+ For more comments look at sch_sfq.c.
+ The difference is that you can change limit, depth,
+ hash table size and choose 3 hash types.
+
+ classic: same as in sch_sfq.c
+ dst: destination IP address
+ src: source IP address
+
+ TODO:
+ make sfq_change work.
+*/
+
+
+/* This type should contain at least SFQ_DEPTH*2 values */
+typedef unsigned int esfq_index;
+
+struct esfq_head
+{
+ esfq_index next;
+ esfq_index prev;
+};
+
+struct esfq_sched_data
+{
+/* Parameters */
+ int perturb_period;
+ unsigned quantum; /* Allotment per round: MUST BE >= MTU */
+ int limit;
+ unsigned depth;
+ unsigned hash_divisor;
+ unsigned hash_kind;
+/* Variables */
+ struct timer_list perturb_timer;
+ int perturbation;
+ esfq_index tail; /* Index of current slot in round */
+ esfq_index max_depth; /* Maximal depth */
+
+ esfq_index *ht; /* Hash table */
+ esfq_index *next; /* Active slots link */
+ short *allot; /* Current allotment per slot */
+ unsigned short *hash; /* Hash value indexed by slots */
+ struct sk_buff_head *qs; /* Slot queue */
+ struct esfq_head *dep; /* Linked list of slots, indexed by depth */
+};
+
+static __inline__ unsigned esfq_hash_u32(struct esfq_sched_data *q,u32 h)
+{
+ int pert = q->perturbation;
+
+ if (pert)
+ h = (h<<pert) ^ (h>>(0x1F - pert));
+
+ h = ntohl(h) * 2654435761UL;
+ return h & (q->hash_divisor-1);
+}
+
+static __inline__ unsigned esfq_fold_hash_classic(struct esfq_sched_data *q, u32 h, u32 h1)
+{
+ int pert = q->perturbation;
+
+ /* Have we any rotation primitives? If not, WHY? */
+ h ^= (h1<<pert) ^ (h1>>(0x1F - pert));
+ h ^= h>>10;
+ return h & (q->hash_divisor-1);
+}
+
+static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb)
+{
+ u32 h, h2;
+ u32 hs;
+
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_IP):
+ {
+ struct iphdr *iph = skb->nh.iph;
+ h = iph->daddr;
+ hs = iph->saddr;
+ h2 = hs^iph->protocol;
+ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
+ (iph->protocol == IPPROTO_TCP ||
+ iph->protocol == IPPROTO_UDP ||
+ iph->protocol == IPPROTO_ESP))
+ h2 ^= *(((u32*)iph) + iph->ihl);
+ break;
+ }
+ case __constant_htons(ETH_P_IPV6):
+ {
+ struct ipv6hdr *iph = skb->nh.ipv6h;
+ h = iph->daddr.s6_addr32[3];
+ hs = iph->saddr.s6_addr32[3];
+ h2 = hs^iph->nexthdr;
+ if (iph->nexthdr == IPPROTO_TCP ||
+ iph->nexthdr == IPPROTO_UDP ||
+ iph->nexthdr == IPPROTO_ESP)
+ h2 ^= *(u32*)&iph[1];
+ break;
+ }
+ default:
+ h = (u32)(unsigned long)skb->dst;
+ hs = (u32)(unsigned long)skb->sk;
+ h2 = hs^skb->protocol;
+ }
+ switch(q->hash_kind)
+ {
+ case TCA_SFQ_HASH_CLASSIC:
+ return esfq_fold_hash_classic(q, h, h2);
+ case TCA_SFQ_HASH_DST:
+ return esfq_hash_u32(q,h);
+ case TCA_SFQ_HASH_SRC:
+ return esfq_hash_u32(q,hs);
+ default:
+ if (net_ratelimit())
+ printk(KERN_DEBUG "esfq unknown hash method, fallback to classic\n");
+ }
+ return esfq_fold_hash_classic(q, h, h2);
+}
+
+static inline void esfq_link(struct esfq_sched_data *q, esfq_index x)
+{
+ esfq_index p, n;
+ int d = q->qs[x].qlen + q->depth;
+
+ p = d;
+ n = q->dep[d].next;
+ q->dep[x].next = n;
+ q->dep[x].prev = p;
+ q->dep[p].next = q->dep[n].prev = x;
+}
+
+static inline void esfq_dec(struct esfq_sched_data *q, esfq_index x)
+{
+ esfq_index p, n;
+
+ n = q->dep[x].next;
+ p = q->dep[x].prev;
+ q->dep[p].next = n;
+ q->dep[n].prev = p;
+
+ if (n == p && q->max_depth == q->qs[x].qlen + 1)
+ q->max_depth--;
+
+ esfq_link(q, x);
+}
+
+static inline void esfq_inc(struct esfq_sched_data *q, esfq_index x)
+{
+ esfq_index p, n;
+ int d;
+
+ n = q->dep[x].next;
+ p = q->dep[x].prev;
+ q->dep[p].next = n;
+ q->dep[n].prev = p;
+ d = q->qs[x].qlen;
+ if (q->max_depth < d)
+ q->max_depth = d;
+
+ esfq_link(q, x);
+}
+
+static unsigned int esfq_drop(struct Qdisc *sch)
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ esfq_index d = q->max_depth;
+ struct sk_buff *skb;
+ unsigned int len;
+
+ /* Queue is full! Find the longest slot and
+ drop a packet from it */
+
+ if (d > 1) {
+ esfq_index x = q->dep[d+q->depth].next;
+ skb = q->qs[x].prev;
+ len = skb->len;
+ __skb_unlink(skb, &q->qs[x]);
+ kfree_skb(skb);
+ esfq_dec(q, x);
+ sch->q.qlen--;
+ sch->qstats.drops++;
+ return len;
+ }
+
+ if (d == 1) {
+ /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
+ d = q->next[q->tail];
+ q->next[q->tail] = q->next[d];
+ q->allot[q->next[d]] += q->quantum;
+ skb = q->qs[d].prev;
+ len = skb->len;
+ __skb_unlink(skb, &q->qs[d]);
+ kfree_skb(skb);
+ esfq_dec(q, d);
+ sch->q.qlen--;
+ q->ht[q->hash[d]] = q->depth;
+ sch->qstats.drops++;
+ return len;
+ }
+
+ return 0;
+}
+
+static int
+esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ unsigned hash = esfq_hash(q, skb);
+ unsigned depth = q->depth;
+ esfq_index x;
+
+ x = q->ht[hash];
+ if (x == depth) {
+ q->ht[hash] = x = q->dep[depth].next;
+ q->hash[x] = hash;
+ }
+ __skb_queue_tail(&q->qs[x], skb);
+ esfq_inc(q, x);
+ if (q->qs[x].qlen == 1) { /* The flow is new */
+ if (q->tail == depth) { /* It is the first flow */
+ q->tail = x;
+ q->next[x] = x;
+ q->allot[x] = q->quantum;
+ } else {
+ q->next[x] = q->next[q->tail];
+ q->next[q->tail] = x;
+ q->tail = x;
+ }
+ }
+ if (++sch->q.qlen < q->limit-1) {
+ sch->bstats.bytes += skb->len;
+ sch->bstats.packets++;
+ return 0;
+ }
+
+ esfq_drop(sch);
+ return NET_XMIT_CN;
+}
+
+static int
+esfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ unsigned hash = esfq_hash(q, skb);
+ unsigned depth = q->depth;
+ esfq_index x;
+
+ x = q->ht[hash];
+ if (x == depth) {
+ q->ht[hash] = x = q->dep[depth].next;
+ q->hash[x] = hash;
+ }
+ __skb_queue_head(&q->qs[x], skb);
+ esfq_inc(q, x);
+ if (q->qs[x].qlen == 1) { /* The flow is new */
+ if (q->tail == depth) { /* It is the first flow */
+ q->tail = x;
+ q->next[x] = x;
+ q->allot[x] = q->quantum;
+ } else {
+ q->next[x] = q->next[q->tail];
+ q->next[q->tail] = x;
+ q->tail = x;
+ }
+ }
+ if (++sch->q.qlen < q->limit - 1) {
+ sch->qstats.requeues++;
+ return 0;
+ }
+
+ sch->qstats.drops++;
+ esfq_drop(sch);
+ return NET_XMIT_CN;
+}
+
+
+
+
+static struct sk_buff *
+esfq_dequeue(struct Qdisc* sch)
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ struct sk_buff *skb;
+ unsigned depth = q->depth;
+ esfq_index a, old_a;
+
+ /* No active slots */
+ if (q->tail == depth)
+ return NULL;
+
+ a = old_a = q->next[q->tail];
+
+ /* Grab packet */
+ skb = __skb_dequeue(&q->qs[a]);
+ esfq_dec(q, a);
+ sch->q.qlen--;
+
+ /* Is the slot empty? */
+ if (q->qs[a].qlen == 0) {
+ q->ht[q->hash[a]] = depth;
+ a = q->next[a];
+ if (a == old_a) {
+ q->tail = depth;
+ return skb;
+ }
+ q->next[q->tail] = a;
+ q->allot[a] += q->quantum;
+ } else if ((q->allot[a] -= skb->len) <= 0) {
+ q->tail = a;
+ a = q->next[a];
+ q->allot[a] += q->quantum;
+ }
+
+ return skb;
+}
+
+static void
+esfq_reset(struct Qdisc* sch)
+{
+ struct sk_buff *skb;
+
+ while ((skb = esfq_dequeue(sch)) != NULL)
+ kfree_skb(skb);
+}
+
+static void esfq_perturbation(unsigned long arg)
+{
+ struct Qdisc *sch = (struct Qdisc*)arg;
+ struct esfq_sched_data *q = qdisc_priv(sch);
+
+ q->perturbation = net_random()&0x1F;
+ q->perturb_timer.expires = jiffies + q->perturb_period;
+
+ if (q->perturb_period) {
+ q->perturb_timer.expires = jiffies + q->perturb_period;
+ add_timer(&q->perturb_timer);
+ }
+}
+
+static int esfq_change(struct Qdisc *sch, struct rtattr *opt)
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ struct tc_sfq_qopt *ctl = RTA_DATA(opt);
+ int old_perturb = q->perturb_period;
+
+ if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+ return -EINVAL;
+
+ sch_tree_lock(sch);
+ q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
+ q->perturb_period = ctl->perturb_period*HZ;
+// q->hash_divisor = ctl->divisor;
+// q->tail = q->limit = q->depth = ctl->flows;
+
+ if (ctl->limit)
+ q->limit = min_t(u32, ctl->limit, q->depth);
+
+ if (ctl->hash_kind) {
+ q->hash_kind = ctl->hash_kind;
+ if (q->hash_kind != TCA_SFQ_HASH_CLASSIC)
+ q->perturb_period = 0;
+ }
+
+ // is sch_tree_lock enough to do this ?
+ while (sch->q.qlen >= q->limit-1)
+ esfq_drop(sch);
+
+ if (old_perturb)
+ del_timer(&q->perturb_timer);
+ if (q->perturb_period) {
+ q->perturb_timer.expires = jiffies + q->perturb_period;
+ add_timer(&q->perturb_timer);
+ } else {
+ q->perturbation = 0;
+ }
+ sch_tree_unlock(sch);
+ return 0;
+}
+
+static int esfq_init(struct Qdisc *sch, struct rtattr *opt)
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ struct tc_sfq_qopt *ctl;
+ esfq_index p = ~0UL/2;
+ int i;
+
+ if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+ return -EINVAL;
+
+ init_timer(&q->perturb_timer);
+ q->perturb_timer.data = (unsigned long)sch;
+ q->perturb_timer.function = esfq_perturbation;
+ q->perturbation = 0;
+ q->hash_kind = TCA_SFQ_HASH_CLASSIC;
+ q->max_depth = 0;
+ if (opt == NULL) {
+ q->quantum = psched_mtu(sch->dev);
+ q->perturb_period = 0;
+ q->hash_divisor = 1024;
+ q->tail = q->limit = q->depth = 128;
+
+ } else {
+ ctl = RTA_DATA(opt);
+ q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
+ q->perturb_period = ctl->perturb_period*HZ;
+ q->hash_divisor = ctl->divisor ? : 1024;
+ q->tail = q->limit = q->depth = ctl->flows ? : 128;
+
+ if ( q->depth > p - 1 )
+ return -EINVAL;
+
+ if (ctl->limit)
+ q->limit = min_t(u32, ctl->limit, q->depth);
+
+ if (ctl->hash_kind) {
+ q->hash_kind = ctl->hash_kind;
+ }
+
+ if (q->perturb_period) {
+ q->perturb_timer.expires = jiffies + q->perturb_period;
+ add_timer(&q->perturb_timer);
+ }
+ }
+
+ q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL);
+ if (!q->ht)
+ goto err_case;
+
+ q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL);
+ if (!q->dep)
+ goto err_case;
+ q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL);
+ if (!q->next)
+ goto err_case;
+
+ q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL);
+ if (!q->allot)
+ goto err_case;
+ q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL);
+ if (!q->hash)
+ goto err_case;
+ q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL);
+ if (!q->qs)
+ goto err_case;
+
+ for (i=0; i< q->hash_divisor; i++)
+ q->ht[i] = q->depth;
+ for (i=0; i<q->depth; i++) {
+ skb_queue_head_init(&q->qs[i]);
+ q->dep[i+q->depth].next = i+q->depth;
+ q->dep[i+q->depth].prev = i+q->depth;
+ }
+
+ for (i=0; i<q->depth; i++)
+ esfq_link(q, i);
+ return 0;
+err_case:
+ if (q->ht)
+ kfree(q->ht);
+ if (q->dep)
+ kfree(q->dep);
+ if (q->next)
+ kfree(q->next);
+ if (q->allot)
+ kfree(q->allot);
+ if (q->hash)
+ kfree(q->hash);
+ if (q->qs)
+ kfree(q->qs);
+ return -ENOBUFS;
+}
+
+static void esfq_destroy(struct Qdisc *sch)
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ del_timer(&q->perturb_timer);
+ if(q->ht)
+ kfree(q->ht);
+ if(q->dep)
+ kfree(q->dep);
+ if(q->next)
+ kfree(q->next);
+ if(q->allot)
+ kfree(q->allot);
+ if(q->hash)
+ kfree(q->hash);
+ if(q->qs)
+ kfree(q->qs);
+}
+
+static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct esfq_sched_data *q = qdisc_priv(sch);
+ unsigned char *b = skb->tail;
+ struct tc_sfq_qopt opt;
+
+ opt.quantum = q->quantum;
+ opt.perturb_period = q->perturb_period/HZ;
+
+ opt.limit = q->limit;
+ opt.divisor = q->hash_divisor;
+ opt.flows = q->depth;
+ opt.hash_kind = q->hash_kind;
+
+ RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+ return skb->len;
+
+rtattr_failure:
+ skb_trim(skb, b - skb->data);
+ return -1;
+}
+
+static struct Qdisc_ops esfq_qdisc_ops =
+{
+ .next = NULL,
+ .cl_ops = NULL,
+ .id = "esfq",
+ .priv_size = sizeof(struct esfq_sched_data),
+ .enqueue = esfq_enqueue,
+ .dequeue = esfq_dequeue,
+ .requeue = esfq_requeue,
+ .drop = esfq_drop,
+ .init = esfq_init,
+ .reset = esfq_reset,
+ .destroy = esfq_destroy,
+ .change = NULL, /* esfq_change - needs more work */
+ .dump = esfq_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init esfq_module_init(void)
+{
+ return register_qdisc(&esfq_qdisc_ops);
+}
+static void __exit esfq_module_exit(void)
+{
+ unregister_qdisc(&esfq_qdisc_ops);
+}
+module_init(esfq_module_init)
+module_exit(esfq_module_exit)
+MODULE_LICENSE("GPL");
[-- Attachment #4: iproute2-2.6.9_041019-remove_db-1.patch --]
[-- Type: text/plain, Size: 990 bytes --]
Submitted By: Jeremy Utley <jeremy@linuxfromscratch.org>
Date: 2004-11-04
Initial Package Version: 2.6.9-041019
Upstream Status - Not submitted - LFS Specific
Origin: Based on inital work by Jim Gifford, ported to apply to the latest version
Description: Disables compilation of the arpd program, which requres Berkley DB.
diff -Naur iproute2-2.6.9/misc/Makefile iproute2-2.6.9-new/misc/Makefile
--- iproute2-2.6.9/misc/Makefile 2004-10-19 20:49:02.000000000 +0000
+++ iproute2-2.6.9-new/misc/Makefile 2004-11-04 19:14:43.704002010 +0000
@@ -1,7 +1,7 @@
SSOBJ=ss.o ssfilter.o
LNSTATOBJ=lnstat.o lnstat_util.o
-TARGETS=ss nstat ifstat rtacct arpd lnstat
+TARGETS=ss nstat ifstat rtacct lnstat
include ../Config
@@ -18,9 +18,6 @@
rtacct: rtacct.c
$(CC) $(CFLAGS) $(LDFLAGS) -o rtacct rtacct.c $(LIBNETLINK) -lm
-arpd: arpd.c
- $(CC) $(CFLAGS) -I$(DBM_INCLUDE) $(LDFLAGS) -o arpd arpd.c $(LIBNETLINK) -ldb -lpthread
-
ssfilter.c: ssfilter.y
bison ssfilter.y -o ssfilter.c
next prev parent reply other threads:[~2005-01-11 15:28 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-01-04 15:09 [LARTC] ESFQ? Justin Schoeman
2005-01-04 18:04 ` Jonathan Day
2005-01-05 7:18 ` Justin Schoeman
2005-01-05 8:20 ` Andy Furniss
2005-01-05 8:46 ` Justin Schoeman
2005-01-11 15:28 ` Andy Furniss [this message]
2005-01-11 15:33 ` Brian Carrig
2005-01-11 15:51 ` Justin Schoeman
2005-01-11 20:38 ` Thomas Graf
2005-01-11 23:06 ` Andy Furniss
2005-01-12 0:21 ` Stephen Hemminger
2005-01-12 1:08 ` Andy Furniss
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=41E3F088.6060708@dsl.pipex.com \
--to=andy.furniss@dsl.pipex.com \
--cc=lartc@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.