* [LARTC] Some work to extend SFQ to be more configurable
@ 2002-06-04 23:08 Alexander Atanasov
0 siblings, 0 replies; only message in thread
From: Alexander Atanasov @ 2002-06-04 23:08 UTC (permalink / raw)
To: lartc
[-- Attachment #1: Type: TEXT/PLAIN, Size: 1141 bytes --]
Hi there!
On Tue, 4 Jun 2002, Martin Devera wrote:
> This is often discussed and is on "TODO" for someone ;)
>
> >
> > SFQ is connection oriented. right?
> > Would be a good idea to make the queues per ip rather than per tcp flow?
> > So there would be per host fairnes.
I've done some work in this direction; it probably needs more work, and
it's poorly tested - expect b00ms ;)
This adds a new qdisc for now - esfq which is a 100% clone of
original sfq.
- You can set all sfq parameters: hash table size, queue depths,
queue limits.
- You can choose from 3 hash types: original(classic), dst ip, src
ip.
Things to consider: perturbation with dst and src hashes is not
good IMHO; you can try perturb 0 if it causes trouble.
Please, see the attached files.
Playing with it gives interesting results:
higher depth -> makes flows equal slower
small depth -> makes flows equal faster
limit kills big delays when set at about 75-85% of depth.
Needs testing and measurements - that's why I made it a
separate qdisc and not a patch over sfq; I wanted to compare both.
Any feedback good or bad is welcome.
--
have fun,
alex
[-- Attachment #2: Type: TEXT/PLAIN, Size: 1838 bytes --]
You need:
iproute2-2.2.4-now-ss001007.tar.gz
linux-2.4.18.tar.gz
This may work with next versions too.
Example Step by Step install
------------------------------------------------------------------
Install kernel:
tar zxvf linux-2.4.18.tar.gz
cd linux
cat linux-2.4.18-esfq.diff | patch -p1
make menuconfig
Now you have it in:
Networking options ---> QoS and/or fair queueing ---> ESFQ queue
Configure and install kernel
make dep clean bzImage modules_install
cp System.map /boot/
cp arch/i386/boot/bzImage /boot/bzImage
Edit lilo.conf and add your new kernel.
------------------------------------------------------------------
Install iproute:
tar zxvf iproute2-2.2.4-now-ss001007.tar.gz
cd iproute2
cat iproute2-2.2.4-now-ss001007-esfq.diff | patch -p1
make
/* Note: If you get an error in lib/ll_proto.c, comment out the _PF(ECHO,echo) line */
cp tc/tc your_favourite_iproute_path/tc
cp ip/ip your_favourite_iproute_path/ip
------------------------------------------------------------------
Usage: ... esfq [ perturb SECS ] [ quantum BYTES ] [ depth FLOWS ]
[ divisor HASHBITS ] [ limit PKTS ] [ hash HASHTYPE]
Where:
HASHTYPE := { classic | src | dst }
Examples:
tc qdisc add dev eth0 root esfq limit 128 depth 128 divisor 10 \
hash classic perturb 15
Sets up a classic SFQ.
tc qdisc add dev eth0 root esfq limit 64 depth 64 divisor 11 \
hash dst
Sets up a dst-hashed SFQ with limit and depth of 64 packets and
an 11-bit (2048 rows) hash table. 1:1 with sch_sfq.
You can experiment with the values as you like to find
the best which sfq can do for you.
More can be found in:
linux/net/sched/sch_sfq.c
linux/net/sched/sch_esfq.c
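Limits:
- limit should not exceed depth (the kernel clamps a larger limit to depth)
- divisor is the number of hash bits and must be less than 15 (table size is 2^divisor)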
[-- Attachment #3: Type: TEXT/PLAIN, Size: 4716 bytes --]
diff -urN iproute2.orig/tc/Makefile iproute2/tc/Makefile
--- iproute2.orig/tc/Makefile Sun Apr 16 20:42:53 2000
+++ iproute2/tc/Makefile Tue May 14 23:04:10 2002
@@ -5,6 +5,7 @@
TCMODULES :=
TCMODULES += q_fifo.o
TCMODULES += q_sfq.o
+TCMODULES += q_esfq.o
TCMODULES += q_red.o
TCMODULES += q_prio.o
TCMODULES += q_tbf.o
diff -urN iproute2.orig/tc/q_esfq.c iproute2/tc/q_esfq.c
--- iproute2.orig/tc/q_esfq.c Thu Jan 1 02:00:00 1970
+++ iproute2/tc/q_esfq.c Thu May 16 02:13:30 2002
@@ -0,0 +1,169 @@
+/*
+ * q_esfq.c ESFQ.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes: Alexander Atanasov, <alex@ssi.bg>
+ * Added depth,limit,divisor,hash_kind options.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <math.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+/* Print the esfq option synopsis and the accepted HASHTYPE keywords to stderr. */
+static void explain(void)
+{
+	fputs("Usage: ... esfq [ perturb SECS ] [ quantum BYTES ] [ depth FLOWS ]\n\t[ divisor HASHBITS ] [ limit PKTS ] [ hash HASHTYPE]\n"
+	      "Where: \n" "HASHTYPE := { classic | src | dst }\n", stderr);
+}
+/* NOTE(review): usage() is not referenced anywhere in this file. */
+#define usage() return(-1)
+
+/* Parse esfq options into a struct tc_sfq_qopt and, if any option was given, append it to
+ * the request as TCA_OPTIONS.  "divisor" is read as hash bits (0..14) and sent as 1 << bits.
+ * Returns 0 on success, -1 on an invalid or unknown option and after printing help. */
+static int esfq_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+	int ok=0, val;	/* val: get_integer() needs an int *, the opt fields are unsigned */
+	struct tc_sfq_qopt opt;
+
+	memset(&opt, 0, sizeof(opt));
+	opt.hash_kind= TCA_SFQ_HASH_CLASSIC;
+	while (argc > 0) {
+		if (strcmp(*argv, "quantum") == 0) {
+			NEXT_ARG();
+			if (get_size(&opt.quantum, *argv)) {
+				fprintf(stderr, "Illegal \"quantum\"\n");
+				return -1;
+			}
+			ok++;
+		} else if (strcmp(*argv, "perturb") == 0) {
+			NEXT_ARG();
+			if (get_integer(&opt.perturb_period, *argv, 0)) {
+				fprintf(stderr, "Illegal \"perturb\"\n");
+				return -1;
+			}
+			ok++;
+		} else if (strcmp(*argv, "depth") == 0) {
+			NEXT_ARG();
+			if (get_integer(&val, *argv, 0) || val < 0) {
+				fprintf(stderr, "Illegal \"depth\"\n");
+				return -1;
+			}
+			opt.flows = val;
+			ok++;
+		} else if (strcmp(*argv, "divisor") == 0) {
+			NEXT_ARG();
+			if (get_integer(&val, *argv, 0) || val < 0) {
+				fprintf(stderr, "Illegal \"divisor\"\n");
+				return -1;
+			}
+			if (val >= 15) {
+				fprintf(stderr, "Illegal \"divisor\" must be < 15\n");
+				return -1;
+			}
+			opt.divisor = 1U << val;	/* exact power of two, no floating-point pow() */
+			ok++;
+		} else if (strcmp(*argv, "limit") == 0) {
+			NEXT_ARG();
+			if (get_integer(&val, *argv, 0) || val < 0) {
+				fprintf(stderr, "Illegal \"limit\"\n");
+				return -1;
+			}
+			opt.limit = val;
+			ok++;
+		} else if (strcmp(*argv, "hash") == 0) {
+			NEXT_ARG();
+			if (strcmp(*argv, "classic") == 0) {
+				opt.hash_kind= TCA_SFQ_HASH_CLASSIC;
+			} else if (strcmp(*argv, "dst") == 0) {
+				opt.hash_kind= TCA_SFQ_HASH_DST;
+			} else if (strcmp(*argv, "src") == 0) {
+				opt.hash_kind= TCA_SFQ_HASH_SRC;
+			} else {
+				fprintf(stderr, "Illegal \"hash\"\n");
+				explain();
+				return -1;
+			}
+			ok++;
+		} else if (strcmp(*argv, "help") == 0) {
+			explain();
+			return -1;
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			explain();
+			return -1;
+		}
+		argc--; argv++;
+	}
+	if (ok)		/* with no options at all nothing is sent and the kernel uses its defaults */
+		addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt));
+	return 0;
+}
+
+/* Print the esfq parameters carried in a TCA_OPTIONS attribute to f.
+ * Limit, flows and divisor are shown only when show_details is set.
+ * Returns 0 (also when opt is NULL), or -1 if the payload is too short. */
+static int esfq_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+	struct tc_sfq_qopt *qopt;
+	const char *kind;
+	SPRINT_BUF(b1);
+
+	if (opt == NULL)
+		return 0;
+	if (RTA_PAYLOAD(opt) < sizeof(*qopt))
+		return -1;
+
+	qopt = RTA_DATA(opt);
+	fprintf(f, "quantum %s ", sprint_size(qopt->quantum, b1));
+	if (show_details)
+		fprintf(f, "limit %up flows %u/%u ",
+			qopt->limit, qopt->flows, qopt->divisor);
+	if (qopt->perturb_period)
+		fprintf(f, "perturb %dsec ", qopt->perturb_period);
+
+	/* Map the hash selector to the keyword used on the command line. */
+	if (qopt->hash_kind == TCA_SFQ_HASH_CLASSIC)
+		kind = "classic";
+	else if (qopt->hash_kind == TCA_SFQ_HASH_DST)
+		kind = "dst";
+	else if (qopt->hash_kind == TCA_SFQ_HASH_SRC)
+		kind = "src";
+	else
+		kind = "Unknown";
+	fprintf(f,"hash: ");
+	fputs(kind, f);
+
+	return 0;
+}
+/* Extended-statistics hook: esfq has none, so nothing is printed and 0 is returned. */
+static int esfq_print_xstats(struct qdisc_util *qu, FILE *f, struct rtattr *xstats)
+{
+ return 0;
+}
+
+/* tc plug-in descriptor for the "esfq" qdisc: its name plus the option parser and printers. */
+struct qdisc_util esfq_util = {
+ NULL,
+ "esfq", /* keyword used on the tc command line */
+ esfq_parse_opt,
+ esfq_print_opt,
+ esfq_print_xstats,
+};
[-- Attachment #4: Type: TEXT/PLAIN, Size: 17783 bytes --]
--- linux-2.4.18/include/linux/pkt_sched.h.orig Tue May 14 23:25:13 2002
+++ linux-2.4.18/include/linux/pkt_sched.h Tue May 14 23:34:57 2002
@@ -157,6 +157,13 @@
/* SFQ section */
+enum
+{
+ TCA_SFQ_HASH_CLASSIC, /* value 0: flow hash as in sch_sfq (addresses, protocol, first word after the IP header) */
+ TCA_SFQ_HASH_DST, /* hash on the destination key only (IP daddr for IP traffic) */
+ TCA_SFQ_HASH_SRC, /* hash on the source key only (IP saddr for IP traffic) */
+};
+
struct tc_sfq_qopt
{
unsigned quantum; /* Bytes per round allocated to flow */
@@ -164,6 +171,7 @@
__u32 limit; /* Maximal packets in queue */
unsigned divisor; /* Hash divisor */
unsigned flows; /* Maximal number of flows */
+ unsigned hash_kind; /* Hash function to use for flow identification */
};
/*
@@ -173,6 +181,8 @@
*
* The only reason for this is efficiency, it is possible
* to change these parameters in compile time.
+ *
+ * If you need to play with these values use esfq.
*/
/* RED section */
--- linux-2.4.18/net/ipv4/netfilter/ipchains_core.c.orig Fri May 24 19:27:01 2002
+++ linux-2.4.18/net/ipv4/netfilter/ipchains_core.c Fri May 24 19:31:24 2002
@@ -723,6 +723,7 @@
src_port, dst_port,
count, tcpsyn)) {
ret = FW_BLOCK;
+ cleanup(chain, 0, slot);
goto out;
}
break;
--- linux-2.4.18/net/sched/Makefile.orig Tue May 14 23:06:55 2002
+++ linux-2.4.18/net/sched/Makefile Tue May 14 23:07:08 2002
@@ -17,6 +17,7 @@
obj-$(CONFIG_NET_SCH_HPFQ) += sch_hpfq.o
obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
+obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o
obj-$(CONFIG_NET_SCH_RED) += sch_red.o
obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
--- linux-2.4.18/net/sched/Config.in.orig Tue May 14 23:07:15 2002
+++ linux-2.4.18/net/sched/Config.in Tue May 14 23:09:03 2002
@@ -11,6 +11,7 @@
tristate ' The simplest PRIO pseudoscheduler' CONFIG_NET_SCH_PRIO
tristate ' RED queue' CONFIG_NET_SCH_RED
tristate ' SFQ queue' CONFIG_NET_SCH_SFQ
+tristate ' ESFQ queue' CONFIG_NET_SCH_ESFQ
tristate ' TEQL queue' CONFIG_NET_SCH_TEQL
tristate ' TBF queue' CONFIG_NET_SCH_TBF
tristate ' GRED queue' CONFIG_NET_SCH_GRED
--- linux-2.4.18/Documentation/Configure.help.orig Thu May 16 01:37:22 2002
+++ linux-2.4.18/Documentation/Configure.help Mon May 27 01:09:03 2002
@@ -9433,6 +9433,24 @@
whenever you want). If you want to compile it as a module, say M
here and read <file:Documentation/modules.txt>.
+ESFQ queue
+CONFIG_NET_SCH_ESFQ
+ Say Y here if you want to use the Stochastic Fairness Queueing (SFQ)
+ packet scheduling algorithm for some of your network devices or as a
+ leaf discipline for the CBQ scheduling algorithm (see the top of
+ <file:net/sched/sch_esfq.c> for details and references about the SFQ
+ algorithm).
+
+ This is an enhanced SFQ version which allows you to control the
+ hardcoded values in the SFQ scheduler: queue depth, hash table size,
+ queues limit. Also adds control to the hash function used to identify
+ packet flows. Hash by src or dst ip and original sfq hash.
+
+ This code is also available as a module called sch_esfq.o ( = code
+ which can be inserted in and removed from the running kernel
+ whenever you want). If you want to compile it as a module, say M
+ here and read <file:Documentation/modules.txt>.
+
TEQL queue
CONFIG_NET_SCH_TEQL
Say Y here if you want to use the True Link Equalizer (TLE) packet
--- /dev/null Mon Jul 18 02:46:18 1994
+++ linux-2.4.18/net/sched/sch_esfq.c Mon May 27 01:49:19 2002
@@ -0,0 +1,591 @@
+/*
+ * net/sched/sch_esfq.c Extended Stochastic Fairness Queueing discipline.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ *
+ * Changes: Alexander Atanasov, <alex@ssi.bg>
+ * Added dynamic depth,limit,divisor,hash_kind options.
+ * Added dst and src hashes.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <asm/system.h>
+#include <asm/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/init.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+
+
+/* Stochastic Fairness Queuing algorithm.
+ For more comments look at sch_sfq.c.
+ The difference is that you can change limit, depth,
+ hash table size and choose 3 hash types.
+
+ classic: same as in sch_sfq.c
+ dst: destination IP address
+ src: source IP address
+
+ TODO:
+ make esfq_change work.
+*/
+
+
+/* Slot/list index type; must be able to hold values up to 2*depth (indexes into dep[]) */
+typedef unsigned int esfq_index;
+/* Node of a doubly linked list that is threaded through q->dep[] by index, not by pointer. */
+struct esfq_head
+{
+ esfq_index next; /* index of the following entry in q->dep[] */
+ esfq_index prev; /* index of the preceding entry in q->dep[] */
+};
+/* Private state of one esfq instance, reached through sch->data. */
+struct esfq_sched_data
+{
+/* Parameters */
+ int perturb_period; /* jiffies between perturbation changes; 0 = disabled */
+ unsigned quantum; /* Allotment per round: MUST BE >= MTU */
+ int limit; /* packet limit for the whole qdisc; set to at most depth */
+ unsigned depth; /* number of slots; also the "no slot" marker in ht[] and tail */
+ unsigned hash_divisor; /* hash table size; used as a mask (divisor-1) */
+ unsigned hash_kind; /* one of TCA_SFQ_HASH_* */
+/* Variables */
+ struct timer_list perturb_timer; /* fires esfq_perturbation() */
+ int perturbation; /* current shift mixed into the hash (0..31) */
+ esfq_index tail; /* Index of current slot in round; == depth when no slot is active */
+ esfq_index max_depth; /* Length of the longest slot queue */
+
+ esfq_index *ht; /* Hash table: bucket -> slot, hash_divisor entries */
+ esfq_index *next; /* Active slots link (circular round-robin order) */
+ short *allot; /* Current allotment per slot */
+ unsigned short *hash; /* Hash value indexed by slots */
+ struct sk_buff_head *qs; /* Slot queue */
+ struct esfq_head *dep; /* Linked list of slots, indexed by depth */
+};
+
+/* Hash one 32-bit key (used for the dst and src hash kinds) into a bucket; hash_divisor acts as a mask. */
+static __inline__ unsigned esfq_hash_u32(struct esfq_sched_data *q,u32 h)
+{
+	const int rot = q->perturbation;
+	u32 key = rot ? (h << rot) ^ (h >> (0x1F - rot)) : h;	/* mix in the perturbation, if any */
+
+	key = ntohl(key) * 2654435761UL;	/* multiply by a fixed constant, result truncated to 32 bits */
+	return key & (q->hash_divisor - 1);
+}
+
+/* Classic hash: mix the perturbed h1 into h, fold the upper bits down and mask to the table size. */
+static __inline__ unsigned esfq_fold_hash_classic(struct esfq_sched_data *q, u32 h, u32 h1)
+{
+	const int rot = q->perturbation;
+	u32 mixed = h ^ (h1 << rot) ^ (h1 >> (0x1F - rot));
+
+	mixed ^= mixed >> 10;
+	return mixed & (q->hash_divisor - 1);
+}
+
+#ifndef IPPROTO_ESP
+#define IPPROTO_ESP 50 /* fallback when the included headers do not define it */
+#endif
+/* Compute the hash-table bucket for skb, using the hash function selected by q->hash_kind. */
+static unsigned esfq_hash(struct esfq_sched_data *q, struct sk_buff *skb)
+{
+ u32 h, h2; /* h: destination key; h2: source/protocol mix used by the classic hash */
+ u32 hs; /* source key */
+
+ switch (skb->protocol) {
+ case __constant_htons(ETH_P_IP):
+ {
+ struct iphdr *iph = skb->nh.iph;
+ h = iph->daddr;
+ hs = iph->saddr;
+ h2 = hs^iph->protocol;
+ if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) && /* skip fragments */
+ (iph->protocol == IPPROTO_TCP ||
+ iph->protocol == IPPROTO_UDP ||
+ iph->protocol == IPPROTO_ESP))
+ h2 ^= *(((u32*)iph) + iph->ihl); /* first 32-bit word after the IP header */
+ break;
+ }
+ case __constant_htons(ETH_P_IPV6):
+ {
+ struct ipv6hdr *iph = skb->nh.ipv6h;
+ h = iph->daddr.s6_addr32[3]; /* only the last 32 bits of each address are used */
+ hs = iph->saddr.s6_addr32[3];
+ h2 = hs^iph->nexthdr;
+ if (iph->nexthdr == IPPROTO_TCP ||
+ iph->nexthdr == IPPROTO_UDP ||
+ iph->nexthdr == IPPROTO_ESP)
+ h2 ^= *(u32*)&iph[1]; /* first 32-bit word after the fixed IPv6 header */
+ break;
+ }
+ default: /* other protocols: key on the skb's dst and sk pointers */
+ h = (u32)(unsigned long)skb->dst;
+ hs = (u32)(unsigned long)skb->sk;
+ h2 = hs^skb->protocol;
+ }
+ switch(q->hash_kind)
+ {
+ case TCA_SFQ_HASH_CLASSIC:
+ return esfq_fold_hash_classic(q, h, h2);
+ case TCA_SFQ_HASH_DST:
+ return esfq_hash_u32(q,h);
+ case TCA_SFQ_HASH_SRC:
+ return esfq_hash_u32(q,hs);
+ default:
+ if (net_ratelimit())
+ printk(KERN_DEBUG "esfq unknown hash method, fallback to classic\n");
+ }
+ return esfq_fold_hash_classic(q, h, h2); /* reached only for an unknown hash_kind */
+}
+/* Insert slot x at the front of the dep[] list that matches its current queue length. */
+extern __inline__ void esfq_link(struct esfq_sched_data *q, esfq_index x)
+{
+ esfq_index p, n;
+ int d = q->qs[x].qlen + q->depth; /* the list head for length L is dep[depth + L] */
+
+ p = d;
+ n = q->dep[d].next;
+ q->dep[x].next = n;
+ q->dep[x].prev = p;
+ q->dep[p].next = q->dep[n].prev = x;
+}
+/* Re-file slot x after a packet was removed from it; lowers max_depth if x was the only longest slot. */
+extern __inline__ void esfq_dec(struct esfq_sched_data *q, esfq_index x)
+{
+ esfq_index p, n;
+
+ n = q->dep[x].next;
+ p = q->dep[x].prev;
+ q->dep[p].next = n; /* unlink x from the list for its previous length */
+ q->dep[n].prev = p;
+
+ if (n == p && q->max_depth == q->qs[x].qlen + 1) /* x was alone in the list of longest slots */
+ q->max_depth--;
+
+ esfq_link(q, x);
+}
+/* Re-file slot x after a packet was added to it; raises max_depth if x is now the longest slot. */
+extern __inline__ void esfq_inc(struct esfq_sched_data *q, esfq_index x)
+{
+ esfq_index p, n;
+ int d;
+
+ n = q->dep[x].next;
+ p = q->dep[x].prev;
+ q->dep[p].next = n; /* unlink x from the list for its previous length */
+ q->dep[n].prev = p;
+ d = q->qs[x].qlen;
+ if (q->max_depth < d)
+ q->max_depth = d;
+
+ esfq_link(q, x);
+}
+/* Drop one packet, preferably from the longest slot. Returns 1 if a packet was dropped, 0 if none was queued. */
+static int esfq_drop(struct Qdisc *sch)
+{
+ struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+ esfq_index d = q->max_depth;
+ struct sk_buff *skb;
+
+ /* Queue is full! Find the longest slot and
+ drop a packet from it */
+
+ if (d > 1) {
+ esfq_index x = q->dep[d+q->depth].next; /* first slot in the list of longest slots */
+ skb = q->qs[x].prev; /* last packet queued in that slot */
+ __skb_unlink(skb, &q->qs[x]);
+ kfree_skb(skb);
+ esfq_dec(q, x);
+ sch->q.qlen--;
+ sch->stats.drops++;
+ return 1;
+ }
+
+ if (d == 1) {
+ /* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
+ d = q->next[q->tail]; /* slot that follows tail in the round */
+ q->next[q->tail] = q->next[d]; /* take it out of the round */
+ q->allot[q->next[d]] += q->quantum;
+ skb = q->qs[d].prev;
+ __skb_unlink(skb, &q->qs[d]);
+ kfree_skb(skb);
+ esfq_dec(q, d);
+ sch->q.qlen--;
+ q->ht[q->hash[d]] = q->depth; /* mark the hash bucket as having no slot */
+ sch->stats.drops++;
+ return 1;
+ }
+
+ return 0;
+}
+/* Queue skb at the tail of its flow's slot. Returns 0, or NET_XMIT_CN after dropping a packet at the limit. */
+static int
+esfq_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+ unsigned hash = esfq_hash(q, skb);
+ unsigned depth = q->depth;
+ esfq_index x;
+
+ x = q->ht[hash];
+ if (x == depth) { /* bucket has no slot yet: take one from the list of empty slots */
+ q->ht[hash] = x = q->dep[depth].next;
+ q->hash[x] = hash;
+ }
+ __skb_queue_tail(&q->qs[x], skb);
+ esfq_inc(q, x);
+ if (q->qs[x].qlen == 1) { /* The flow is new */
+ if (q->tail == depth) { /* It is the first flow */
+ q->tail = x;
+ q->next[x] = x;
+ q->allot[x] = q->quantum;
+ } else { /* link x into the round right after tail and make it the new tail */
+ q->next[x] = q->next[q->tail];
+ q->next[q->tail] = x;
+ q->tail = x;
+ }
+ }
+ if (++sch->q.qlen < q->limit-1) { /* still below the limit: account and accept */
+ sch->stats.bytes += skb->len;
+ sch->stats.packets++;
+ return 0;
+ }
+
+ esfq_drop(sch); /* at the limit: drop one packet (see esfq_drop for which one) */
+ return NET_XMIT_CN;
+}
+/* Put skb back at the head of its flow's slot. Returns 0, or NET_XMIT_CN after dropping a packet at the limit. */
+static int
+esfq_requeue(struct sk_buff *skb, struct Qdisc* sch)
+{
+ struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+ unsigned hash = esfq_hash(q, skb);
+ unsigned depth = q->depth;
+ esfq_index x;
+
+ x = q->ht[hash];
+ if (x == depth) { /* bucket has no slot yet: take one from the list of empty slots */
+ q->ht[hash] = x = q->dep[depth].next;
+ q->hash[x] = hash;
+ }
+ __skb_queue_head(&q->qs[x], skb);
+ esfq_inc(q, x);
+ if (q->qs[x].qlen == 1) { /* The flow is new */
+ if (q->tail == depth) { /* It is the first flow */
+ q->tail = x;
+ q->next[x] = x;
+ q->allot[x] = q->quantum;
+ } else { /* link x into the round right after tail and make it the new tail */
+ q->next[x] = q->next[q->tail];
+ q->next[q->tail] = x;
+ q->tail = x;
+ }
+ }
+ if (++sch->q.qlen < q->limit - 1)
+ return 0;
+
+ sch->stats.drops++; /* NOTE(review): esfq_drop() increments stats.drops as well - confirm the double count is intended */
+ esfq_drop(sch);
+ return NET_XMIT_CN;
+}
+
+
+
+/* Take the next packet in round-robin order; returns NULL when no slot is active. */
+static struct sk_buff *
+esfq_dequeue(struct Qdisc* sch)
+{
+ struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+ struct sk_buff *skb;
+ unsigned depth = q->depth;
+ esfq_index a, old_a;
+
+ /* No active slots */
+ if (q->tail == depth)
+ return NULL;
+
+ a = old_a = q->next[q->tail]; /* slot to serve: the one following tail */
+
+ /* Grab packet */
+ skb = __skb_dequeue(&q->qs[a]);
+ esfq_dec(q, a);
+ sch->q.qlen--;
+
+ /* Is the slot empty? */
+ if (q->qs[a].qlen == 0) {
+ a = q->next[a];
+ if (a == old_a) { /* it was the only active slot */
+ q->tail = depth;
+ return skb;
+ }
+ q->next[q->tail] = a; /* unlink the emptied slot from the round */
+ q->allot[a] += q->quantum;
+ } else if ((q->allot[a] -= skb->len) <= 0) { /* allotment used up: move on to the next slot */
+ q->tail = a;
+ a = q->next[a];
+ q->allot[a] += q->quantum;
+ }
+
+ return skb;
+}
+
+/* Empty the qdisc: dequeue every queued packet and free it. */
+static void esfq_reset(struct Qdisc* sch)
+{
+	struct sk_buff *pkt;
+
+	for (pkt = esfq_dequeue(sch); pkt != NULL; pkt = esfq_dequeue(sch))
+		kfree_skb(pkt);
+}
+
+/* Timer callback: pick a new random perturbation (0..31) and re-arm the timer if still enabled. */
+static void esfq_perturbation(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc*)arg;
+	struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+
+	q->perturbation = net_random()&0x1F;
+	/* perturb_period is in jiffies; 0 means perturbation has been switched off */
+	if (q->perturb_period) {
+		q->perturb_timer.expires = jiffies + q->perturb_period;
+		add_timer(&q->perturb_timer);
+	}
+}
+
+/* Apply new quantum/perturb/limit/hash options to a live qdisc; depth and divisor stay as set
+ * at init.  Not hooked into esfq_qdisc_ops yet.  Returns 0, or -EINVAL if opt is too short. */
+static int esfq_change(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+	struct tc_sfq_qopt *ctl = RTA_DATA(opt);
+	int old_perturb = q->perturb_period;
+
+	if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+		return -EINVAL;
+
+	sch_tree_lock(sch);
+	q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
+	q->perturb_period = ctl->perturb_period*HZ;
+
+	if (ctl->limit)
+		q->limit = min_t(u32, ctl->limit, q->depth);
+
+	if (ctl->hash_kind) {	/* 0 (classic) is treated as "keep the current hash" */
+		q->hash_kind = ctl->hash_kind;
+		if (q->hash_kind != TCA_SFQ_HASH_CLASSIC)
+			q->perturb_period = 0;
+	}
+
+	/* Trim to the new limit; stop once esfq_drop() has nothing left to drop (no endless loop) */
+	while (sch->q.qlen >= q->limit-1 && esfq_drop(sch))
+		;
+
+	if (old_perturb)
+		del_timer(&q->perturb_timer);
+	if (q->perturb_period) {
+		q->perturb_timer.expires = jiffies + q->perturb_period;
+		add_timer(&q->perturb_timer);
+	} else {
+		q->perturbation = 0;
+	}
+	sch_tree_unlock(sch);
+	return 0;
+}
+
+/*
+ * Set up a new esfq instance.  Without options the built-in defaults are used
+ * (hash table 1024, depth and limit 128); a zero divisor or depth in the options means
+ * "not given" and selects the same defaults.  Returns 0, -EINVAL or -ENOBUFS.
+ */
+static int esfq_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+	struct tc_sfq_qopt *ctl;
+	esfq_index p = ~0UL/2;
+	int i;
+
+	if (opt && opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+		return -EINVAL;
+	q->perturb_timer.data = (unsigned long)sch;
+	q->perturb_timer.function = esfq_perturbation;
+	init_timer(&q->perturb_timer);
+	q->perturbation = 0;
+	q->hash_kind = TCA_SFQ_HASH_CLASSIC;
+	q->max_depth = 0;
+	if (opt == NULL) {
+		q->quantum = psched_mtu(sch->dev);
+		q->perturb_period = 0;
+		q->hash_divisor = 1024;
+		q->tail = q->limit = q->depth = 128;
+	} else {
+		ctl = RTA_DATA(opt);
+		q->quantum = ctl->quantum ? : psched_mtu(sch->dev);
+		q->perturb_period = ctl->perturb_period*HZ;
+		/* tc zero-fills the options, so 0 here means the parameter was not given */
+		q->hash_divisor = ctl->divisor ? : 1024;
+		q->tail = q->limit = q->depth = ctl->flows ? : 128;
+
+		if ( q->depth > p - 1 )
+			return -EINVAL;
+		/* the hash is masked with divisor-1 and stored in an unsigned short per slot */
+		if ((q->hash_divisor & (q->hash_divisor - 1)) || q->hash_divisor > 65536)
+			return -EINVAL;
+
+		if (ctl->limit)
+			q->limit = min_t(u32, ctl->limit, q->depth);
+
+		if (ctl->hash_kind) {
+			q->hash_kind = ctl->hash_kind;
+			if (q->hash_kind != TCA_SFQ_HASH_CLASSIC)
+				q->perturb_period = 0;
+		}
+	}
+
+	q->ht = kmalloc(q->hash_divisor*sizeof(esfq_index), GFP_KERNEL);
+	if (!q->ht)
+		goto err_case;
+
+	q->dep = kmalloc((1+q->depth*2)*sizeof(struct esfq_head), GFP_KERNEL);
+	if (!q->dep)
+		goto err_case;
+	q->next = kmalloc(q->depth*sizeof(esfq_index), GFP_KERNEL);
+	if (!q->next)
+		goto err_case;
+
+	q->allot = kmalloc(q->depth*sizeof(short), GFP_KERNEL);
+	if (!q->allot)
+		goto err_case;
+	q->hash = kmalloc(q->depth*sizeof(unsigned short), GFP_KERNEL);
+	if (!q->hash)
+		goto err_case;
+	q->qs = kmalloc(q->depth*sizeof(struct sk_buff_head), GFP_KERNEL);
+	if (!q->qs)
+		goto err_case;
+
+	for (i=0; i< q->hash_divisor; i++)
+		q->ht[i] = q->depth;	/* every bucket starts without a slot */
+	for (i=0; i<q->depth; i++) {
+		skb_queue_head_init(&q->qs[i]);
+		q->dep[i+q->depth].next = i+q->depth;	/* empty list: head points at itself */
+		q->dep[i+q->depth].prev = i+q->depth;
+	}
+
+	for (i=0; i<q->depth; i++)
+		esfq_link(q, i);	/* all slots start on the length-0 list */
+
+	/* Arm the timer only once nothing can fail, so the error path never leaves it pending */
+	if (q->perturb_period) {
+		q->perturb_timer.expires = jiffies + q->perturb_period;
+		add_timer(&q->perturb_timer);
+	}
+	MOD_INC_USE_COUNT;
+	return 0;
+err_case:	/* kfree(NULL) is a no-op; assumes sch->data was zeroed by the caller - TODO confirm */
+	kfree(q->ht);
+	kfree(q->dep);
+	kfree(q->next);
+	kfree(q->allot);
+	kfree(q->hash);
+	kfree(q->qs);
+	return -ENOBUFS;
+}
+
+/*
+ * Tear down an esfq instance: stop the perturbation timer, free every
+ * per-instance table and drop the module use count. */
+static void esfq_destroy(struct Qdisc *sch)
+{
+	struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+
+	del_timer(&q->perturb_timer);
+	/* kfree(NULL) is a no-op, so tables that were never allocated need no check */
+	kfree(q->ht);
+	kfree(q->dep);
+	kfree(q->next);
+	kfree(q->allot);
+	kfree(q->hash);
+	kfree(q->qs);
+
+	MOD_DEC_USE_COUNT;
+}
+
+/* Report the current configuration as a TCA_OPTIONS attribute appended to skb.
+ * Returns skb->len on success; on failure the skb is trimmed back and -1 is returned. */
+static int esfq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct esfq_sched_data *q = (struct esfq_sched_data *)sch->data;
+	unsigned char *start = skb->tail;	/* rollback point for the failure path */
+	struct tc_sfq_qopt opt;
+
+	opt.hash_kind = q->hash_kind;
+	opt.flows = q->depth;
+	opt.divisor = q->hash_divisor;
+	opt.limit = q->limit;
+	opt.perturb_period = q->perturb_period/HZ;	/* jiffies back to seconds */
+	opt.quantum = q->quantum;
+
+	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, start - skb->data);
+	return -1;
+}
+/* Operations table for the "esfq" qdisc; this is what init_module() registers. */
+struct Qdisc_ops esfq_qdisc_ops =
+{
+ NULL,
+ NULL,
+ "esfq", /* qdisc name */
+ sizeof(struct esfq_sched_data), /* size of the private data reached through sch->data */
+
+ esfq_enqueue,
+ esfq_dequeue,
+ esfq_requeue,
+ esfq_drop,
+
+ esfq_init,
+ esfq_reset,
+ esfq_destroy,
+ NULL, /* esfq_change - needs more work */
+
+ esfq_dump,
+};
+/* Module entry points, compiled only when built as a module: register and unregister the qdisc. */
+#ifdef MODULE
+int init_module(void)
+{
+ return register_qdisc(&esfq_qdisc_ops);
+}
+
+void cleanup_module(void)
+{
+ unregister_qdisc(&esfq_qdisc_ops);
+}
+#endif
+MODULE_LICENSE("GPL");
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2002-06-04 23:08 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2002-06-04 23:08 [LARTC] Some work to extend SFQ to be more configurable Alexander Atanasov
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.