* [LARTC] new perflow rate control queue
@ 2005-04-04 7:21 Wang Jian
2005-04-04 8:51 ` Patrick McHardy
` (11 more replies)
0 siblings, 12 replies; 13+ messages in thread
From: Wang Jian @ 2005-04-04 7:21 UTC (permalink / raw)
To: lartc
[-- Attachment #1: Type: text/plain, Size: 1355 bytes --]
Hi,
One of my customer needs per flow rate control, so I write one.
The code I post here is not finished, but it seems to work as expected.
The kernel patch is agains kernel 2.6.11, the iproute2 patch is against
iproute2-2.6.11-050314.
I write the code in a hurry to meet deadline. There are many other things
to do ahead for me. The code is written in 2 days (including read other
queue's code) and tested for a while to find obvious mistake. Don't be
suprised when you find many many bugs.
The test scenario is like this
www server <- [ eth0 eth1 ] -> www clients
The attached t.sh is used to generate test rules. Clients download a
big ISO file from www server, so flows' rate can be estimated by view
progress. However I use wget to test the speed, so the speed is
accumulated, not current.
The problems I know:
1. The rtnetlink related code is quick hack. I am not familiar with
rtnetlink, so I look at other queue's code and use the simplest one.
2. perflow queue has no stats code. It will be added later.
3. I don't know what is the dump() method 's purpose, so I didn't write
dump() method. I will add it later when I know what it is for and how to
write rtnetlink code.
Any feedback is welcome. And test it if you can :)
PS: the code is licensed under GPL. If it is acceptable by upstream, it
will be submitted.
--
lark
[-- Attachment #2: iproute2-2.6.11-050314-perflow.diff --]
[-- Type: application/octet-stream, Size: 4842 bytes --]
Index: iproute2-2.6.11-w/include/linux/pkt_sched.h
===================================================================
--- iproute2-2.6.11-w/include/linux/pkt_sched.h (revision 1)
+++ iproute2-2.6.11-w/include/linux/pkt_sched.h (working copy)
@@ -272,6 +272,28 @@
__u32 ctokens;
};
+
+/* PERFLOW section */
+
+#define TC_PERFLOW_DEFAULTLIMIT 1024
+
+struct tc_perflow_qopt
+{
+ struct tc_ratespec rate;
+ struct tc_ratespec ceil;
+ __u32 limit;
+ __u32 qlen;
+};
+enum
+{
+ TCA_PERFLOW_UNSPEC,
+ TCA_PERFLOW_PARMS,
+ __TCA_PERFLOW_MAX,
+};
+
+#define TCA_PERFLOW_MAX (__TCA_PERFLOW_MAX - 1)
+
+
/* HFSC section */
struct tc_hfsc_qopt
Index: iproute2-2.6.11-w/tc/q_perflow.c
===================================================================
--- iproute2-2.6.11-w/tc/q_perflow.c (revision 0)
+++ iproute2-2.6.11-w/tc/q_perflow.c (revision 0)
@@ -0,0 +1,153 @@
+/*
+ * q_perflow.c PERFLOW.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Wang Jian <lark@linux.net.cn>, 2005
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+static void explain(void)
+{
+ fprintf(stderr,
+"Usage: ... perflow rate RATE [ ceil CEIL ] [ limit LIMIT ] [ qlen QLEN ]\n"
+"\n"
+"rate rate allocated to each flow (flow can still borrow)\n"
+"ceil upper limit for each flow (flow can not borrow)\n"
+"limit maximum concurrent flows\n"
+"qlen maximum queue length\n"
+ );
+}
+
+#define usage() return(-1)
+
+static int perflow_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+ struct tc_perflow_qopt opt;
+ struct rtattr *tail;
+ int ok = 0;
+
+ memset(&opt, 0, sizeof(opt));
+
+ while (argc > 0) {
+ if (strcmp(*argv, "rate") == 0) {
+ NEXT_ARG();
+ if (opt.rate.rate) {
+ fprintf(stderr, "Double \"ceil\" spec\n");
+ return -1;
+ }
+ if (get_rate(&opt.rate.rate, *argv)) {
+ fprintf(stderr, "Illegal \"rate\"\n");
+ return -1;
+ }
+ ok++;
+ } else if (strcmp(*argv, "ceil") == 0) {
+ NEXT_ARG();
+ if (opt.ceil.rate) {
+ fprintf(stderr, "Double \"ceil\" spec\n");
+ return -1;
+ }
+ if (get_rate(&opt.ceil.rate, *argv)) {
+ fprintf(stderr, "Illegal \"ceil\"\n");
+ return -1;
+ }
+ ok++;
+ } else if (strcmp(*argv, "limit") == 0) {
+ NEXT_ARG();
+ if (get_size(&opt.limit, *argv)) {
+ fprintf(stderr, "Illegal \"limit\"\n");
+ return -1;
+ }
+ ok++;
+ } else if (strcmp(*argv, "qlen") == 0) {
+ NEXT_ARG();
+ if (get_size(&opt.qlen, *argv)) {
+ fprintf(stderr, "Illegal \"limit\"\n");
+ return -1;
+ }
+ ok++;
+ } else if (strcmp(*argv, "help") == 0) {
+ explain();
+ return -1;
+ } else {
+ fprintf(stderr, "What is \"%s\"?\n", *argv);
+ explain();
+ return -1;
+ }
+ argc--; argv++;
+ }
+
+ if (!ok)
+ return 0;
+
+ if (opt.rate.rate == 0) {
+ fprintf(stderr, "\"rate\" is required.\n");
+ return -1;
+ }
+
+ if (opt.ceil.rate == 0)
+ opt.ceil = opt.rate;
+
+ if (opt.ceil.rate < opt.rate.rate) {
+ fprintf(stderr, "\"ceil\" must be >= \"rate\".\n");
+ return -1;
+ }
+
+ if (opt.limit == 0)
+ opt.limit = TC_PERFLOW_DEFAULTLIMIT;
+
+ if (opt.qlen > 0 && opt.qlen < 5) {
+ fprintf(stderr, "\"qlen\" must be >= 5.\n");
+ return -1;
+ }
+
+ tail = NLMSG_TAIL(n);
+ //addattr_l(n, 1024, TCA_OPTIONS, NULL, 0);
+ //addattr_l(n, 1024, TCA_PERFLOW_PARMS, &opt, sizeof(opt));
+ addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt));
+ tail->rta_len = (void *) NLMSG_TAIL(n) - (void *) tail;
+
+ return 0;
+}
+
+static int perflow_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+ struct tc_perflow_qopt *qopt;
+
+ if (opt == NULL)
+ return 0;
+
+ if (RTA_PAYLOAD(opt) < sizeof(*qopt))
+ return -1;
+
+ qopt = RTA_DATA(opt);
+
+ SPRINT_BUF(b1);
+ fprintf(f, "rate %s ", sprint_rate(qopt->rate.rate, b1));
+ fprintf(f, "ceil %s ", sprint_rate(qopt->ceil.rate, b1));
+ fprintf(f, "limit %s ", sprint_size(qopt->rate.rate, b1));
+
+ return 0;
+}
+
+struct qdisc_util perflow_qdisc_util = {
+ .id = "perflow",
+ .parse_qopt = perflow_parse_opt,
+ .print_qopt = perflow_print_opt,
+};
Index: iproute2-2.6.11-w/tc/Makefile
===================================================================
--- iproute2-2.6.11-w/tc/Makefile (revision 1)
+++ iproute2-2.6.11-w/tc/Makefile (working copy)
@@ -7,6 +7,7 @@
TCMODULES += q_fifo.o
TCMODULES += q_sfq.o
TCMODULES += q_red.o
+TCMODULES += q_perflow.o
TCMODULES += q_prio.o
TCMODULES += q_tbf.o
TCMODULES += q_cbq.o
[-- Attachment #3: linux-2.6.11-perflow.diff --]
[-- Type: application/octet-stream, Size: 15639 bytes --]
Index: linux-2.6.11-w/include/linux/pkt_sched.h
===================================================================
--- linux-2.6.11-w/include/linux/pkt_sched.h (revision 1)
+++ linux-2.6.11-w/include/linux/pkt_sched.h (working copy)
@@ -272,6 +272,28 @@
__u32 ctokens;
};
+
+/* PERFLOW section */
+
+#define TC_PERFLOW_DEFAULTLIMIT 1024
+
+struct tc_perflow_qopt
+{
+ struct tc_ratespec rate;
+ struct tc_ratespec ceil;
+ __u32 limit;
+ __u32 qlen;
+};
+enum
+{
+ TCA_PERFLOW_UNSPEC,
+ TCA_PERFLOW_PARMS,
+ __TCA_PERFLOW_MAX,
+};
+
+#define TCA_PERFLOW_MAX (__TCA_PERFLOW_MAX - 1)
+
+
/* HFSC section */
struct tc_hfsc_qopt
Index: linux-2.6.11-w/net/sched/Kconfig
===================================================================
--- linux-2.6.11-w/net/sched/Kconfig (revision 1)
+++ linux-2.6.11-w/net/sched/Kconfig (working copy)
@@ -192,6 +192,15 @@
To compile this code as a module, choose M here: the
module will be called sch_gred.
+config NET_SCH_PERFLOW
+ tristate "PERFLOW queue"
+ depends on NET_SCHED
+ ---help---
+ Say Y here if you want to use per flow rate control.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_perflow.
+
config NET_SCH_DSMARK
tristate "Diffserv field marker"
depends on NET_SCHED
Index: linux-2.6.11-w/net/sched/Makefile
===================================================================
--- linux-2.6.11-w/net/sched/Makefile (revision 1)
+++ linux-2.6.11-w/net/sched/Makefile (working copy)
@@ -19,6 +19,7 @@
obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
obj-$(CONFIG_NET_SCH_RED) += sch_red.o
obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o
+obj-$(CONFIG_NET_SCH_PERFLOW) += sch_perflow.o
obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
Index: linux-2.6.11-w/net/sched/sch_perflow.c
===================================================================
--- linux-2.6.11-w/net/sched/sch_perflow.c (revision 0)
+++ linux-2.6.11-w/net/sched/sch_perflow.c (revision 0)
@@ -0,0 +1,551 @@
+/*
+ * net/sched/sch_perflow.c Per flow rate control queue.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Wang Jian <lark@linux.net.cn>, 2005
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/jhash.h>
+#include <linux/timer.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <net/inet_ecn.h>
+#include <net/dsfield.h>
+
+/* Per flow rate control algorithm
+
+Description
+-----------
+
+ When a new packet arrives, we lookup in the table to see if it
+ belongs to a flow being traced. If not, we create new entry for it.
+
+ For a packet belongs to a flow being traced, we see if it has
+ available token to send it out.
+
+ The 'rate' and 'ceil' have same meaning of HTB qdisc. The 'limit'
+ parameter defined how many flows we will trace, which defaults to
+ 1024.
+
+ Any flow entry without traffic in LIFETIME seconds will be wiped
+ to free slot.
+
+
+Algorithm
+---------
+
+ The algorithm is simple and naive. We have a token bucket, which
+ generate grate/10 (= rate * limit / 10) token in 1/10 seconds.
+
+ A flow can always send when it is under rate. When a flow is over
+ rate but under ceil, every time it borrows, it is put into borrow
+ list, so it may receive penalty.
+
+ When a under-rate flow sends but token is short, penalty is added
+ to one of flows in borrow list, and this punished flow is
+ removed from borrow list.
+
+ When a flow carrying penalty has packet, the penalty is
+ added to used token. This means, this flow can borrow less than
+ last time (if in a new time slot), or can borrow less
+ (ceil - rate - penalty).
+
+ There are other rules that handle fairness and low rate situation.
+ See code for details.
+
+ NOTE: the implementation of algorithm is not timer driven, but
+ packet driven.
+
+ The ideas behind this algorithm are
+
+ 1. We assume that perflow qdisc has rate * limit guaranteed.
+ 2. We can't affect past. We can only affect future.
+ 3. If a flow's borrowing leads to overlimit, we let this flow
+ borrow less in future.
+ 4. With a round robin punishment style, a flow borrows more times,
+ it stays in borrow list more times, and so receives more penalty.
+ (But we should consider more about this, I think the fairness
+ should be improved by sort borrow list.)
+
+
+Security Consideration
+----------------------
+
+ It's dangerous to create new entry for any new packets not belongs
+ to a flow being traced. For example, syn flood attack on a single
+ port may cause thousands of entries created. The 'limit' parameter
+ is used to set limit on maximum entries we will create.
+
+ But even with 'limit', port scan can fill the slots and make valid
+ new flow has no slot available and not be traced.
+
+ One of solution is to use netfilter MARK for classification
+ (a.k.a. cls_fw.c). Only established session is given a fw mark, and
+ then, if port scan use a spoofing source address, no fw mark gotten.
+
+
+Application
+-----------
+
+ You should use HTB or other classful qdisc to enclose this qdisc.
+
+ */
+
+
+#define PERFLOW_HSIZE 251
+#define PERFLOW_LIFETIME (10*HZ)
+
+struct perflow_entry
+{
+ u32 src;
+ u32 dst;
+ u16 sport;
+ u16 dport;
+
+ struct list_head hlist;
+ struct list_head borrow;
+ struct timer_list timer;
+ struct perflow_sched_data *q;
+ u32 jiffies;
+ u32 used;
+ u32 penalty;
+
+ u8 protocol;
+};
+
+struct perflow_sched_data
+{
+ /* parameters */
+ u32 rate; /* guaranted rate */
+ u32 ceil; /* upper bound */
+ u32 limit; /* maximum flows we trace */
+ u32 qlen; /* maximum queue length */
+
+ /* variables */
+ u32 grate; /* aggregative rate */
+
+ u32 jiffies;
+ u32 token;
+ u32 flow_count; /* how many flows we are tracing */
+ struct list_head ht[PERFLOW_HSIZE]; /* hash table */
+ struct list_head borrow; /* lists of borrowing flow */
+};
+
+struct commonhdr
+{
+ u16 source;
+ u16 dest;
+};
+
+static __inline__ u32 perflow_hash(struct perflow_entry *tuple)
+{
+ return (jhash_3words(tuple->src ^ tuple->protocol,
+ tuple->dst,
+ (tuple->sport << 16 | tuple->dport),
+ 0x5e83ad03) % PERFLOW_HSIZE);
+}
+
+static __inline__ int perflow_is_valid(struct sk_buff *skb)
+{
+ struct iphdr *iph = skb->nh.iph;
+
+ if (skb->protocol != __constant_htons(ETH_P_IP))
+ return 0;
+
+ if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_TCP
+ && iph->protocol != IPPROTO_SCTP)
+ return 0;
+
+ return 1;
+}
+
+static __inline__ void perflow_flow_timer(unsigned long arg)
+{
+ struct perflow_entry *e = (struct perflow_entry *) arg;
+
+ e->q->flow_count--;
+ list_del_init(&e->hlist);
+ if (!list_empty(&e->borrow))
+ list_del_init(&e->borrow);
+ printk(KERN_WARNING "delete flow\n");
+ kfree(e);
+}
+
+static __inline__ struct perflow_entry *
+perflow_new_flow(u32 hash, struct perflow_entry *tuple, struct Qdisc *sch)
+{
+ struct perflow_sched_data *q = qdisc_priv(sch);
+ struct perflow_entry *e;
+
+ if (q->flow_count >= q->limit)
+ return NULL;
+
+ e = kmalloc(sizeof(*e), GFP_KERNEL);
+ if (!e)
+ return NULL;
+
+ q->flow_count++;
+
+ memset(e, 0, sizeof(*e));
+
+ e->src = tuple->src;
+ e->dst = tuple->dst;
+ e->sport = tuple->sport;
+ e->dport = tuple->dport;
+ e->protocol = tuple->protocol;
+
+ INIT_LIST_HEAD(&e->hlist);
+ INIT_LIST_HEAD(&e->borrow);
+ init_timer(&e->timer);
+ e->timer.function = perflow_flow_timer;
+ e->timer.data = (unsigned long) e;
+ e->jiffies = jiffies;
+ e->q = q;
+
+ list_add(&e->hlist, q->ht + hash);
+
+ return e;
+}
+
+static __inline__ void
+perflow_fill_tuple(struct perflow_entry *tuple, struct sk_buff *skb)
+{
+ struct iphdr *iph = skb->nh.iph;
+ struct commonhdr *ch;
+
+ memset(tuple, 0, sizeof(struct perflow_entry));
+#if 1
+ /* not a traceable flow, do nothing */
+ if (!perflow_is_valid(skb))
+ return;
+#endif
+
+ ch = (struct commonhdr *)((void *)iph + (iph->ihl << 2));
+ tuple->src = iph->saddr;
+ tuple->dst = iph->daddr;
+ tuple->sport = ch->source;
+ tuple->dport = ch->dest;
+ tuple->protocol = iph->protocol;
+}
+
+static struct perflow_entry *
+perflow_find_flow(u32 *hash, struct perflow_entry *flow, struct Qdisc *sch)
+{
+ struct perflow_sched_data *q = qdisc_priv(sch);
+ struct list_head *h, *head;
+ struct perflow_entry *e;
+
+ *hash = perflow_hash(flow);
+ head = q->ht + (*hash);
+
+ list_for_each(h, head) {
+ e = list_entry(h, struct perflow_entry, hlist);
+ if (e->src == flow->src
+ && e->dst == flow->dst
+ && e->sport == flow->sport
+ && e->dport == flow->dport
+ && e->protocol == flow->protocol)
+ del_timer(&e->timer);
+ return e;
+ }
+ return NULL;
+}
+
+static void perflow_punish(int penalty, struct list_head *borrow)
+{
+ struct perflow_entry *e;
+ struct list_head *h;
+
+ if (!list_empty(borrow)) {
+ h = borrow->next;
+ list_del_init(h);
+ e = list_entry(h, struct perflow_entry, hlist);
+ e->penalty += penalty;
+ }
+}
+
+static __inline__ void
+perflow_adjust_used(struct perflow_entry *flow, u32 rate, u32 ceil)
+{
+ int cycles;
+
+ /* still in this time slot */
+ if ((jiffies - flow->jiffies) <= HZ) {
+ if ((ceil - flow->used) > flow->penalty) {
+ flow->used += flow->penalty;
+ flow->penalty = 0;
+ } else {
+ flow->used = ceil;
+ flow->penalty -= ceil - flow->used;
+ }
+ return;
+ }
+
+ /* the packet arrives after cycles slots */
+ cycles = (jiffies - flow->jiffies) / HZ;
+ flow->jiffies += cycles * HZ;
+
+ if ((cycles * ceil - flow->used) > flow->penalty) {
+ flow->used = 0;
+ flow->penalty = 0;
+ } else {
+ flow->used = flow->penalty + flow->used - cycles * ceil;
+ if (flow->used > ceil) {
+ flow->used = ceil;
+ flow->penalty -= ceil;
+ }
+ }
+}
+
+static int perflow_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct perflow_sched_data *q = qdisc_priv(sch);
+ struct perflow_entry tuple, *flow;
+ u32 hash;
+
+ /* not a traceable flow */
+ if (!perflow_is_valid(skb))
+ goto drop;
+
+ /* if max qlen is set and current queue is full, drop */
+ if (q->qlen && sch->q.qlen >= q->qlen)
+ goto drop;
+
+ perflow_fill_tuple(&tuple, skb);
+ flow = perflow_find_flow(&hash, &tuple, sch);
+
+ if (flow == NULL) {
+ flow = perflow_new_flow(hash, &tuple, sch);
+ if (flow == NULL)
+ goto drop;
+ }
+
+ /* renew timer for flow */
+ flow->timer.expires = jiffies + PERFLOW_LIFETIME;
+ add_timer(&flow->timer);
+
+ /* renew used for this flow */
+ perflow_adjust_used(flow, q->rate, q->ceil);
+
+ if ((jiffies - q->jiffies) > HZ/10) {
+ q->token = q->grate / 10;
+ q->jiffies = jiffies;
+ }
+
+ /* we always satisfy flow under rate */
+ if (flow->used < q->rate) {
+ flow->used += skb->len;
+ if (flow->used > q->rate) {
+ /* if we borrow, we may receive penalty */
+ if (list_empty(&flow->borrow))
+ list_add_tail(&flow->borrow, &q->borrow);
+ }
+ if (flow->used > q->ceil)
+ flow->penalty += flow->used - q->ceil;
+ if (q->token < skb->len) {
+ q->token = 0;
+ perflow_punish(skb->len - q->token, &q->borrow);
+ } else {
+ q->token -= skb->len;
+ }
+ } else {
+ if (q->token < skb->len || flow->used >= q->ceil)
+ goto drop;
+
+ flow->used += skb->len;
+ q->token -= skb->len;
+
+ if (list_empty(&flow->borrow))
+ list_add_tail(&flow->borrow, &q->borrow);
+ if (flow->used > q->ceil)
+ flow->penalty += flow->used - q->ceil;
+ }
+
+enqueue:
+ sch->qstats.backlog += skb->len;
+ sch->bstats.bytes += skb->len;
+ sch->bstats.packets++;
+ __skb_queue_tail(&sch->q, skb);
+ return NET_XMIT_SUCCESS;
+
+drop:
+ printk(KERN_WARNING "perflow drop\n");
+ sch->qstats.overlimits++;
+ kfree_skb(skb);
+ sch->qstats.drops++;
+ return NET_XMIT_CN;
+}
+
+static int perflow_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ __skb_queue_head(&sch->q, skb);
+ sch->qstats.backlog += skb->len;
+ sch->qstats.requeues++;
+ return 0;
+}
+
+static struct sk_buff *perflow_dequeue(struct Qdisc *sch)
+{
+ struct sk_buff *skb;
+
+ skb = __skb_dequeue(&sch->q);
+ if (skb) {
+ sch->qstats.backlog -= skb->len;
+ }
+ return skb;
+}
+
+static unsigned int perflow_drop(struct Qdisc *sch)
+{
+ struct sk_buff *skb;
+
+ skb = __skb_dequeue_tail(&sch->q);
+ if (skb) {
+ unsigned int len = skb->len;
+ sch->qstats.backlog -= len;
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return len;
+ }
+ return 0;
+}
+
+static void perflow_reset(struct Qdisc *sch)
+{
+ skb_queue_purge(&sch->q);
+ sch->qstats.backlog = 0;
+}
+
+static int perflow_change(struct Qdisc *sch, struct rtattr *opt)
+{
+ struct perflow_sched_data *q = qdisc_priv(sch);
+ struct tc_perflow_qopt *ctl;
+
+ /* if limit and qlen are lowered, and we are in a over limit
+ * state, we still don't drop flow or drop packet.
+ * sch->q will decrease to allowed length
+ * q->flow_count will decrease because no new flow is traced
+ */
+ if (opt == NULL) {
+ return -EINVAL;
+ } else {
+ ctl = RTA_DATA(opt);
+ if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+ return -EINVAL;
+
+ q->rate = ctl->rate.rate;
+ q->ceil = ctl->ceil.rate;
+ q->limit = ctl->limit;
+ q->qlen = ctl->qlen;
+
+ if (q->limit == 0)
+ q->limit = 1024;
+ printk(KERN_WARNING "(rate,ceil,limit,qlen)=(%u,%u,%u,%u)\n",
+ q->rate, q->ceil, q->limit, q->qlen);
+ /* aggregative rate is (rate * 1.05 * limit) */
+ q->grate = (q->rate + q->rate/20) * q->limit;
+ }
+
+ return 0;
+}
+
+static int perflow_init(struct Qdisc *sch, struct rtattr *opt)
+{
+ struct perflow_sched_data *q = qdisc_priv(sch);
+ int i, ret;
+
+ ret = perflow_change(sch, opt);
+ if (ret)
+ return ret;
+
+ INIT_LIST_HEAD(&q->borrow);
+
+ for (i = 0; i < PERFLOW_HSIZE; i++)
+ INIT_LIST_HEAD(q->ht + i);
+
+ return 0;
+}
+
+static void perflow_destroy(struct Qdisc *sch)
+{
+ struct perflow_sched_data *q = qdisc_priv(sch);
+ struct list_head *h, *n;
+ struct perflow_entry *e;
+ int i;
+
+ for (i = 0; i < PERFLOW_HSIZE; i++) {
+ list_for_each_safe (h, n, q->ht + i) {
+ e = list_entry(h, struct perflow_entry, hlist);
+ del_timer(&e->timer);
+ if (!list_empty(&e->borrow))
+ list_del_init(&e->borrow);
+ list_del(h);
+ kfree(e);
+ }
+ }
+}
+
+static int perflow_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ return -1;
+}
+
+static struct Qdisc_ops perflow_qdisc_ops = {
+ .next = NULL,
+ .cl_ops = NULL,
+ .id = "perflow",
+ .priv_size = sizeof(struct perflow_sched_data),
+ .enqueue = perflow_enqueue,
+ .dequeue = perflow_dequeue,
+ .requeue = perflow_requeue,
+ .drop = perflow_drop,
+ .init = perflow_init,
+ .reset = perflow_reset,
+ .destroy = perflow_destroy,
+ .change = perflow_change,
+ .dump = perflow_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init perflow_module_init(void)
+{
+ return register_qdisc(&perflow_qdisc_ops);
+}
+
+static void __exit perflow_module_exit(void)
+{
+ unregister_qdisc(&perflow_qdisc_ops);
+}
+
+module_init(perflow_module_init);
+module_exit(perflow_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Wang Jian <lark@linux.net.cn>");
[-- Attachment #4: t.sh --]
[-- Type: application/x-sh, Size: 489 bytes --]
[-- Attachment #5: Type: text/plain, Size: 143 bytes --]
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [LARTC] new perflow rate control queue
2005-04-04 7:21 [LARTC] new perflow rate control queue Wang Jian
@ 2005-04-04 8:51 ` Patrick McHardy
2005-04-04 9:10 ` Wang Jian
` (10 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Patrick McHardy @ 2005-04-04 8:51 UTC (permalink / raw)
To: lartc
Wang Jian wrote:
> Hi,
>
> One of my customer needs per flow rate control, so I write one.
>
> The code I post here is not finished, but it seems to work as expected.
>
> The kernel patch is agains kernel 2.6.11, the iproute2 patch is against
> iproute2-2.6.11-050314.
>
> I write the code in a hurry to meet deadline. There are many other things
> to do ahead for me. The code is written in 2 days (including read other
> queue's code) and tested for a while to find obvious mistake. Don't be
> suprised when you find many many bugs.
It looks quite clean, but couldn't the same be achieved with just
providing per-flow fairness and leaving the rate-limiting to an
upper qdisc like HTB or HFSC?
Regards
Patrick
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [LARTC] new perflow rate control queue
2005-04-04 7:21 [LARTC] new perflow rate control queue Wang Jian
2005-04-04 8:51 ` Patrick McHardy
@ 2005-04-04 9:10 ` Wang Jian
2005-04-04 11:42 ` Andy Furniss
` (9 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Wang Jian @ 2005-04-04 9:10 UTC (permalink / raw)
To: lartc
Hi Patrick McHardy,
HTB + SQF can only achieve part of funcionality.
Per flow rate control means per flow bandwidth assurance + bandwidth
constraint.
When we use HTB + SQF,
1. We can't achieve bandwidth assurance when flow count is higher than expected;
this often means we fail to meet the quality requirement.
2. We can't enforce bandwidth constraint when flow count is very low;
this often means waste of bandwidth.
On Mon, 04 Apr 2005 10:51:15 +0200, Patrick McHardy <kaber@trash.net> wrote:
> Wang Jian wrote:
> > Hi,
> >
> > One of my customer needs per flow rate control, so I write one.
> >
> > The code I post here is not finished, but it seems to work as expected.
> >
> > The kernel patch is agains kernel 2.6.11, the iproute2 patch is against
> > iproute2-2.6.11-050314.
> >
> > I write the code in a hurry to meet deadline. There are many other things
> > to do ahead for me. The code is written in 2 days (including read other
> > queue's code) and tested for a while to find obvious mistake. Don't be
> > suprised when you find many many bugs.
>
> It looks quite clean, but couldn't the same be achieved with just
> providing per-flow fairness and leaving the rate-limiting to an
> upper qdisc like HTB or HFSC?
>
> Regards
> Patrick
--
lark
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [LARTC] new perflow rate control queue
2005-04-04 7:21 [LARTC] new perflow rate control queue Wang Jian
2005-04-04 8:51 ` Patrick McHardy
2005-04-04 9:10 ` Wang Jian
@ 2005-04-04 11:42 ` Andy Furniss
2005-04-04 13:53 ` Wang Jian
` (8 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Andy Furniss @ 2005-04-04 11:42 UTC (permalink / raw)
To: lartc
Wang Jian wrote:
> Hi,
>
> One of my customer needs per flow rate control, so I write one.
>
> The code I post here is not finished, but it seems to work as expected.
>
> The kernel patch is agains kernel 2.6.11, the iproute2 patch is against
> iproute2-2.6.11-050314.
>
> I write the code in a hurry to meet deadline. There are many other things
> to do ahead for me. The code is written in 2 days (including read other
> queue's code) and tested for a while to find obvious mistake. Don't be
> suprised when you find many many bugs.
Wow - I wish I could write that in 2 days :-)
>
> The test scenario is like this
>
> www server <- [ eth0 eth1 ] -> www clients
>
> The attached t.sh is used to generate test rules. Clients download a
> big ISO file from www server, so flows' rate can be estimated by view
> progress. However I use wget to test the speed, so the speed is
> accumulated, not current.
What if the client uses a download accelerator and has 12 connections (I
suppose server could limit this - but if client is behind nat you may
hurt others - which is what sfq does now AIUI, because it doesn't hash
on dst port.)
>
> The problems I know:
>
> 1. The rtnetlink related code is quick hack. I am not familiar with
> rtnetlink, so I look at other queue's code and use the simplest one.
>
> 2. perflow queue has no stats code. It will be added later.
>
> 3. I don't know what is the dump() method 's purpose, so I didn't write
> dump() method. I will add it later when I know what it is for and how to
> write rtnetlink code.
>
> Any feedback is welcome. And test it if you can :)
>
> PS: the code is licensed under GPL. If it is acceptable by upstream, it
> will be submitted.
Having per flow without the drawbacks of sfq is really cool, but I agree
with Patrick about letting htb/hfsc limit. You say in the code -
"You should use HTB or other classful qdisc to enclose this qdisc"
So if you do that (unless you meant should not) then you can't guarentee
per flow rate anyway without knowing the number of flows, unless you can
set rate so high that max flows x flow rate < htb rate.
I think you can still limit per flow ceil if you use htb/hfsc to ratelimit.
I suppose you are solving a different problem with this than I normally
shape for ie. you have loads of bandwidth and I have hardly any.
It still could be something really usefull for me though, as I suspect
it wouldn't be too hard to add lots of features/switches which (e)sfq
doesn't have like -
Per flow queue length limit - and more choice than just tail drop (I am
thinking of me shaping from wrong and of link here - server with BIC tcp
is horrible with tail drop - others are not as bad).
For people who use esfq for hundreds of users, you could still do
fairness of tcp flows within fairness per user address.
Requeue properly which (e)sfq doesn't.
Andy.
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [LARTC] new perflow rate control queue
2005-04-04 7:21 [LARTC] new perflow rate control queue Wang Jian
` (2 preceding siblings ...)
2005-04-04 11:42 ` Andy Furniss
@ 2005-04-04 13:53 ` Wang Jian
2005-04-04 14:39 ` Wang Jian
` (7 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Wang Jian @ 2005-04-04 13:53 UTC (permalink / raw)
To: lartc
Hi Andy Furniss,
On Mon, 04 Apr 2005 12:42:21 +0100, Andy Furniss <andy.furniss@dsl.pipex.com> wrote:
>
> Wow - I wish I could write that in 2 days :-)
>
I think if you have a deadline, then you can do that :-)
> >
> > The test scenario is like this
> >
> > www server <- [ eth0 eth1 ] -> www clients
> >
> > The attached t.sh is used to generate test rules. Clients download a
> > big ISO file from www server, so flows' rate can be estimated by view
> > progress. However I use wget to test the speed, so the speed is
> > accumulated, not current.
>
> What if the client uses a download accelerator and has 12 connections (I
> suppose server could limit this - but if client is behind nat you may
> hurt others - which is what sfq does now AIUI, because it doesn't hash
> on dst port.)
>
The test scenario is not the real scenario under which it will be used.
I just use this for test, because it is simple.
This per flow control is good when used for VoIP (Voice and Video).
> >
> > The problems I know:
> >
> > 1. The rtnetlink related code is quick hack. I am not familiar with
> > rtnetlink, so I look at other queue's code and use the simplest one.
> >
> > 2. perflow queue has no stats code. It will be added later.
> >
> > 3. I don't know what is the dump() method 's purpose, so I didn't write
> > dump() method. I will add it later when I know what it is for and how to
> > write rtnetlink code.
> >
> > Any feedback is welcome. And test it if you can :)
> >
> > PS: the code is licensed under GPL. If it is acceptable by upstream, it
> > will be submitted.
>
> Having per flow without the drawbacks of sfq is really cool, but I agree
> with Patrick about letting htb/hfsc limit. You say in the code -
>
> "You should use HTB or other classful qdisc to enclose this qdisc"
>
> So if you do that (unless you meant should not) then you can't guarentee
> per flow rate anyway without knowing the number of flows, unless you can
> set rate so high that max flows x flow rate < htb rate.
HTB is providing guaranteed bandwidth. per flow control has its own
bandwidth limit ( rate * 1.05 * limit ).
>
> I think you can still limit per flow ceil if you use htb/hfsc to ratelimit.
>
> I suppose you are solving a different problem with this than I normally
> shape for ie. you have loads of bandwidth and I have hardly any.
>
Let me explain the idea more clear.
For example, you may have 50 streams. These stream can work perfectly at
10kbps - 15kbps.
With HTB + SFQ, you should give 50*15 guaranteed. but then, if only one
stream is using this, it can use up to 50*15 guaranteed. You have risk
of waste 49*15 on it.
In another hand, if your have more than 50 streams, say, 80 streams.
With perfect fairness, every stream can get less than 10kbps. The
quality is not met however, no one is satisfied with fairness.
So, you have risk of waste and still you don't have guarantee.
With per flow rate control, you can give a guaranteed 12*65, and set per
flow control to rate\x12,ceil\x15,limit`. When you have only a few
streams, you don't worry that you waste bandwidth. If more than 60
streams occurs, the first 60 streams still works fine.
Fairness is good, but sometimes, fairness means everyone hurts. If you
have more than enough bandwidth, you can use fairness to get good QoS.
But it is not the case when bandwidth is not so enough.
BTW: Are there any good document for HFSC? I don't even know how it
works :( Maybe it's can be used to achieve per flow control.
> It still could be something really usefull for me though, as I suspect
> it wouldn't be too hard to add lots of features/switches which (e)sfq
> doesn't have like -
>
> Per flow queue length limit - and more choice than just tail drop (I am
> thinking of me shaping from wrong and of link here - server with BIC tcp
> is horrible with tail drop - others are not as bad).
>
> For people who use esfq for hundreds of users, you could still do
> fairness of tcp flows within fairness per user address.
>
> Requeue properly which (e)sfq doesn't.
Because this per-flow queue is new, you can add things useful to it.
>
> Andy.
--
lark
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [LARTC] new perflow rate control queue
2005-04-04 7:21 [LARTC] new perflow rate control queue Wang Jian
` (3 preceding siblings ...)
2005-04-04 13:53 ` Wang Jian
@ 2005-04-04 14:39 ` Wang Jian
2005-04-04 15:10 ` Andy Furniss
` (6 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Wang Jian @ 2005-04-04 14:39 UTC (permalink / raw)
To: lartc
Hi Andy Furniss,
I just tried HTB+SFQ. I replace 'perflow ...' in t.sh with 'sfq'.
The test result is very bad. The speed is not stable, and speed
variation is too large when considering fairness.
The HTB is rate€kbps,ceil€kbps. I use 7 streams to test. Streams's
speed vary from 3.4kbps to 28.7kbps. The test last about 10 minutes, and
the speeds don't like to converge.
Maybe the fairness is achived in long run, but it hurts applications
that need bandwidth guarantee.
On Mon, 04 Apr 2005 12:42:21 +0100, Andy Furniss <andy.furniss@dsl.pipex.com> wrote:
> Wang Jian wrote:
> > Hi,
> >
> > One of my customer needs per flow rate control, so I write one.
> >
> > The code I post here is not finished, but it seems to work as expected.
> >
> > The kernel patch is agains kernel 2.6.11, the iproute2 patch is against
> > iproute2-2.6.11-050314.
> >
> > I write the code in a hurry to meet deadline. There are many other things
> > to do ahead for me. The code is written in 2 days (including read other
> > queue's code) and tested for a while to find obvious mistake. Don't be
> > suprised when you find many many bugs.
>
> Wow - I wish I could write that in 2 days :-)
>
> >
> > The test scenario is like this
> >
> > www server <- [ eth0 eth1 ] -> www clients
> >
> > The attached t.sh is used to generate test rules. Clients download a
> > big ISO file from www server, so flows' rate can be estimated by view
> > progress. However I use wget to test the speed, so the speed is
> > accumulated, not current.
>
> What if the client uses a download accelerator and has 12 connections (I
> suppose server could limit this - but if client is behind nat you may
> hurt others - which is what sfq does now AIUI, because it doesn't hash
> on dst port.)
>
>
> >
> > The problems I know:
> >
> > 1. The rtnetlink related code is quick hack. I am not familiar with
> > rtnetlink, so I look at other queue's code and use the simplest one.
> >
> > 2. perflow queue has no stats code. It will be added later.
> >
> > 3. I don't know what is the dump() method 's purpose, so I didn't write
> > dump() method. I will add it later when I know what it is for and how to
> > write rtnetlink code.
> >
> > Any feedback is welcome. And test it if you can :)
> >
> > PS: the code is licensed under GPL. If it is acceptable by upstream, it
> > will be submitted.
>
> Having per flow without the drawbacks of sfq is really cool, but I agree
> with Patrick about letting htb/hfsc limit. You say in the code -
>
> "You should use HTB or other classful qdisc to enclose this qdisc"
>
> So if you do that (unless you meant should not) then you can't guarentee
> per flow rate anyway without knowing the number of flows, unless you can
> set rate so high that max flows x flow rate < htb rate.
>
> I think you can still limit per flow ceil if you use htb/hfsc to ratelimit.
>
> I suppose you are solving a different problem with this than I normally
> shape for ie. you have loads of bandwidth and I have hardly any.
>
> It still could be something really usefull for me though, as I suspect
> it wouldn't be too hard to add lots of features/switches which (e)sfq
> doesn't have like -
>
> Per flow queue length limit - and more choice than just tail drop (I am
> thinking of me shaping from wrong and of link here - server with BIC tcp
> is horrible with tail drop - others are not as bad).
>
> For people who use esfq for hundreds of users, you could still do
> fairness of tcp flows within fairness per user address.
>
> Requeue properly which (e)sfq doesn't.
>
>
> Andy.
--
lark
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [LARTC] new perflow rate control queue
2005-04-04 7:21 [LARTC] new perflow rate control queue Wang Jian
` (4 preceding siblings ...)
2005-04-04 14:39 ` Wang Jian
@ 2005-04-04 15:10 ` Andy Furniss
2005-04-04 15:23 ` Andy Furniss
` (5 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Andy Furniss @ 2005-04-04 15:10 UTC (permalink / raw)
To: lartc
Wang Jian wrote:
> Hi Andy Furniss,
>
> I just tried HTB+SFQ. I replace 'perflow ...' in t.sh with 'sfq'.
>
> The test result is very bad. The speed is not stable, and speed
> variation is too large when considering fairness.
>
> The HTB is rate€kbps,ceil€kbps. I use 7 streams to test. Streams's
> speed vary from 3.4kbps to 28.7kbps. The test last about 10 minutes, and
> the speeds don't like to converge.
>
> Maybe the fairness is achived in long run, but it hurts applications
> that need bandwidth guarantee.
Yes - I can make sfq look bad in tests, if the only difference is dst
port then it just doesn't work and if the ip addresses are sequential
it's not too good. In practice I use esfq as you can use more hash
buckets - but perturb is horrable for the packet reordering.
I think perflow is going to be far better for me - just that having low
bandwidth means I would never send interactive to sfq anyway and only
use it for bulk whose rate is controlled by htb per user and is quite
variable - so for me just letting htb do rate would be fine.
Andy.
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [LARTC] new perflow rate control queue
2005-04-04 7:21 [LARTC] new perflow rate control queue Wang Jian
` (5 preceding siblings ...)
2005-04-04 15:10 ` Andy Furniss
@ 2005-04-04 15:23 ` Andy Furniss
2005-04-04 15:57 ` Wang Jian
` (4 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Andy Furniss @ 2005-04-04 15:23 UTC (permalink / raw)
To: lartc
Wang Jian wrote:
> This per flow control is good when used for VoIP (Voice and Video).
Ahh yes - your needs are totally different to mine - with low bandwidth
I just have to seperate interactive from bulk and use sfq for bulk only
as if queuing interactive would mean I have run out of bandwidth anyway.
>
> Let me explain the idea more clear.
>
> For example, you may have 50 streams. These stream can work perfectly at
> 10kbps - 15kbps.
>
> With HTB + SFQ, you should give 50*15 guaranteed. but then, if only one
> stream is using this, it can use up to 50*15 guaranteed. You have risk
> of waste 49*15 on it.
>
> In another hand, if your have more than 50 streams, say, 80 streams.
> With perfect fairness, every stream can get less than 10kbps. The
> quality is not met however, no one is satisfied with fairness.
>
> So, you have risk of waste and still you don't have guarantee.
>
> With per flow rate control, you can give a guaranteed 12*65, and set per
> flow control to rate\x12,ceil\x15,limit`. When you have only a few
> streams, you don't worry that you waste bandwidth. If more than 60
> streams occurs, the first 60 streams still works fine.
>
> Fairness is good, but sometimes, fairness means everyone hurts. If you
> have more than enough bandwidth, you can use fairness to get good QoS.
> But it is not the case when bandwidth is not so enough.
I can see now why you do it this way.
>
> BTW: Are there any good document for HFSC? I don't even know how it
> works :( Maybe it's can be used to achieve per flow control.
No not really many docs and you can't really do per flow as such - more
per user/class.
I haven't played with it enough yet, but the strength is being able to
seperate interactive from bulk and still limit per user/class , without
making each users interactive wait for other users bulk - at slow rates
the bitrate latency of single packets can add up enough to messup
interactive.
>
> Because this per-flow queue is new, you can add things useful to it.
It does look good :-) I'll test when I get time.
Andy.
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [LARTC] new perflow rate control queue
2005-04-04 7:21 [LARTC] new perflow rate control queue Wang Jian
` (6 preceding siblings ...)
2005-04-04 15:23 ` Andy Furniss
@ 2005-04-04 15:57 ` Wang Jian
2005-04-05 22:40 ` Andy Furniss
` (3 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Wang Jian @ 2005-04-04 15:57 UTC (permalink / raw)
To: lartc
[-- Attachment #1: Type: text/plain, Size: 898 bytes --]
Hi Andy Furniss,
On Mon, 04 Apr 2005 16:23:30 +0100, Andy Furniss <andy.furniss@dsl.pipex.com> wrote:
>
> >
> > Because this per-flow queue is new, you can add things useful to it.
>
> It does look good :-) I'll test when I get time.
>
The attached is the latest. The last one doesn't sync time: queue has a
variable time slot length; every flow has it own ticks.
This new patch against 2.6.11 sync queue and flows' time. Every new flow
has it jiffies set to q->jiffies and use that as start. As q->jiffies
and flow->jiffies increament in HZ step, time is synced. This will
improved accuracy.
But HZ is too long for token calculation. Sometimes, one of flow borrows
too much and get no enough penalty, so another flow hurts. But anyway,
per flow queue provides better fairness in my test, either in
short time period or long time period.
Looking forward to your feedback :)
--
lark
[-- Attachment #2: linux-2.6.11-perflow-r3.diff --]
[-- Type: application/octet-stream, Size: 15640 bytes --]
Index: linux-2.6.11-w/include/linux/pkt_sched.h
===================================================================
--- linux-2.6.11-w/include/linux/pkt_sched.h (revision 2)
+++ linux-2.6.11-w/include/linux/pkt_sched.h (revision 3)
@@ -272,6 +272,28 @@
__u32 ctokens;
};
+
+/* PERFLOW section */
+
+#define TC_PERFLOW_DEFAULTLIMIT 1024
+
+struct tc_perflow_qopt
+{
+ struct tc_ratespec rate;
+ struct tc_ratespec ceil;
+ __u32 limit;
+ __u32 qlen;
+};
+enum
+{
+ TCA_PERFLOW_UNSPEC,
+ TCA_PERFLOW_PARMS,
+ __TCA_PERFLOW_MAX,
+};
+
+#define TCA_PERFLOW_MAX (__TCA_PERFLOW_MAX - 1)
+
+
/* HFSC section */
struct tc_hfsc_qopt
Index: linux-2.6.11-w/net/sched/Kconfig
===================================================================
--- linux-2.6.11-w/net/sched/Kconfig (revision 2)
+++ linux-2.6.11-w/net/sched/Kconfig (revision 3)
@@ -192,6 +192,15 @@
To compile this code as a module, choose M here: the
module will be called sch_gred.
+config NET_SCH_PERFLOW
+ tristate "PERFLOW queue"
+ depends on NET_SCHED
+ ---help---
+ Say Y here if you want to use per flow rate control.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_perflow.
+
config NET_SCH_DSMARK
tristate "Diffserv field marker"
depends on NET_SCHED
Index: linux-2.6.11-w/net/sched/Makefile
===================================================================
--- linux-2.6.11-w/net/sched/Makefile (revision 2)
+++ linux-2.6.11-w/net/sched/Makefile (revision 3)
@@ -19,6 +19,7 @@
obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
obj-$(CONFIG_NET_SCH_RED) += sch_red.o
obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o
+obj-$(CONFIG_NET_SCH_PERFLOW) += sch_perflow.o
obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
Index: linux-2.6.11-w/net/sched/sch_perflow.c
===================================================================
--- linux-2.6.11-w/net/sched/sch_perflow.c (revision 0)
+++ linux-2.6.11-w/net/sched/sch_perflow.c (revision 3)
@@ -0,0 +1,555 @@
+/*
+ * net/sched/sch_perflow.c Per flow rate control queue.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Wang Jian <lark@linux.net.cn>, 2005
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/jhash.h>
+#include <linux/timer.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <net/inet_ecn.h>
+#include <net/dsfield.h>
+
+/* Per flow rate control algorithm
+
+Description
+-----------
+
+ When a new packet arrives, we lookup in the table to see if it
+ belongs to a flow being traced. If not, we create new entry for it.
+
+ For a packet belongs to a flow being traced, we see if it has
+ available token to send it out.
+
+ The 'rate' and 'ceil' have same meaning of HTB qdisc. The 'limit'
+ parameter defined how many flows we will trace, which defaults to
+ 1024.
+
+ Any flow entry without traffic in LIFETIME seconds will be wiped
+ to free slot.
+
+
+Algorithm
+---------
+
+ The algorithm is simple and naive. We have a token bucket, which
+ generate grate/10 (= rate * limit / 10) token in 1/10 seconds.
+
+ A flow can always send when it is under rate. When a flow is over
+ rate but under ceil, every time it borrows, it is put into borrow
+ list, so it may receive penalty.
+
+ When a under-rate flow sends but token is short, penalty is added
+ to one of flows in borrow list, and this punished flow is
+ removed from borrow list.
+
+ When a flow carrying penalty has packet, the penalty is
+ added to used token. This means, this flow can borrow less than
+ last time (if in a new time slot), or can borrow less
+ (ceil - rate - penalty).
+
+ There are other rules that handle fairness and low rate situation.
+ See code for details.
+
+ NOTE: the implementation of algorithm is not timer driven, but
+ packet driven.
+
+ The ideas behind this algorithm are
+
+ 1. We assume that perflow qdisc has rate * limit guaranteed.
+ 2. We can't affect past. We can only affect future.
+ 3. If a flow's borrowing leads to overlimit, we let this flow
+ borrow less in future.
+ 4. With a round robin punishment style, a flow borrows more times,
+ it stays in borrow list more times, and so receives more penalty.
+ (But we should consider more about this, I think the fairness
+ should be improved by sort borrow list.)
+
+
+Security Consideration
+----------------------
+
+ It's dangerous to create new entry for any new packets not belongs
+ to a flow being traced. For example, syn flood attack on a single
+ port may cause thousands of entries created. The 'limit' parameter
+ is used to set limit on maximum entries we will create.
+
+ But even with 'limit', port scan can fill the slots and make valid
+ new flow has no slot available and not be traced.
+
+ One of solution is to use netfilter MARK for classification
+ (a.k.a. cls_fw.c). Only established session is given a fw mark, and
+ then, if port scan use a spoofing source address, no fw mark gotten.
+
+
+Application
+-----------
+
+ You should use HTB or other classful qdisc to enclose this qdisc.
+
+ */
+
+
+#define PERFLOW_HSIZE 251
+#define PERFLOW_LIFETIME (10*HZ)
+
+struct perflow_entry
+{
+ u32 src;
+ u32 dst;
+ u16 sport;
+ u16 dport;
+
+ struct list_head hlist;
+ struct list_head borrow;
+ struct timer_list timer;
+ struct perflow_sched_data *q;
+ u32 jiffies;
+ u32 used;
+ u32 penalty;
+
+ u8 protocol;
+};
+
+struct perflow_sched_data
+{
+ /* parameters */
+ u32 rate; /* guaranted rate */
+ u32 ceil; /* upper bound */
+ u32 limit; /* maximum flows we trace */
+ u32 qlen; /* maximum queue length */
+
+ /* variables */
+ u32 grate; /* aggregative rate */
+
+ u32 jiffies;
+ u32 token;
+ u32 flow_count; /* how many flows we are tracing */
+ struct list_head ht[PERFLOW_HSIZE]; /* hash table */
+ struct list_head borrow; /* lists of borrowing flow */
+};
+
+struct commonhdr
+{
+ u16 source;
+ u16 dest;
+};
+
+static __inline__ u32 perflow_hash(struct perflow_entry *tuple)
+{
+ return (jhash_3words(tuple->src ^ tuple->protocol,
+ tuple->dst,
+ (tuple->sport << 16 | tuple->dport),
+ 0x5E83AD03) % PERFLOW_HSIZE);
+}
+
+static __inline__ int perflow_is_valid(struct sk_buff *skb)
+{
+ struct iphdr *iph = skb->nh.iph;
+
+ if (skb->protocol != __constant_htons(ETH_P_IP))
+ return 0;
+
+ if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_TCP
+ && iph->protocol != IPPROTO_SCTP)
+ return 0;
+
+ return 1;
+}
+
+static __inline__ void perflow_flow_timer(unsigned long arg)
+{
+ struct perflow_entry *e = (struct perflow_entry *) arg;
+
+ e->q->flow_count--;
+ list_del_init(&e->hlist);
+ if (!list_empty(&e->borrow))
+ list_del_init(&e->borrow);
+ kfree(e);
+}
+
+static __inline__ struct perflow_entry *
+perflow_new_flow(u32 hash, struct perflow_entry *tuple, struct Qdisc *sch)
+{
+ struct perflow_sched_data *q = qdisc_priv(sch);
+ struct perflow_entry *e;
+
+ if (q->flow_count >= q->limit)
+ return NULL;
+
+ e = kmalloc(sizeof(*e), GFP_KERNEL);
+ if (!e)
+ return NULL;
+
+ q->flow_count++;
+
+ memset(e, 0, sizeof(*e));
+
+ e->src = tuple->src;
+ e->dst = tuple->dst;
+ e->sport = tuple->sport;
+ e->dport = tuple->dport;
+ e->protocol = tuple->protocol;
+
+ INIT_LIST_HEAD(&e->hlist);
+ INIT_LIST_HEAD(&e->borrow);
+ init_timer(&e->timer);
+ e->timer.function = perflow_flow_timer;
+ e->timer.data = (unsigned long) e;
+ /* sync flow's tick to queue's tick */
+ e->jiffies = q->jiffies;
+ e->q = q;
+
+ list_add(&e->hlist, q->ht + hash);
+
+ return e;
+}
+
+static __inline__ void
+perflow_fill_tuple(struct perflow_entry *tuple, struct sk_buff *skb)
+{
+ struct iphdr *iph = skb->nh.iph;
+ struct commonhdr *ch;
+
+ memset(tuple, 0, sizeof(struct perflow_entry));
+#if 1
+ /* not a traceable flow, do nothing */
+ if (!perflow_is_valid(skb))
+ return;
+#endif
+
+ ch = (struct commonhdr *)((void *)iph + (iph->ihl << 2));
+ tuple->src = iph->saddr;
+ tuple->dst = iph->daddr;
+ tuple->sport = ch->source;
+ tuple->dport = ch->dest;
+ tuple->protocol = iph->protocol;
+}
+
+static struct perflow_entry *
+perflow_find_flow(u32 *hash, struct perflow_entry *flow, struct Qdisc *sch)
+{
+ struct perflow_sched_data *q = qdisc_priv(sch);
+ struct list_head *h, *head;
+ struct perflow_entry *e;
+
+ *hash = perflow_hash(flow);
+ head = q->ht + (*hash);
+
+ list_for_each(h, head) {
+ e = list_entry(h, struct perflow_entry, hlist);
+ if (e->src == flow->src
+ && e->dst == flow->dst
+ && e->sport == flow->sport
+ && e->dport == flow->dport
+ && e->protocol == flow->protocol)
+ del_timer(&e->timer);
+ return e;
+ }
+ return NULL;
+}
+
+static void perflow_punish(int penalty, struct list_head *borrow)
+{
+ struct perflow_entry *e;
+ struct list_head *h;
+
+ if (!list_empty(borrow)) {
+ h = borrow->next;
+ list_del_init(h);
+ e = list_entry(h, struct perflow_entry, hlist);
+ e->penalty += penalty;
+ }
+}
+
+static __inline__ void
+perflow_adjust_used(struct perflow_entry *flow, u32 rate, u32 ceil)
+{
+ int cycles;
+
+ /* still in this time slot */
+ if ((jiffies - flow->jiffies) <= HZ) {
+ if ((ceil - flow->used) > flow->penalty) {
+ flow->used += flow->penalty;
+ flow->penalty = 0;
+ } else {
+ flow->used = ceil;
+ flow->penalty -= ceil - flow->used;
+ }
+ return;
+ }
+
+ /* the packet arrives after cycles slots */
+ cycles = (jiffies - flow->jiffies) / HZ;
+ flow->jiffies += cycles * HZ;
+
+ if ((cycles * ceil - flow->used) > flow->penalty) {
+ flow->used = 0;
+ flow->penalty = 0;
+ } else {
+ flow->used = flow->penalty + flow->used - cycles * ceil;
+ if (flow->used > ceil) {
+ flow->used = ceil;
+ flow->penalty -= ceil;
+ }
+ }
+}
+
+static int perflow_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct perflow_sched_data *q = qdisc_priv(sch);
+ struct perflow_entry tuple, *flow;
+ u32 hash;
+
+ /* not a traceable flow */
+ if (!perflow_is_valid(skb))
+ goto drop;
+
+ /* if max qlen is set and current queue is full, drop */
+ if (q->qlen && sch->q.qlen >= q->qlen) {
+ printk(KERN_WARNING "qlen overlimit drop\n");
+ goto drop;
+ }
+
+ perflow_fill_tuple(&tuple, skb);
+ flow = perflow_find_flow(&hash, &tuple, sch);
+
+ if (flow == NULL) {
+ flow = perflow_new_flow(hash, &tuple, sch);
+ if (flow == NULL)
+ goto drop;
+ }
+
+ /* renew timer for flow */
+ flow->timer.expires = jiffies + PERFLOW_LIFETIME;
+ add_timer(&flow->timer);
+
+ if ((jiffies - q->jiffies) > HZ) {
+ q->token = q->grate;
+ q->jiffies += (jiffies - q->jiffies) / HZ * HZ;
+ }
+
+ /* renew used for this flow */
+ perflow_adjust_used(flow, q->rate, q->ceil);
+
+ /* we always satisfy flow under rate */
+ if (flow->used < q->rate) {
+ flow->used += skb->len;
+ if (flow->used > q->rate) {
+ /* if we borrow, we may receive penalty */
+ if (list_empty(&flow->borrow))
+ list_add_tail(&flow->borrow, &q->borrow);
+ }
+ if (flow->used > q->ceil)
+ flow->penalty += flow->used - q->ceil;
+ if (q->token < skb->len) {
+ q->token = 0;
+ perflow_punish(skb->len - q->token, &q->borrow);
+ } else {
+ q->token -= skb->len;
+ }
+ } else {
+ if (q->token < skb->len || flow->used >= q->ceil)
+ goto drop;
+
+ flow->used += skb->len;
+ q->token -= skb->len;
+
+ if (list_empty(&flow->borrow))
+ list_add_tail(&flow->borrow, &q->borrow);
+ else
+ list_move(&flow->borrow, &q->borrow);
+
+ if (flow->used > q->ceil)
+ flow->penalty += flow->used - q->ceil;
+ }
+
+enqueue:
+ sch->qstats.backlog += skb->len;
+ sch->bstats.bytes += skb->len;
+ sch->bstats.packets++;
+ __skb_queue_tail(&sch->q, skb);
+ return NET_XMIT_SUCCESS;
+
+drop:
+ sch->qstats.overlimits++;
+ kfree_skb(skb);
+ sch->qstats.drops++;
+ return NET_XMIT_CN;
+}
+
+static int perflow_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ __skb_queue_head(&sch->q, skb);
+ sch->qstats.backlog += skb->len;
+ sch->qstats.requeues++;
+ return 0;
+}
+
+static struct sk_buff *perflow_dequeue(struct Qdisc *sch)
+{
+ struct sk_buff *skb;
+
+ skb = __skb_dequeue(&sch->q);
+ if (skb) {
+ sch->qstats.backlog -= skb->len;
+ }
+ return skb;
+}
+
+static unsigned int perflow_drop(struct Qdisc *sch)
+{
+ struct sk_buff *skb;
+
+ skb = __skb_dequeue_tail(&sch->q);
+ if (skb) {
+ unsigned int len = skb->len;
+ sch->qstats.backlog -= len;
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return len;
+ }
+ return 0;
+}
+
+static void perflow_reset(struct Qdisc *sch)
+{
+ skb_queue_purge(&sch->q);
+ sch->qstats.backlog = 0;
+}
+
+static int perflow_change(struct Qdisc *sch, struct rtattr *opt)
+{
+ struct perflow_sched_data *q = qdisc_priv(sch);
+ struct tc_perflow_qopt *ctl;
+
+ /* if limit and qlen are lowered, and we are in a over limit
+ * state, we still don't drop flow or drop packet.
+ * sch->q will decrease to allowed length
+ * q->flow_count will decrease because no new flow is traced
+ */
+ if (opt == NULL) {
+ return -EINVAL;
+ } else {
+ ctl = RTA_DATA(opt);
+ if (opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+ return -EINVAL;
+
+ q->rate = ctl->rate.rate;
+ q->ceil = ctl->ceil.rate;
+ q->limit = ctl->limit;
+ q->qlen = ctl->qlen;
+
+ if (q->limit == 0)
+ q->limit = 1024;
+ /* aggregative rate is (rate * 1.05 * limit) */
+ q->grate = (q->rate + q->rate/20) * q->limit;
+
+ q->jiffies = jiffies;
+ }
+
+ return 0;
+}
+
+static int perflow_init(struct Qdisc *sch, struct rtattr *opt)
+{
+ struct perflow_sched_data *q = qdisc_priv(sch);
+ int i, ret;
+
+ ret = perflow_change(sch, opt);
+ if (ret)
+ return ret;
+
+ INIT_LIST_HEAD(&q->borrow);
+
+ for (i = 0; i < PERFLOW_HSIZE; i++)
+ INIT_LIST_HEAD(q->ht + i);
+
+ return 0;
+}
+
+static void perflow_destroy(struct Qdisc *sch)
+{
+ struct perflow_sched_data *q = qdisc_priv(sch);
+ struct list_head *h, *n;
+ struct perflow_entry *e;
+ int i;
+
+ for (i = 0; i < PERFLOW_HSIZE; i++) {
+ list_for_each_safe (h, n, q->ht + i) {
+ e = list_entry(h, struct perflow_entry, hlist);
+ del_timer(&e->timer);
+ if (!list_empty(&e->borrow))
+ list_del_init(&e->borrow);
+ list_del(h);
+ kfree(e);
+ }
+ }
+}
+
+static int perflow_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ return -1;
+}
+
+static struct Qdisc_ops perflow_qdisc_ops = {
+ .next = NULL,
+ .cl_ops = NULL,
+ .id = "perflow",
+ .priv_size = sizeof(struct perflow_sched_data),
+ .enqueue = perflow_enqueue,
+ .dequeue = perflow_dequeue,
+ .requeue = perflow_requeue,
+ .drop = perflow_drop,
+ .init = perflow_init,
+ .reset = perflow_reset,
+ .destroy = perflow_destroy,
+ .change = perflow_change,
+ .dump = perflow_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init perflow_module_init(void)
+{
+ return register_qdisc(&perflow_qdisc_ops);
+}
+
+static void __exit perflow_module_exit(void)
+{
+ unregister_qdisc(&perflow_qdisc_ops);
+}
+
+module_init(perflow_module_init);
+module_exit(perflow_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Wang Jian <lark@linux.net.cn>");
[-- Attachment #3: Type: text/plain, Size: 143 bytes --]
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [LARTC] new perflow rate control queue
2005-04-04 7:21 [LARTC] new perflow rate control queue Wang Jian
` (7 preceding siblings ...)
2005-04-04 15:57 ` Wang Jian
@ 2005-04-05 22:40 ` Andy Furniss
2005-04-06 4:17 ` Wang Jian
` (2 subsequent siblings)
11 siblings, 0 replies; 13+ messages in thread
From: Andy Furniss @ 2005-04-05 22:40 UTC (permalink / raw)
To: lartc
Wang Jian wrote:
> Hi Andy Furniss,
>
>
> On Mon, 04 Apr 2005 16:23:30 +0100, Andy Furniss <andy.furniss@dsl.pipex.com> wrote:
>
>
>>>Because this per-flow queue is new, you can add things useful to it.
>>
>>It does look good :-) I'll test when I get time.
>>
>
>
> The attached is the latest. The last one doesn't sync time: queue has a
> variable time slot length; every flow has it own ticks.
>
> This new patch against 2.6.11 sync queue and flows' time. Every new flow
> has it jiffies set to q->jiffies and use that as start. As q->jiffies
> and flow->jiffies increament in HZ step, time is synced. This will
> improved accuracy.
>
> But HZ is too long for token calculation. Sometimes, one of flow borrows
> too much and get no enough penalty, so another flow hurts. But anyway,
> per flow queue provides better fairness in my test, either in
> short time period or long time period.
>
> Looking forward to your feedback :)
It works OK for me - though I would really need it to be variable rate
to use really - but as you say it's designed for your needs.
I noticed that it drops icmp so you need to be careful about what you
send to it.
If you limit connections and use them all up then alive but not always
active connections will get locked out - there is a netfilter connection
limit already.
As you say above it's not always fair - I didn't test that much it
seemed OK apart from if htb limited it ie.
htb rate higher than sum of rates but less than sum of ceils made it
unfair to a flow with smaller packet size.
Andy.
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [LARTC] new perflow rate control queue
2005-04-04 7:21 [LARTC] new perflow rate control queue Wang Jian
` (8 preceding siblings ...)
2005-04-05 22:40 ` Andy Furniss
@ 2005-04-06 4:17 ` Wang Jian
2005-04-06 12:29 ` Andy Furniss
2005-04-06 12:48 ` Wang Jian
11 siblings, 0 replies; 13+ messages in thread
From: Wang Jian @ 2005-04-06 4:17 UTC (permalink / raw)
To: lartc
Hi Andy Furniss,
On Tue, 05 Apr 2005 23:40:54 +0100, Andy Furniss <andy.furniss@dsl.pipex.com> wrote:
> >
> > Looking forward to your feedback :)
>
> It works OK for me - though I would really need it to be variable rate
> to use really - but as you say it's designed for your needs.
>
> I noticed that it drops icmp so you need to be careful about what you
> send to it.
I plan to optionally reclassify unhandled traffic to another class if specified.
So a default class may handle it.
>
> If you limit connections and use them all up then alive but not always
> active connections will get locked out - there is a netfilter connection
> limit already.
>
> As you say above it's not always fair - I didn't test that much it
> seemed OK apart from if htb limited it ie.
>
> htb rate higher than sum of rates but less than sum of ceils made it
> unfair to a flow with smaller packet size.
Yes. I also think that low rate or small packet size stream will have problem.
I didn't test that case yet.
I read back your post and I think the best solution for you is use HTB +
PRIO.
Let interactive but low rate traffic have highest priority, and let bulk
transfer have lowest priority and constrain them using HTB.
TCP itself has some fairness: slower stream get faster, and faster
stream get slower. The sliding window is for this.
--
lark
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [LARTC] new perflow rate control queue
2005-04-04 7:21 [LARTC] new perflow rate control queue Wang Jian
` (9 preceding siblings ...)
2005-04-06 4:17 ` Wang Jian
@ 2005-04-06 12:29 ` Andy Furniss
2005-04-06 12:48 ` Wang Jian
11 siblings, 0 replies; 13+ messages in thread
From: Andy Furniss @ 2005-04-06 12:29 UTC (permalink / raw)
To: lartc
Wang Jian wrote:
>
> I read back your post and I think the best solution for you is use HTB +
> PRIO.
I sort of have htb setup like prio but it's more flexable.
>
> Let interactive but low rate traffic have highest priority, and let bulk
> transfer have lowest priority and constrain them using HTB.
>
> TCP itself has some fairness: slower stream get faster, and faster
> stream get slower. The sliding window is for this.
TCP can be very unfair in some cases - different window sizes/scale on
off and 56k vs broadband peer.
I am rebuilding stuff on my gateway at the moment and noticed the
iproute patch doesn't compile with gcc 2.95.3 it's fine with 3.3.
q_perflow.c: In function `perflow_print_opt':
q_perflow.c:141: parse error before `char'
q_perflow.c:142: `b1' undeclared (first use in this function)
q_perflow.c:142: (Each undeclared identifier is reported only once
q_perflow.c:142: for each function it appears in.)
make[1]: *** [q_perflow.o] Error 1
Andy.
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [LARTC] new perflow rate control queue
2005-04-04 7:21 [LARTC] new perflow rate control queue Wang Jian
` (10 preceding siblings ...)
2005-04-06 12:29 ` Andy Furniss
@ 2005-04-06 12:48 ` Wang Jian
11 siblings, 0 replies; 13+ messages in thread
From: Wang Jian @ 2005-04-06 12:48 UTC (permalink / raw)
To: lartc
Hi Andy Furniss,
On Wed, 06 Apr 2005 13:29:56 +0100, Andy Furniss <andy.furniss@dsl.pipex.com> wrote:
> >
> > I read back your post and I think the best solution for you is use HTB +
> > PRIO.
>
> I sort of have htb setup like prio but it's more flexable.
I am glad to hear that :)
> >
> > Let interactive but low rate traffic have highest priority, and let bulk
> > transfer have lowest priority and constrain them using HTB.
> >
> > TCP itself has some fairness: slower stream get faster, and faster
> > stream get slower. The sliding window is for this.
>
> TCP can be very unfair in some cases - different window sizes/scale on
> off and 56k vs broadband peer.
>
Yes. This unfairness is generally a good thing (but not always). It
is in favour of tcp connection in the fast/wide path, so bandwidth can
be used "efficiently" :)
> I am rebuilding stuff on my gateway at the moment and noticed the
> iproute patch doesn't compile with gcc 2.95.3 it's fine with 3.3.
>
> q_perflow.c: In function `perflow_print_opt':
> q_perflow.c:141: parse error before `char'
> q_perflow.c:142: `b1' undeclared (first use in this function)
> q_perflow.c:142: (Each undeclared identifier is reported only once
> q_perflow.c:142: for each function it appears in.)
> make[1]: *** [q_perflow.o] Error 1
>
This is due to the included <linux/jhash.h>.
Regards
--
lark
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc
^ permalink raw reply [flat|nested] 13+ messages in thread
end of thread, other threads:[~2005-04-06 12:48 UTC | newest]
Thread overview: 13+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-04-04 7:21 [LARTC] new perflow rate control queue Wang Jian
2005-04-04 8:51 ` Patrick McHardy
2005-04-04 9:10 ` Wang Jian
2005-04-04 11:42 ` Andy Furniss
2005-04-04 13:53 ` Wang Jian
2005-04-04 14:39 ` Wang Jian
2005-04-04 15:10 ` Andy Furniss
2005-04-04 15:23 ` Andy Furniss
2005-04-04 15:57 ` Wang Jian
2005-04-05 22:40 ` Andy Furniss
2005-04-06 4:17 ` Wang Jian
2005-04-06 12:29 ` Andy Furniss
2005-04-06 12:48 ` Wang Jian
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.