From: Wang Jian <lark@linux.net.cn>
To: lartc@vger.kernel.org
Subject: [LARTC] new perflow rate control queue
Date: Mon, 04 Apr 2005 07:21:17 +0000 [thread overview]
Message-ID: <20050404152117.6d90635d.lark@linux.net.cn> (raw)
[-- Attachment #1: Type: text/plain, Size: 1355 bytes --]
Hi,
One of my customers needs per-flow rate control, so I wrote a qdisc for it.
The code I post here is not finished, but it seems to work as expected.
The kernel patch is against kernel 2.6.11; the iproute2 patch is against
iproute2-2.6.11-050314.
I wrote the code in a hurry to meet a deadline, and there are many other things
ahead of me still to do. The code was written in 2 days (including reading other
queues' code) and tested only long enough to find obvious mistakes. Don't be
surprised when you find many, many bugs.
The test scenario is like this
www server <- [ eth0 eth1 ] -> www clients
The attached t.sh is used to generate test rules. Clients download a
big ISO file from the www server, so each flow's rate can be estimated by watching
the progress. However, I use wget to test the speed, so the reported speed is the
cumulative average, not the current rate.
The problems I know:
1. The rtnetlink-related code is a quick hack. I am not familiar with
rtnetlink, so I looked at other queues' code and used the simplest one.
2. The perflow queue has no stats code. It will be added later.
3. I don't know what the purpose of the dump() method is, so I didn't write
one. I will add it later, once I know what it is for and how to
write the rtnetlink code.
Any feedback is welcome. And test it if you can :)
PS: the code is licensed under the GPL. If it is acceptable to upstream, it
will be submitted.
--
lark
[-- Attachment #2: iproute2-2.6.11-050314-perflow.diff --]
[-- Type: application/octet-stream, Size: 4842 bytes --]
Index: iproute2-2.6.11-w/include/linux/pkt_sched.h
===================================================================
--- iproute2-2.6.11-w/include/linux/pkt_sched.h (revision 1)
+++ iproute2-2.6.11-w/include/linux/pkt_sched.h (working copy)
@@ -272,6 +272,28 @@
__u32 ctokens;
};
+
+/* PERFLOW section */
+
+#define TC_PERFLOW_DEFAULTLIMIT 1024
+
+struct tc_perflow_qopt	/* perflow parameters; tc sends this as the TCA_OPTIONS payload */
+{
+ struct tc_ratespec rate;	/* rate allocated to each flow */
+ struct tc_ratespec ceil;	/* upper limit for each flow */
+ __u32 limit;	/* maximum concurrent flows; tc substitutes TC_PERFLOW_DEFAULTLIMIT for 0 */
+ __u32 qlen;	/* maximum queue length; tc rejects 1..4 and leaves 0 when not given */
+};
+enum	/* NOTE(review): presumably nested attribute types for perflow — confirm intended use */
+{
+ TCA_PERFLOW_UNSPEC,
+ TCA_PERFLOW_PARMS,	/* only appears in a commented-out addattr_l() call in q_perflow.c */
+ __TCA_PERFLOW_MAX,	/* sentinel used to derive TCA_PERFLOW_MAX */
+};
+
+#define TCA_PERFLOW_MAX (__TCA_PERFLOW_MAX - 1)
+
+
/* HFSC section */
struct tc_hfsc_qopt
Index: iproute2-2.6.11-w/tc/q_perflow.c
===================================================================
--- iproute2-2.6.11-w/tc/q_perflow.c (revision 0)
+++ iproute2-2.6.11-w/tc/q_perflow.c (revision 0)
@@ -0,0 +1,153 @@
+/*
+ * q_perflow.c PERFLOW.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Wang Jian <lark@linux.net.cn>, 2005
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <syslog.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <string.h>
+
+#include "utils.h"
+#include "tc_util.h"
+
+/*
+ * Print the one-screen usage summary for the perflow qdisc to stderr.
+ */
+static void explain(void)
+{
+	static const char usage_text[] =
+"Usage: ... perflow rate RATE [ ceil CEIL ] [ limit LIMIT ] [ qlen QLEN ]\n"
+"\n"
+"rate rate allocated to each flow (flow can still borrow)\n"
+"ceil upper limit for each flow (flow can not borrow)\n"
+"limit maximum concurrent flows\n"
+"qlen maximum queue length\n";
+
+	fprintf(stderr, "%s", usage_text);
+}
+
+#define usage() return(-1)
+
+/*
+ * Parse the "perflow" qdisc options from the tc command line.
+ *
+ * qu:   qdisc descriptor (not used here)
+ * argc: number of arguments left to parse
+ * argv: the arguments themselves
+ * n:    netlink request the TCA_OPTIONS attribute is appended to
+ *
+ * Recognised keywords are "rate", "ceil", "limit", "qlen" and "help".
+ * "rate" is mandatory once any option is given; "ceil" defaults to "rate"
+ * and "limit" defaults to TC_PERFLOW_DEFAULTLIMIT.
+ *
+ * Returns 0 on success (nothing is appended when no option was given) and
+ * -1 after printing a diagnostic to stderr.
+ */
+static int perflow_parse_opt(struct qdisc_util *qu, int argc, char **argv, struct nlmsghdr *n)
+{
+	struct tc_perflow_qopt opt;
+	int ok = 0;
+
+	memset(&opt, 0, sizeof(opt));
+
+	while (argc > 0) {
+		if (strcmp(*argv, "rate") == 0) {
+			NEXT_ARG();
+			if (opt.rate.rate) {
+				/* was reported as a duplicate "ceil" */
+				fprintf(stderr, "Double \"rate\" spec\n");
+				return -1;
+			}
+			if (get_rate(&opt.rate.rate, *argv)) {
+				fprintf(stderr, "Illegal \"rate\"\n");
+				return -1;
+			}
+			ok++;
+		} else if (strcmp(*argv, "ceil") == 0) {
+			NEXT_ARG();
+			if (opt.ceil.rate) {
+				fprintf(stderr, "Double \"ceil\" spec\n");
+				return -1;
+			}
+			if (get_rate(&opt.ceil.rate, *argv)) {
+				fprintf(stderr, "Illegal \"ceil\"\n");
+				return -1;
+			}
+			ok++;
+		} else if (strcmp(*argv, "limit") == 0) {
+			NEXT_ARG();
+			if (get_size(&opt.limit, *argv)) {
+				fprintf(stderr, "Illegal \"limit\"\n");
+				return -1;
+			}
+			ok++;
+		} else if (strcmp(*argv, "qlen") == 0) {
+			NEXT_ARG();
+			if (get_size(&opt.qlen, *argv)) {
+				/* was reported as an illegal "limit" */
+				fprintf(stderr, "Illegal \"qlen\"\n");
+				return -1;
+			}
+			ok++;
+		} else if (strcmp(*argv, "help") == 0) {
+			explain();
+			return -1;
+		} else {
+			fprintf(stderr, "What is \"%s\"?\n", *argv);
+			explain();
+			return -1;
+		}
+		argc--; argv++;
+	}
+
+	/* no option at all: leave the request without TCA_OPTIONS */
+	if (!ok)
+		return 0;
+
+	if (opt.rate.rate == 0) {
+		fprintf(stderr, "\"rate\" is required.\n");
+		return -1;
+	}
+
+	if (opt.ceil.rate == 0)
+		opt.ceil = opt.rate;
+
+	if (opt.ceil.rate < opt.rate.rate) {
+		fprintf(stderr, "\"ceil\" must be >= \"rate\".\n");
+		return -1;
+	}
+
+	if (opt.limit == 0)
+		opt.limit = TC_PERFLOW_DEFAULTLIMIT;
+
+	if (opt.qlen > 0 && opt.qlen < 5) {
+		fprintf(stderr, "\"qlen\" must be >= 5.\n");
+		return -1;
+	}
+
+	/*
+	 * The options travel as a flat TCA_OPTIONS payload; addattr_l() fills
+	 * in the attribute length itself, so no manual rta_len fix-up is
+	 * needed. Report a failure to append instead of ignoring it.
+	 */
+	if (addattr_l(n, 1024, TCA_OPTIONS, &opt, sizeof(opt)) < 0)
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Pretty-print the perflow options carried in a TCA_OPTIONS attribute.
+ *
+ * qu:  qdisc descriptor (not used here)
+ * f:   stream to print to
+ * opt: attribute holding a struct tc_perflow_qopt, or NULL
+ *
+ * Returns 0 when there is nothing to print or printing succeeded, and -1
+ * when the payload is too short to hold a struct tc_perflow_qopt.
+ */
+static int perflow_print_opt(struct qdisc_util *qu, FILE *f, struct rtattr *opt)
+{
+	struct tc_perflow_qopt *qopt;
+	SPRINT_BUF(b1);
+
+	if (opt == NULL)
+		return 0;
+
+	if (RTA_PAYLOAD(opt) < sizeof(*qopt))
+		return -1;
+
+	qopt = RTA_DATA(opt);
+
+	fprintf(f, "rate %s ", sprint_rate(qopt->rate.rate, b1));
+	fprintf(f, "ceil %s ", sprint_rate(qopt->ceil.rate, b1));
+	/* "limit" is a flow count: print the limit field, not the rate as a size */
+	fprintf(f, "limit %u ", qopt->limit);
+	/* qlen 0 means it was not configured */
+	if (qopt->qlen)
+		fprintf(f, "qlen %u ", qopt->qlen);
+
+	return 0;
+}
+
+struct qdisc_util perflow_qdisc_util = {	/* tc front-end hooks for the "perflow" qdisc */
+ .id = "perflow",
+ .parse_qopt = perflow_parse_opt,	/* command line -> TCA_OPTIONS */
+ .print_qopt = perflow_print_opt,	/* TCA_OPTIONS -> text */
+};
Index: iproute2-2.6.11-w/tc/Makefile
===================================================================
--- iproute2-2.6.11-w/tc/Makefile (revision 1)
+++ iproute2-2.6.11-w/tc/Makefile (working copy)
@@ -7,6 +7,7 @@
TCMODULES += q_fifo.o
TCMODULES += q_sfq.o
TCMODULES += q_red.o
+TCMODULES += q_perflow.o
TCMODULES += q_prio.o
TCMODULES += q_tbf.o
TCMODULES += q_cbq.o
[-- Attachment #3: linux-2.6.11-perflow.diff --]
[-- Type: application/octet-stream, Size: 15639 bytes --]
Index: linux-2.6.11-w/include/linux/pkt_sched.h
===================================================================
--- linux-2.6.11-w/include/linux/pkt_sched.h (revision 1)
+++ linux-2.6.11-w/include/linux/pkt_sched.h (working copy)
@@ -272,6 +272,28 @@
__u32 ctokens;
};
+
+/* PERFLOW section */
+
+#define TC_PERFLOW_DEFAULTLIMIT 1024
+
+struct tc_perflow_qopt	/* perflow parameters; read from the options attribute by perflow_change() */
+{
+ struct tc_ratespec rate;	/* only .rate is read: copied into the qdisc's guaranteed rate */
+ struct tc_ratespec ceil;	/* only .rate is read: copied into the qdisc's upper bound */
+ __u32 limit;	/* maximum flows traced; 0 is replaced by 1024 in perflow_change() */
+ __u32 qlen;	/* maximum queue length; 0 disables the check in perflow_enqueue() */
+};
+enum	/* NOTE(review): presumably nested attribute types for perflow — confirm intended use */
+{
+ TCA_PERFLOW_UNSPEC,
+ TCA_PERFLOW_PARMS,	/* not referenced by sch_perflow.c in this patch */
+ __TCA_PERFLOW_MAX,	/* sentinel used to derive TCA_PERFLOW_MAX */
+};
+
+#define TCA_PERFLOW_MAX (__TCA_PERFLOW_MAX - 1)
+
+
/* HFSC section */
struct tc_hfsc_qopt
Index: linux-2.6.11-w/net/sched/Kconfig
===================================================================
--- linux-2.6.11-w/net/sched/Kconfig (revision 1)
+++ linux-2.6.11-w/net/sched/Kconfig (working copy)
@@ -192,6 +192,15 @@
To compile this code as a module, choose M here: the
module will be called sch_gred.
+config NET_SCH_PERFLOW
+ tristate "PERFLOW queue"
+ depends on NET_SCHED
+ ---help---
+ Say Y here if you want to use per flow rate control.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_perflow.
+
config NET_SCH_DSMARK
tristate "Diffserv field marker"
depends on NET_SCHED
Index: linux-2.6.11-w/net/sched/Makefile
===================================================================
--- linux-2.6.11-w/net/sched/Makefile (revision 1)
+++ linux-2.6.11-w/net/sched/Makefile (working copy)
@@ -19,6 +19,7 @@
obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
obj-$(CONFIG_NET_SCH_RED) += sch_red.o
obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o
+obj-$(CONFIG_NET_SCH_PERFLOW) += sch_perflow.o
obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
Index: linux-2.6.11-w/net/sched/sch_perflow.c
===================================================================
--- linux-2.6.11-w/net/sched/sch_perflow.c (revision 0)
+++ linux-2.6.11-w/net/sched/sch_perflow.c (revision 0)
@@ -0,0 +1,551 @@
+/*
+ * net/sched/sch_perflow.c Per flow rate control queue.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors: Wang Jian <lark@linux.net.cn>, 2005
+ *
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+#include <linux/bitops.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/mm.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <linux/in.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/if_ether.h>
+#include <linux/inet.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/notifier.h>
+#include <linux/jhash.h>
+#include <linux/timer.h>
+#include <net/ip.h>
+#include <net/route.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+#include <net/pkt_sched.h>
+#include <net/inet_ecn.h>
+#include <net/dsfield.h>
+
+/* Per flow rate control algorithm
+
+Description
+-----------
+
+ When a new packet arrives, we look it up in the table to see if it
+ belongs to a flow being traced. If not, we create a new entry for it.
+
+ For a packet that belongs to a flow being traced, we check whether
+ enough tokens are available to send it out.
+
+ The 'rate' and 'ceil' have same meaning of HTB qdisc. The 'limit'
+ parameter defined how many flows we will trace, which defaults to
+ 1024.
+
+ Any flow entry without traffic in LIFETIME seconds will be wiped
+ to free slot.
+
+
+Algorithm
+---------
+
+ The algorithm is simple and naive. We have a token bucket, which
+ generates grate/10 (= rate * 1.05 * limit / 10) tokens every 1/10 second.
+
+ A flow can always send when it is under rate. When a flow is over
+ rate but under ceil, every time it borrows, it is put into borrow
+ list, so it may receive penalty.
+
+ When a under-rate flow sends but token is short, penalty is added
+ to one of flows in borrow list, and this punished flow is
+ removed from borrow list.
+
+ When a flow carrying penalty has packet, the penalty is
+ added to used token. This means, this flow can borrow less than
+ last time (if in a new time slot), or can borrow less
+ (ceil - rate - penalty).
+
+ There are other rules that handle fairness and low rate situation.
+ See code for details.
+
+ NOTE: the implementation of algorithm is not timer driven, but
+ packet driven.
+
+ The ideas behind this algorithm are
+
+ 1. We assume that perflow qdisc has rate * limit guaranteed.
+ 2. We can't affect past. We can only affect future.
+ 3. If a flow's borrowing leads to overlimit, we let this flow
+ borrow less in future.
+ 4. With a round robin punishment style, a flow borrows more times,
+ it stays in borrow list more times, and so receives more penalty.
+ (But we should consider more about this, I think the fairness
+ should be improved by sort borrow list.)
+
+
+Security Consideration
+----------------------
+
+ It's dangerous to create a new entry for every packet that does not
+ belong to a flow being traced. For example, a SYN flood attack on a single
+ port may cause thousands of entries to be created. The 'limit' parameter
+ sets the maximum number of entries we will create.
+
+ But even with 'limit', a port scan can fill the slots, so that a valid
+ new flow has no slot available and is not traced.
+
+ One of solution is to use netfilter MARK for classification
+ (a.k.a. cls_fw.c). Only established session is given a fw mark, and
+ then, if port scan use a spoofing source address, no fw mark gotten.
+
+
+Application
+-----------
+
+ You should use HTB or other classful qdisc to enclose this qdisc.
+
+ */
+
+
+#define PERFLOW_HSIZE 251
+#define PERFLOW_LIFETIME (10*HZ)
+
+/*
+ * State kept for one traced flow. The same structure doubles as the
+ * lookup key: perflow_fill_tuple() fills only the key fields of a
+ * temporary instance.
+ */
+struct perflow_entry
+{
+	/* flow key, copied as-is from the IP and transport headers */
+	u32 src;
+	u32 dst;
+	u16 sport;
+	u16 dport;
+
+	struct list_head hlist;		/* linkage in the hash bucket */
+	struct list_head borrow;	/* linkage in the qdisc's borrow list */
+	struct timer_list timer;	/* frees the entry after PERFLOW_LIFETIME */
+	struct perflow_sched_data *q;	/* owning qdisc data */
+	/*
+	 * Start of the current accounting slot. Must be as wide as the
+	 * kernel's jiffies counter: a u32 here is truncated on 64-bit
+	 * machines and makes every "jiffies - flow->jiffies" difference huge.
+	 */
+	unsigned long jiffies;
+	u32 used;			/* bytes charged in the current slot */
+	u32 penalty;			/* bytes still to be charged as punishment */
+
+	u8 protocol;			/* IP protocol number, part of the key */
+};
+
+/*
+ * Private data of one perflow qdisc instance.
+ */
+struct perflow_sched_data
+{
+	/* parameters */
+	u32 rate;		/* guaranteed rate */
+	u32 ceil;		/* upper bound */
+	u32 limit;		/* maximum flows we trace */
+	u32 qlen;		/* maximum queue length */
+
+	/* variables */
+	u32 grate;		/* aggregate rate */
+
+	/*
+	 * Time of the last token refill. Must be as wide as the kernel's
+	 * jiffies counter: a u32 here is truncated on 64-bit machines, which
+	 * makes the refill test in perflow_enqueue() fire on every packet.
+	 */
+	unsigned long jiffies;
+	u32 token;		/* bytes left in the shared bucket */
+	u32 flow_count;		/* how many flows we are tracing */
+	struct list_head ht[PERFLOW_HSIZE];	/* hash table */
+	struct list_head borrow;	/* list of borrowing flows */
+};
+
+struct commonhdr	/* overlay for the first four bytes following the IP header */
+{
+ u16 source;	/* copied unchanged into perflow_entry.sport */
+ u16 dest;	/* copied unchanged into perflow_entry.dport */
+};
+
+/*
+ * Map a flow key to a hash bucket index in [0, PERFLOW_HSIZE).
+ *
+ * The key is folded into three words: source address mixed with the
+ * protocol, destination address, and both ports packed together.
+ */
+static __inline__ u32 perflow_hash(struct perflow_entry *tuple)
+{
+	u32 addr_proto = tuple->src ^ tuple->protocol;
+	u32 ports = (tuple->sport << 16 | tuple->dport);
+	u32 mixed = jhash_3words(addr_proto, tuple->dst, ports, 0x5e83ad03);
+
+	return mixed % PERFLOW_HSIZE;
+}
+
+/*
+ * Tell whether a packet belongs to a traffic type we can trace.
+ *
+ * Returns 1 for IPv4 packets carrying TCP, UDP or SCTP and 0 for
+ * everything else.
+ */
+static __inline__ int perflow_is_valid(struct sk_buff *skb)
+{
+	struct iphdr *iph = skb->nh.iph;
+
+	if (skb->protocol != __constant_htons(ETH_P_IP))
+		return 0;
+
+	/* the original test named IPPROTO_TCP twice, which left UDP out */
+	switch (iph->protocol) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_SCTP:
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Timer callback: the flow passed in 'arg' saw no packet for
+ * PERFLOW_LIFETIME, so unlink it from the hash table and the borrow list
+ * and free it.
+ *
+ * NOTE(review): no lock is taken here while the lists and flow_count are
+ * modified — confirm this cannot run concurrently with the enqueue path.
+ */
+static __inline__ void perflow_flow_timer(unsigned long arg)
+{
+	struct perflow_entry *flow = (struct perflow_entry *) arg;
+	struct perflow_sched_data *owner = flow->q;
+
+	owner->flow_count--;
+
+	list_del_init(&flow->hlist);
+	if (!list_empty(&flow->borrow))
+		list_del_init(&flow->borrow);
+
+	printk(KERN_WARNING "delete flow\n");
+	kfree(flow);
+}
+
+/*
+ * Start tracing a new flow.
+ *
+ * hash:  bucket index previously computed for 'tuple'
+ * tuple: flow key to copy into the new entry
+ * sch:   qdisc the flow belongs to
+ *
+ * Returns the new entry, already linked into the hash table with its
+ * timer initialised but not armed, or NULL when the flow limit is reached
+ * or memory is short.
+ */
+static __inline__ struct perflow_entry *
+perflow_new_flow(u32 hash, struct perflow_entry *tuple, struct Qdisc *sch)
+{
+	struct perflow_sched_data *q = qdisc_priv(sch);
+	struct perflow_entry *e;
+
+	if (q->flow_count >= q->limit)
+		return NULL;
+
+	/*
+	 * The only caller is perflow_enqueue(). Qdisc enqueue handlers run
+	 * with the device queue lock held, so the allocation must not sleep:
+	 * use GFP_ATOMIC rather than GFP_KERNEL.
+	 */
+	e = kmalloc(sizeof(*e), GFP_ATOMIC);
+	if (!e)
+		return NULL;
+
+	q->flow_count++;
+
+	memset(e, 0, sizeof(*e));
+
+	e->src = tuple->src;
+	e->dst = tuple->dst;
+	e->sport = tuple->sport;
+	e->dport = tuple->dport;
+	e->protocol = tuple->protocol;
+
+	INIT_LIST_HEAD(&e->hlist);
+	INIT_LIST_HEAD(&e->borrow);
+	init_timer(&e->timer);
+	e->timer.function = perflow_flow_timer;
+	e->timer.data = (unsigned long) e;
+	e->jiffies = jiffies;
+	e->q = q;
+
+	list_add(&e->hlist, q->ht + hash);
+
+	return e;
+}
+
+/*
+ * Build the lookup key for a packet.
+ *
+ * tuple: key to fill; it is zeroed first and stays all-zero for packets
+ *        that perflow_is_valid() rejects
+ * skb:   packet to read the addresses, ports and protocol from
+ */
+static __inline__ void
+perflow_fill_tuple(struct perflow_entry *tuple, struct sk_buff *skb)
+{
+	struct iphdr *ip = skb->nh.iph;
+	struct commonhdr *ports;
+
+	memset(tuple, 0, sizeof(*tuple));
+
+	/* not a traceable flow, do nothing */
+	if (!perflow_is_valid(skb))
+		return;
+
+	/* the port pair sits right behind the variable-length IP header */
+	ports = (struct commonhdr *)((void *)ip + (ip->ihl << 2));
+
+	tuple->protocol = ip->protocol;
+	tuple->src = ip->saddr;
+	tuple->dst = ip->daddr;
+	tuple->sport = ports->source;
+	tuple->dport = ports->dest;
+}
+
+/*
+ * Look up the traced flow matching a key.
+ *
+ * hash: out parameter, always set to the bucket index of 'flow'
+ * flow: key to search for
+ * sch:  qdisc whose hash table is searched
+ *
+ * Returns the matching entry with its expiry timer cancelled (the caller
+ * is expected to re-arm it), or NULL when the flow is not traced.
+ */
+static struct perflow_entry *
+perflow_find_flow(u32 *hash, struct perflow_entry *flow, struct Qdisc *sch)
+{
+	struct perflow_sched_data *q = qdisc_priv(sch);
+	struct list_head *h, *head;
+	struct perflow_entry *e;
+
+	*hash = perflow_hash(flow);
+	head = q->ht + (*hash);
+
+	list_for_each(h, head) {
+		e = list_entry(h, struct perflow_entry, hlist);
+
+		/*
+		 * Skip entries whose key differs. The original had an
+		 * unbraced "if" here, so "return e" ran for the first entry
+		 * of the bucket whether it matched or not.
+		 */
+		if (e->src != flow->src || e->dst != flow->dst ||
+		    e->sport != flow->sport || e->dport != flow->dport ||
+		    e->protocol != flow->protocol)
+			continue;
+
+		del_timer(&e->timer);
+		return e;
+	}
+	return NULL;
+}
+
+/*
+ * Charge a penalty to the flow at the head of the borrow list and take
+ * that flow off the list. Does nothing when the list is empty.
+ *
+ * penalty: number of bytes to add to the flow's penalty
+ * borrow:  head of the qdisc's borrow list
+ */
+static void perflow_punish(int penalty, struct list_head *borrow)
+{
+	struct perflow_entry *e;
+	struct list_head *h;
+
+	if (!list_empty(borrow)) {
+		h = borrow->next;
+		list_del_init(h);
+		/*
+		 * h is a node of the borrow list, so the entry must be
+		 * recovered through the 'borrow' member; using 'hlist' here
+		 * yields a pointer offset into the wrong place.
+		 */
+		e = list_entry(h, struct perflow_entry, borrow);
+		e->penalty += penalty;
+	}
+}
+
+/*
+ * Bring a flow's per-slot accounting up to date before a packet is
+ * charged to it. One slot lasts HZ jiffies.
+ *
+ * flow: flow to update
+ * rate: guaranteed rate (currently unused)
+ * ceil: upper bound on what the flow may use per slot
+ *
+ * Within the current slot, pending penalty is moved into 'used' as far as
+ * 'ceil' allows. When one or more slots have elapsed, the slot start is
+ * advanced and 'used' is reset or reduced by the elapsed allowance.
+ */
+static __inline__ void
+perflow_adjust_used(struct perflow_entry *flow, u32 rate, u32 ceil)
+{
+	int cycles;
+
+	/* still in this time slot */
+	if ((jiffies - flow->jiffies) <= HZ) {
+		if ((ceil - flow->used) > flow->penalty) {
+			flow->used += flow->penalty;
+			flow->penalty = 0;
+		} else {
+			/*
+			 * Only part of the penalty fits below ceil. Take that
+			 * part off the penalty BEFORE raising 'used'; in the
+			 * opposite order "ceil - flow->used" is always 0 and
+			 * the penalty never shrinks.
+			 */
+			flow->penalty -= ceil - flow->used;
+			flow->used = ceil;
+		}
+		return;
+	}
+
+	/* the packet arrives after cycles slots */
+	cycles = (jiffies - flow->jiffies) / HZ;
+	flow->jiffies += cycles * HZ;
+
+	if ((cycles * ceil - flow->used) > flow->penalty) {
+		flow->used = 0;
+		flow->penalty = 0;
+	} else {
+		/*
+		 * NOTE(review): the penalty is folded into 'used' here but is
+		 * only reduced in the capped case below — confirm this carry-
+		 * over is what the algorithm intends.
+		 */
+		flow->used = flow->penalty + flow->used - cycles * ceil;
+		if (flow->used > ceil) {
+			flow->used = ceil;
+			flow->penalty -= ceil;
+		}
+	}
+}
+
+/*
+ * Enqueue handler: account the packet against its flow and either queue
+ * it or drop it.
+ *
+ * skb: packet to enqueue
+ * sch: this qdisc
+ *
+ * A packet is dropped when it is not traceable, when the configured queue
+ * length is reached, when no flow entry can be obtained, or when its flow
+ * is at or above 'rate' and either the shared bucket is too low or the
+ * flow already reached 'ceil'.
+ *
+ * Returns NET_XMIT_SUCCESS when the packet was queued and NET_XMIT_CN
+ * when it was dropped (the skb is freed in that case).
+ */
+static int perflow_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	struct perflow_sched_data *q = qdisc_priv(sch);
+	struct perflow_entry tuple, *flow;
+	u32 hash;
+
+	/* not a traceable flow */
+	if (!perflow_is_valid(skb))
+		goto drop;
+
+	/* if max qlen is set and current queue is full, drop */
+	if (q->qlen && sch->q.qlen >= q->qlen)
+		goto drop;
+
+	perflow_fill_tuple(&tuple, skb);
+	flow = perflow_find_flow(&hash, &tuple, sch);
+
+	if (flow == NULL) {
+		flow = perflow_new_flow(hash, &tuple, sch);
+		if (flow == NULL)
+			goto drop;
+	}
+
+	/*
+	 * renew timer for flow; mod_timer() works whether or not the timer
+	 * is still pending, whereas add_timer() must never be called on a
+	 * pending timer
+	 */
+	mod_timer(&flow->timer, jiffies + PERFLOW_LIFETIME);
+
+	/* renew used for this flow */
+	perflow_adjust_used(flow, q->rate, q->ceil);
+
+	/* refill the shared bucket once more than 1/10 second has passed */
+	if ((jiffies - q->jiffies) > HZ/10) {
+		q->token = q->grate / 10;
+		q->jiffies = jiffies;
+	}
+
+	/* we always satisfy flow under rate */
+	if (flow->used < q->rate) {
+		flow->used += skb->len;
+		if (flow->used > q->rate) {
+			/* if we borrow, we may receive penalty */
+			if (list_empty(&flow->borrow))
+				list_add_tail(&flow->borrow, &q->borrow);
+		}
+		if (flow->used > q->ceil)
+			flow->penalty += flow->used - q->ceil;
+		if (q->token < skb->len) {
+			/*
+			 * The bucket cannot cover this packet: a borrower
+			 * pays for the missing part. Work out the shortfall
+			 * before emptying the bucket, otherwise the penalty
+			 * is always the full packet length.
+			 */
+			u32 shortfall = skb->len - q->token;
+
+			q->token = 0;
+			perflow_punish(shortfall, &q->borrow);
+		} else {
+			q->token -= skb->len;
+		}
+	} else {
+		if (q->token < skb->len || flow->used >= q->ceil)
+			goto drop;
+
+		flow->used += skb->len;
+		q->token -= skb->len;
+
+		if (list_empty(&flow->borrow))
+			list_add_tail(&flow->borrow, &q->borrow);
+		if (flow->used > q->ceil)
+			flow->penalty += flow->used - q->ceil;
+	}
+
+	sch->qstats.backlog += skb->len;
+	sch->bstats.bytes += skb->len;
+	sch->bstats.packets++;
+	__skb_queue_tail(&sch->q, skb);
+	return NET_XMIT_SUCCESS;
+
+drop:
+	printk(KERN_WARNING "perflow drop\n");
+	sch->qstats.overlimits++;
+	kfree_skb(skb);
+	sch->qstats.drops++;
+	return NET_XMIT_CN;
+}
+
+/*
+ * Put a packet back at the head of the queue and account for it in the
+ * backlog and requeue counters. Always returns 0.
+ */
+static int perflow_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+	sch->qstats.requeues++;
+	sch->qstats.backlog += skb->len;
+	__skb_queue_head(&sch->q, skb);
+
+	return 0;
+}
+
+/*
+ * Hand out the packet at the head of the queue, or NULL when the queue
+ * is empty. The backlog counter is reduced by the packet's length.
+ */
+static struct sk_buff *perflow_dequeue(struct Qdisc *sch)
+{
+	struct sk_buff *head = __skb_dequeue(&sch->q);
+
+	if (head == NULL)
+		return NULL;
+
+	sch->qstats.backlog -= head->len;
+	return head;
+}
+
+/*
+ * Drop the packet at the tail of the queue.
+ *
+ * Returns the length of the dropped packet, or 0 when the queue was empty.
+ */
+static unsigned int perflow_drop(struct Qdisc *sch)
+{
+	struct sk_buff *victim = __skb_dequeue_tail(&sch->q);
+	unsigned int freed;
+
+	if (!victim)
+		return 0;
+
+	freed = victim->len;
+	sch->qstats.backlog -= freed;
+	sch->qstats.drops++;
+	kfree_skb(victim);
+
+	return freed;
+}
+
+/*
+ * Discard every queued packet and clear the backlog counter. The flow
+ * table is left untouched.
+ */
+static void perflow_reset(struct Qdisc *sch)
+{
+	struct sk_buff_head *queue = &sch->q;
+
+	skb_queue_purge(queue);
+	sch->qstats.backlog = 0;
+}
+
+/*
+ * Apply a new set of parameters to the qdisc.
+ *
+ * sch: this qdisc
+ * opt: attribute whose payload is a struct tc_perflow_qopt
+ *
+ * If limit and qlen are lowered while we are above the new values, no
+ * flow and no packet is dropped here: sch->q shrinks to the allowed
+ * length and q->flow_count falls because no new flow is traced.
+ *
+ * Returns 0 on success and -EINVAL when 'opt' is missing or too short.
+ */
+static int perflow_change(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct perflow_sched_data *q = qdisc_priv(sch);
+	struct tc_perflow_qopt *ctl;
+	u32 per_flow;
+
+	if (opt == NULL || opt->rta_len < RTA_LENGTH(sizeof(*ctl)))
+		return -EINVAL;
+
+	ctl = RTA_DATA(opt);
+
+	q->rate = ctl->rate.rate;
+	q->ceil = ctl->ceil.rate;
+	q->limit = ctl->limit;
+	q->qlen = ctl->qlen;
+
+	/* same default as userspace, instead of a second magic 1024 */
+	if (q->limit == 0)
+		q->limit = TC_PERFLOW_DEFAULTLIMIT;
+	printk(KERN_WARNING "(rate,ceil,limit,qlen)=(%u,%u,%u,%u)\n",
+	       q->rate, q->ceil, q->limit, q->qlen);
+
+	/*
+	 * aggregate rate is (rate * 1.05 * limit); saturate at the largest
+	 * u32 instead of silently wrapping to a tiny budget
+	 */
+	per_flow = q->rate + q->rate/20;
+	if (per_flow < q->rate || per_flow > 0xffffffffU / q->limit)
+		q->grate = 0xffffffffU;
+	else
+		q->grate = per_flow * q->limit;
+
+	return 0;
+}
+
+/*
+ * Initialise a freshly created perflow qdisc.
+ *
+ * sch: this qdisc
+ * opt: initial parameters, handed on to perflow_change()
+ *
+ * The list heads are set up before the options are parsed so that the
+ * private data is in a consistent state (all lists empty) even when this
+ * function fails.
+ *
+ * Returns 0 on success or the error from perflow_change().
+ */
+static int perflow_init(struct Qdisc *sch, struct rtattr *opt)
+{
+	struct perflow_sched_data *q = qdisc_priv(sch);
+	int i;
+
+	INIT_LIST_HEAD(&q->borrow);
+
+	for (i = 0; i < PERFLOW_HSIZE; i++)
+		INIT_LIST_HEAD(q->ht + i);
+
+	return perflow_change(sch, opt);
+}
+
+/*
+ * Tear down the qdisc: cancel the timer of every traced flow, unlink the
+ * flow from the borrow list and its hash bucket, and free it.
+ *
+ * NOTE(review): del_timer() does not wait for a handler that is already
+ * running — confirm a flow cannot be freed here and by its timer at once.
+ */
+static void perflow_destroy(struct Qdisc *sch)
+{
+	struct perflow_sched_data *q = qdisc_priv(sch);
+	struct perflow_entry *flow, *next;
+	int bucket;
+
+	for (bucket = 0; bucket < PERFLOW_HSIZE; bucket++) {
+		list_for_each_entry_safe(flow, next, q->ht + bucket, hlist) {
+			del_timer(&flow->timer);
+			if (!list_empty(&flow->borrow))
+				list_del_init(&flow->borrow);
+			list_del(&flow->hlist);
+			kfree(flow);
+		}
+	}
+}
+
+/*
+ * Report the current parameters to userspace.
+ *
+ * sch: this qdisc
+ * skb: netlink message under construction
+ *
+ * The parameters are emitted as one TCA_OPTIONS attribute holding a
+ * struct tc_perflow_qopt, i.e. the same layout perflow_change() accepts.
+ * The stub this replaces always returned -1, so the qdisc could never be
+ * dumped.
+ *
+ * Returns the new message length on success, or -1 (with the message
+ * trimmed back to its previous length) when the attribute does not fit.
+ */
+static int perflow_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+	struct perflow_sched_data *q = qdisc_priv(sch);
+	unsigned char *b = skb->tail;
+	struct tc_perflow_qopt opt;
+
+	/* only the .rate members of the two ratespecs are meaningful here */
+	memset(&opt, 0, sizeof(opt));
+	opt.rate.rate = q->rate;
+	opt.ceil.rate = q->ceil;
+	opt.limit = q->limit;
+	opt.qlen = q->qlen;
+
+	/* RTA_PUT jumps to rtattr_failure when the skb has no room left */
+	RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+	return skb->len;
+
+rtattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+/*
+ * Operations table registered for the "perflow" qdisc. It is classless:
+ * .cl_ops (and .next) are left at their zero default.
+ */
+static struct Qdisc_ops perflow_qdisc_ops = {
+	.id		= "perflow",
+	.owner		= THIS_MODULE,
+	.priv_size	= sizeof(struct perflow_sched_data),
+
+	/* data path */
+	.enqueue	= perflow_enqueue,
+	.dequeue	= perflow_dequeue,
+	.requeue	= perflow_requeue,
+	.drop		= perflow_drop,
+
+	/* control path */
+	.init		= perflow_init,
+	.reset		= perflow_reset,
+	.destroy	= perflow_destroy,
+	.change		= perflow_change,
+	.dump		= perflow_dump,
+};
+
+/*
+ * Module entry point: register the perflow qdisc operations and pass the
+ * result of register_qdisc() straight back.
+ */
+static int __init perflow_module_init(void)
+{
+	int err = register_qdisc(&perflow_qdisc_ops);
+
+	return err;
+}
+
+static void __exit perflow_module_exit(void)	/* module exit point */
+{
+ unregister_qdisc(&perflow_qdisc_ops);	/* undo the registration done in perflow_module_init() */
+}
+
+module_init(perflow_module_init);
+module_exit(perflow_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Wang Jian <lark@linux.net.cn>");
[-- Attachment #4: t.sh --]
[-- Type: application/x-sh, Size: 489 bytes --]
[-- Attachment #5: Type: text/plain, Size: 143 bytes --]
_______________________________________________
LARTC mailing list
LARTC@mailman.ds9a.nl
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc
next reply other threads:[~2005-04-04 7:21 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-04-04 7:21 Wang Jian [this message]
2005-04-04 8:51 ` [LARTC] new perflow rate control queue Patrick McHardy
2005-04-04 9:10 ` Wang Jian
2005-04-04 11:42 ` Andy Furniss
2005-04-04 13:53 ` Wang Jian
2005-04-04 14:39 ` Wang Jian
2005-04-04 15:10 ` Andy Furniss
2005-04-04 15:23 ` Andy Furniss
2005-04-04 15:57 ` Wang Jian
2005-04-05 22:40 ` Andy Furniss
2005-04-06 4:17 ` Wang Jian
2005-04-06 12:29 ` Andy Furniss
2005-04-06 12:48 ` Wang Jian
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20050404152117.6d90635d.lark@linux.net.cn \
--to=lark@linux.net.cn \
--cc=lartc@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.