* [RFC] use ktime for packet scheduling
@ 2007-02-22 22:30 Stephen Hemminger
2007-02-24 14:17 ` Patrick McHardy
0 siblings, 1 reply; 4+ messages in thread
From: Stephen Hemminger @ 2007-02-22 22:30 UTC (permalink / raw)
To: netdev
Here is an experimental patch that changes the packet scheduler to use
ktime instead of gettimeofday. This should be faster on 64-bit and avoid some of
the math overhead issues with the previous code.
Also, since it uses the monotonic clock, it won't cause timing glitches when NTP
adjusts the clock.
---
include/net/pkt_sched.h | 123 ++++++++++++++----------------------------------
kernel/hrtimer.c | 1
2 files changed, 37 insertions(+), 87 deletions(-)
--- netem.orig/include/net/pkt_sched.h 2007-02-22 12:08:53.000000000 -0800
+++ netem/include/net/pkt_sched.h 2007-02-22 14:21:57.000000000 -0800
@@ -56,19 +56,48 @@
#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-typedef struct timeval psched_time_t;
-typedef long psched_tdiff_t;
+typedef ktime_t psched_time_t;
+typedef long psched_tdiff_t;
+
+/* Avoid doing 64 bit divide by 1000 */
+#define PSCHED_US2NS(x) ((s64)(x) << 10)
+#define PSCHED_NS2US(x) ((x) >> 10)
-#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp))
+#define PSCHED_GET_TIME(stamp) ((stamp) = ktime_get())
#define PSCHED_US2JIFFIE(usecs) usecs_to_jiffies(usecs)
#define PSCHED_JIFFIE2US(delay) jiffies_to_usecs(delay)
+static inline psched_tdiff_t psched_diff(const psched_time_t tv1,
+ const psched_time_t tv2)
+{
+ return PSCHED_NS2US(ktime_to_ns(ktime_sub(tv1, tv2)));
+}
+
+#define PSCHED_TDIFF(tv1, tv2) psched_diff(tv1, tv2)
+#define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
+ min_t(long, psched_diff((tv1),(tv2)), bound)
+
+static inline psched_time_t psched_add(const psched_time_t tv1, u32 usec)
+{
+ u64 ns = PSCHED_US2NS(usec);
+ return ktime_add_ns(tv1, ns);
+}
+
+#define PSCHED_TLESS(tv1, tv2) ((tv1).tv64 < (tv2).tv64)
+#define PSCHED_TADD(tv, delta) psched_add((tv), (delta))
+#define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = psched_add((tv), (delta)))
+
+/* Set/check that time is in the "past perfect" */
+
+#define PSCHED_SET_PASTPERFECT(t) ((t).tv64 = 0)
+#define PSCHED_IS_PASTPERFECT(t) ((t).tv64 == 0)
+#define PSCHED_AUDIT_TDIFF(t)
#else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
typedef u64 psched_time_t;
typedef long psched_tdiff_t;
-#ifdef CONFIG_NET_SCH_CLK_JIFFIES
+# ifdef CONFIG_NET_SCH_CLK_JIFFIES
#if HZ < 96
#define PSCHED_JSCALE 14
@@ -86,8 +115,7 @@
#define PSCHED_US2JIFFIE(delay) (((delay)+(1<<PSCHED_JSCALE)-1)>>PSCHED_JSCALE)
#define PSCHED_JIFFIE2US(delay) ((delay)<<PSCHED_JSCALE)
-#endif /* CONFIG_NET_SCH_CLK_JIFFIES */
-#ifdef CONFIG_NET_SCH_CLK_CPU
+# elif defined(CONFIG_NET_SCH_CLK_CPU)
#include <asm/timex.h>
extern psched_tdiff_t psched_clock_per_hz;
@@ -110,89 +138,10 @@
#define PSCHED_US2JIFFIE(delay) (((delay)+psched_clock_per_hz-1)/psched_clock_per_hz)
#define PSCHED_JIFFIE2US(delay) ((delay)*psched_clock_per_hz)
-#endif /* CONFIG_NET_SCH_CLK_CPU */
-
+#else
+#error CONFIG_NET_SCH_CLK not set correctly!
#endif /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
-#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY
-#define PSCHED_TDIFF(tv1, tv2) \
-({ \
- int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
- int __delta = (tv1).tv_usec - (tv2).tv_usec; \
- if (__delta_sec) { \
- switch (__delta_sec) { \
- default: \
- __delta = 0; \
- case 2: \
- __delta += USEC_PER_SEC; \
- case 1: \
- __delta += USEC_PER_SEC; \
- } \
- } \
- __delta; \
-})
-
-static inline int
-psched_tod_diff(int delta_sec, int bound)
-{
- int delta;
-
- if (bound <= USEC_PER_SEC || delta_sec > (0x7FFFFFFF/USEC_PER_SEC)-1)
- return bound;
- delta = delta_sec * USEC_PER_SEC;
- if (delta > bound || delta < 0)
- delta = bound;
- return delta;
-}
-
-#define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
-({ \
- int __delta_sec = (tv1).tv_sec - (tv2).tv_sec; \
- int __delta = (tv1).tv_usec - (tv2).tv_usec; \
- switch (__delta_sec) { \
- default: \
- __delta = psched_tod_diff(__delta_sec, bound); break; \
- case 2: \
- __delta += USEC_PER_SEC; \
- case 1: \
- __delta += USEC_PER_SEC; \
- case 0: \
- if (__delta > bound || __delta < 0) \
- __delta = bound; \
- } \
- __delta; \
-})
-
-#define PSCHED_TLESS(tv1, tv2) (((tv1).tv_usec < (tv2).tv_usec && \
- (tv1).tv_sec <= (tv2).tv_sec) || \
- (tv1).tv_sec < (tv2).tv_sec)
-
-#define PSCHED_TADD2(tv, delta, tv_res) \
-({ \
- int __delta = (tv).tv_usec + (delta); \
- (tv_res).tv_sec = (tv).tv_sec; \
- while (__delta >= USEC_PER_SEC) { (tv_res).tv_sec++; __delta -= USEC_PER_SEC; } \
- (tv_res).tv_usec = __delta; \
-})
-
-#define PSCHED_TADD(tv, delta) \
-({ \
- (tv).tv_usec += (delta); \
- while ((tv).tv_usec >= USEC_PER_SEC) { (tv).tv_sec++; \
- (tv).tv_usec -= USEC_PER_SEC; } \
-})
-
-/* Set/check that time is in the "past perfect";
- it depends on concrete representation of system time
- */
-
-#define PSCHED_SET_PASTPERFECT(t) ((t).tv_sec = 0)
-#define PSCHED_IS_PASTPERFECT(t) ((t).tv_sec == 0)
-
-#define PSCHED_AUDIT_TDIFF(t) ({ if ((t) > 2000000) (t) = 2000000; })
-
-#else /* !CONFIG_NET_SCH_CLK_GETTIMEOFDAY */
-
#define PSCHED_TDIFF(tv1, tv2) (long)((tv1) - (tv2))
#define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \
min_t(long long, (tv1) - (tv2), bound)
--- netem.orig/kernel/hrtimer.c 2007-02-22 14:03:11.000000000 -0800
+++ netem/kernel/hrtimer.c 2007-02-22 14:11:02.000000000 -0800
@@ -59,6 +59,7 @@
return timespec_to_ktime(now);
}
+EXPORT_SYMBOL_GPL(ktime_get);
/**
* ktime_get_real - get the real (wall-) time in ktime_t format
^ permalink raw reply [flat|nested] 4+ messages in thread* Re: [RFC] use ktime for packet scheduling 2007-02-22 22:30 [RFC] use ktime for packet scheduling Stephen Hemminger @ 2007-02-24 14:17 ` Patrick McHardy 2007-03-07 23:59 ` Stephen Hemminger 0 siblings, 1 reply; 4+ messages in thread From: Patrick McHardy @ 2007-02-24 14:17 UTC (permalink / raw) To: Stephen Hemminger; +Cc: netdev Stephen Hemminger wrote: > Here is an experimental patch that changes the packet scheduler to use > ktime instead of gettimeofday. This should be faster on 64 bit and avoid some of > the math overhead issues with previous code. > > Also since it uses monotonic clock, it won't cause timing glitches when NTP > adjusts the clock. This looks like a good idea, even if we can't take full advantage of the higher precision for now. BTW, any news on the iproute patches I sent you for this? > --- netem.orig/include/net/pkt_sched.h 2007-02-22 12:08:53.000000000 -0800 > +++ netem/include/net/pkt_sched.h 2007-02-22 14:21:57.000000000 -0800 > @@ -56,19 +56,48 @@ > > #ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY > > -typedef struct timeval psched_time_t; > -typedef long psched_tdiff_t; > +typedef ktime_t psched_time_t; > +typedef long psched_tdiff_t; > + > +/* Avoid doing 64 bit divide by 1000 */ > +#define PSCHED_US2NS(x) ((s64)(x) << 10) > +#define PSCHED_NS2US(x) ((x) >> 10) Since you use this for PSCHED_TDIFF etc, the resulting values are not exactly microseconds anymore. You need to adjust psched_us_per_tick/psched_tick_per_us so userspace can correctly calculate time values. > -#define PSCHED_GET_TIME(stamp) do_gettimeofday(&(stamp)) > +#define PSCHED_GET_TIME(stamp) ((stamp) = ktime_get()) > #define PSCHED_US2JIFFIE(usecs) usecs_to_jiffies(usecs) > #define PSCHED_JIFFIE2US(delay) jiffies_to_usecs(delay) Both of these need to take into account that its not real microseconds anymore. Please also fix up the HFSC PSCHED_GET_TIME redefinition, it expects the results to be usable with these macros. 
> +static inline psched_tdiff_t psched_diff(const psched_time_t tv1, > + const psched_time_t tv2) > +{ > + return PSCHED_NS2US(ktime_to_ns(ktime_sub(tv1, tv2))); > +} > + > +#define PSCHED_TDIFF(tv1, tv2) psched_diff(tv1, tv2) > +#define PSCHED_TDIFF_SAFE(tv1, tv2, bound) \ > + min_t(long, psched_diff((tv1),(tv2)), bound) > + > +static inline psched_time_t psched_add(const psched_time_t tv1, u32 usec) > +{ > + u64 ns = PSCHED_US2NS(usec); > + return ktime_add_ns(tv1, ns); > +} > + > +#define PSCHED_TLESS(tv1, tv2) ((tv1).tv64 < (tv2).tv64) > +#define PSCHED_TADD(tv, delta) psched_add((tv), (delta)) > +#define PSCHED_TADD2(tv, delta, tv_res) ((tv_res) = psched_add((tv), (delta))) > + > +/* Set/check that time is in the "past perfect" */ > + > +#define PSCHED_SET_PASTPERFECT(t) ((t).tv64 = 0) > +#define PSCHED_IS_PASTPERFECT(t) ((t).tv64 == 0) Maybe use one of the 32 bit members, I guess that will generate better code on 32 bit. ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC] use ktime for packet scheduling 2007-02-24 14:17 ` Patrick McHardy @ 2007-03-07 23:59 ` Stephen Hemminger 2007-03-08 17:48 ` Patrick McHardy 0 siblings, 1 reply; 4+ messages in thread From: Stephen Hemminger @ 2007-03-07 23:59 UTC (permalink / raw) To: Patrick McHardy; +Cc: netdev Here is the lastest version of the netem patch to use hrtimers. It is against the current net tree, so it will need adjusting to fit with new psched/ktime stuff. --- include/net/pkt_sched.h | 3 ++ net/sched/sch_api.c | 30 +++++++++++++++++++++ net/sched/sch_netem.c | 68 ++++++++++++++++++++++++------------------------ 3 files changed, 67 insertions(+), 34 deletions(-) --- netem-dev.orig/net/sched/sch_netem.c +++ netem-dev/net/sched/sch_netem.c @@ -54,7 +54,7 @@ struct netem_sched_data { struct Qdisc *qdisc; - struct timer_list timer; + struct hrtimer timer; u32 latency; u32 loss; @@ -78,8 +78,9 @@ struct netem_sched_data { }; /* Time stamp put into socket buffer control block */ +/* TODO: move this to skb->timestamp */ struct netem_skb_cb { - psched_time_t time_to_send; + ktime_t due_time; }; /* init_crandom - initialize correlated random number generator @@ -207,14 +208,14 @@ static int netem_enqueue(struct sk_buff if (q->gap == 0 /* not doing reordering */ || q->counter < q->gap /* inside last reordering gap */ || q->reorder < get_crandom(&q->reorder_cor)) { - psched_time_t now; - psched_tdiff_t delay; + u64 ns; - delay = tabledist(q->latency, q->jitter, - &q->delay_cor, q->delay_dist); + ns = tabledist(q->latency, q->jitter, + &q->delay_cor, q->delay_dist) * 1000ul; + + + cb->due_time = ktime_add_ns(ktime_get(), ns); - PSCHED_GET_TIME(now); - PSCHED_TADD2(now, delay, cb->time_to_send); ++q->counter; ret = q->qdisc->enqueue(skb, q->qdisc); } else { @@ -222,7 +223,7 @@ static int netem_enqueue(struct sk_buff * Do re-ordering by putting one out of N packets at the front * of the queue. 
*/ - PSCHED_GET_TIME(cb->time_to_send); + cb->due_time = ktime_get(); q->counter = 0; ret = q->qdisc->ops->requeue(skb, q->qdisc); } @@ -273,41 +274,40 @@ static struct sk_buff *netem_dequeue(str if (skb) { const struct netem_skb_cb *cb = (const struct netem_skb_cb *)skb->cb; - psched_time_t now; + ktime_t now = ktime_get(); - /* if more time remaining? */ - PSCHED_GET_TIME(now); - - if (PSCHED_TLESS(cb->time_to_send, now)) { + /* if time has come to send? */ + if (now.tv64 <= cb->due_time.tv64) { pr_debug("netem_dequeue: return skb=%p\n", skb); sch->q.qlen--; sch->flags &= ~TCQ_F_THROTTLED; return skb; - } else { - psched_tdiff_t delay = PSCHED_TDIFF(cb->time_to_send, now); - - if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) { - qdisc_tree_decrease_qlen(q->qdisc, 1); - sch->qstats.drops++; - printk(KERN_ERR "netem: queue discpline %s could not requeue\n", - q->qdisc->ops->id); - } + } - mod_timer(&q->timer, jiffies + PSCHED_US2JIFFIE(delay)); - sch->flags |= TCQ_F_THROTTLED; + if (q->qdisc->ops->requeue(skb, q->qdisc) != NET_XMIT_SUCCESS) { + qdisc_tree_decrease_qlen(q->qdisc, 1); + sch->qstats.drops++; + printk(KERN_ERR "netem: queue discpline %s could not requeue\n", + q->qdisc->ops->id); } + + hrtimer_start(&q->timer, cb->due_time, HRTIMER_MODE_ABS); + sch->flags |= TCQ_F_THROTTLED; } return NULL; } -static void netem_watchdog(unsigned long arg) +static enum hrtimer_restart netem_watchdog(struct hrtimer *hrt) { - struct Qdisc *sch = (struct Qdisc *)arg; + struct netem_sched_data *q + = container_of(hrt, struct netem_sched_data, timer); + struct Qdisc *sch = q->qdisc; pr_debug("netem_watchdog qlen=%d\n", sch->q.qlen); sch->flags &= ~TCQ_F_THROTTLED; netif_schedule(sch->dev); + return HRTIMER_NORESTART; } static void netem_reset(struct Qdisc *sch) @@ -317,7 +317,7 @@ static void netem_reset(struct Qdisc *sc qdisc_reset(q->qdisc); sch->q.qlen = 0; sch->flags &= ~TCQ_F_THROTTLED; - del_timer_sync(&q->timer); + hrtimer_cancel(&q->timer); } /* Pass 
size change message down to embedded FIFO */ @@ -502,7 +502,8 @@ static int tfifo_enqueue(struct sk_buff const struct netem_skb_cb *cb = (const struct netem_skb_cb *)skb->cb; - if (!PSCHED_TLESS(ncb->time_to_send, cb->time_to_send)) + if (ktime_to_ns(ktime_sub(ncb->due_time, + cb->due_time)) >= 0) break; } @@ -567,9 +568,8 @@ static int netem_init(struct Qdisc *sch, if (!opt) return -EINVAL; - init_timer(&q->timer); + hrtimer_init(&q->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); q->timer.function = netem_watchdog; - q->timer.data = (unsigned long) sch; q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops, TC_H_MAKE(sch->handle, 1)); @@ -590,7 +590,7 @@ static void netem_destroy(struct Qdisc * { struct netem_sched_data *q = qdisc_priv(sch); - del_timer_sync(&q->timer); + hrtimer_cancel(&q->timer); qdisc_destroy(q->qdisc); kfree(q->delay_dist); } @@ -605,8 +605,8 @@ static int netem_dump(struct Qdisc *sch, struct tc_netem_reorder reorder; struct tc_netem_corrupt corrupt; - qopt.latency = q->latency; - qopt.jitter = q->jitter; + qopt.latency = psched_usecs2ticks(q->latency); + qopt.jitter = psched_usecs2ticks(q->jitter); qopt.limit = q->limit; qopt.loss = q->loss; qopt.gap = q->gap; --- netem-dev.orig/include/net/pkt_sched.h +++ netem-dev/include/net/pkt_sched.h @@ -239,4 +239,7 @@ static inline unsigned psched_mtu(struct return dev->hard_header ? 
mtu + dev->hard_header_len : mtu; } +extern unsigned long psched_usecs2ticks(unsigned long us); +extern unsigned long psched_ticks2usecs(unsigned long ticks); + #endif --- netem-dev.orig/net/sched/sch_api.c +++ netem-dev/net/sched/sch_api.c @@ -1178,6 +1178,36 @@ reclassify: static int psched_us_per_tick = 1; static int psched_tick_per_us = 1; + +/** + * psched_ticks2usecs - convert from scaled PSCHED ticks to usecs + * @ticks: pscehed ticks + * Returns time in microseconds + */ +unsigned long psched_ticks2usecs(unsigned long ticks) +{ + u64 t = ticks; + + t *= psched_us_per_tick; + do_div(t, psched_tick_per_us); + return t; +} +EXPORT_SYMBOL(psched_ticks2usecs); + +/** + * psched_usecs2ticks - convert from usecs to PSCHED ticks + * @us: time in microseconds + */ +unsigned long psched_usecs2ticks(unsigned long us) +{ + u64 t = us; + + t *= psched_tick_per_us; + do_div(t, psched_us_per_tick); + return t; +} +EXPORT_SYMBOL(psched_usecs2ticks); + #ifdef CONFIG_PROC_FS static int psched_show(struct seq_file *seq, void *v) { ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [RFC] use ktime for packet scheduling 2007-03-07 23:59 ` Stephen Hemminger @ 2007-03-08 17:48 ` Patrick McHardy 0 siblings, 0 replies; 4+ messages in thread From: Patrick McHardy @ 2007-03-08 17:48 UTC (permalink / raw) To: Stephen Hemminger; +Cc: netdev Stephen Hemminger wrote: > Here is the lastest version of the netem patch to use hrtimers. > It is against the current net tree, so it will need adjusting to fit > with new psched/ktime stuff. Thanks, I'll add what's still needed to my patches. ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2007-03-08 17:42 UTC | newest] Thread overview: 4+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2007-02-22 22:30 [RFC] use ktime for packet scheduling Stephen Hemminger 2007-02-24 14:17 ` Patrick McHardy 2007-03-07 23:59 ` Stephen Hemminger 2007-03-08 17:48 ` Patrick McHardy
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).