* Latency Problems with 2.6.31/PREEMPT_RT on a 1.2GHz Celeron
[not found] <abe060d60912171537j10fe194r5f0cd1aca85475df@mail.gmail.com>
@ 2009-12-18 16:56 ` Vladimir Cotfas
2009-12-19 11:34 ` Mark Hounschell
0 siblings, 1 reply; 5+ messages in thread
From: Vladimir Cotfas @ 2009-12-18 16:56 UTC (permalink / raw)
To: linux-rt-users; +Cc: Scott Gilbertson
[-- Attachment #1: Type: text/plain, Size: 1792 bytes --]
All,
I have a a driver that must process an interrupt every 100 uS without
missing any. I am having latency issues and I am looking for some help
debugging my problem.
I am using a TME 5831 motherboard (VIA TwisterT chipset) with a custom
BIOS. The SMBIOS is disabled.
I prepared a minimalistic kernel (configuration file attached) that uses
a ramdisk for the apps (as to limit the scope of the latency problem).
It has no disk(s) or network drivers. I use tty0 console and a serial
console.
The IRQ is fired every 100 uS. The ISR for this takes typically 30 uS to
execute and does some PCI transfers with the hardware. The typical
latency between IRQ-assert and the execution of the ISR is about 12 uS.
However sometimes IRQs for my hardware are lost or delayed.
While doing some instrumentation and using the kernel latency tracers I
noticed that the scheduler sometimes take a long time to execute (50+
uS, with spikes to 99 uS).
I instrumented handle_irq() (see irq_32.diff) to take the time stamps
before & after executing the ISR handlers. The time stamps are kept in
some global arrays (which are exported as symbols). A utility module
(irq_time.c) exports this to user-space via /proc/irq_time.
I did a run in which my custom driver is not loaded. The file
report.txt (attached) shows the cpu info, the IRQ list (the timer
interrupt is 0). The dump of /proc/irq_time has histograms which show
that the timer interrupt has a sensible number of occurrences that
gravitate around 55 uS. I managed to cause these high numbers by running
this script:
while :; do cat /proc/nonexistent; done
on the tty0 console.
I need to understand whether this is caused by my setup or it's
something to be expected from the scheduler/timer interrupt.
Regards,
Vlad
[-- Attachment #2: report.txt --]
[-- Type: text/plain, Size: 4886 bytes --]
cat /proc/cpuinfo
processor : 0
vendor_id : GenuineIntel
cpu family : 6
model : 11
model name : Intel(R) Celeron(TM) CPU 1200MHz
stepping : 4
cpu MHz : 1202.732
cache size : 256 KB
fdiv_bug : no
hlt_bug : no
f00f_bug : no
coma_bug : no
fpu : yes
fpu_exception : yes
cpuid level : 2
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 sep mtrr pge mca cmov pse36 mmx fxsr sse
bogomips : 2405.46
clflush size : 32
power management:
$ cat /proc/sched_debug
Sched Debug Version: v0.09, 2.6.31.5-pae #40
now at 569878.550195 msecs
.jiffies : 26987
.sysctl_sched_latency : 20.000000
.sysctl_sched_min_granularity : 4.000000
.sysctl_sched_wakeup_granularity : 5.000000
.sysctl_sched_child_runs_first : 0.000001
.sysctl_sched_features : 113917
cpu#0, 1202.732 MHz
.nr_running : 2
.load : 2048
.nr_switches : 166477
.nr_load_updates : 56987
.nr_uninterruptible : 0
.next_balance : 0.000000
.curr->pid : 10247
.clock : 569876.819824
.cpu_load[0] : 0
.cpu_load[1] : 0
.cpu_load[2] : 0
.cpu_load[3] : 0
.cpu_load[4] : 0
.rt.rt_nr_running : 0
.rt.rt_nr_uninterruptible : 4294967295
.rto_schedule : 0
.rto_schedule_tail : 0
.rto_wakeup : 0
.rto_pulled : 0
.rto_pushed : 0
.yld_count : 0
.sched_switch : 0
.sched_count : 166492
.sched_goidle : 53938
.ttwu_count : 0
.ttwu_local : 0
.bkl_count : 113
cfs_rq[0]:
.exec_clock : 33842.569082
.MIN_vruntime : 109488.040579
.min_vruntime : 109478.039924
.max_vruntime : 109488.040579
.spread : 0.000000
.spread0 : 0.000000
.nr_running : 2
.load : 2048
.nr_spread_over : 0
rt_rq[0]:
.rt_nr_running : 0
.rt_throttled : 0
.rt_time : 0.244658
.rt_runtime : 950.000000
runnable tasks:
task PID tree-key switches prio exec-runtime sum-exec sum-sleep
----------------------------------------------------------------------------------------------------------
sh 94 109488.040579 1368 120 109488.040579 76.460871 565643.825364
R cat 10247 109478.039924 0 120 109478.039924 0.000000 0.000000
$ cat /proc/schedstat
version 15
timestamp 27327
cpu0 0 0 167641 54449 0 0 34327664830 27532065207 113177
$ cat /proc/interrupts
CPU0
0: 57955 XT-PIC-XT timer
1: 46 XT-PIC-XT i8042
2: 0 XT-PIC-XT cascade
4: 4062 XT-PIC-XT serial
8: 0 XT-PIC-XT rtc
12: 3 XT-PIC-XT i8042
NMI: 0 Non-maskable interrupts
TRM: 0 Thermal event interrupts
MCE: 0 Machine check exceptions
MCP: 2 Machine check polls
ERR: 0
$ cat /proc/irq_time
IRQ count avg usS max uS
0 58634 11 89
1 46 6 25
4 4123 18 30
12 3 9 10
IRQ usS count
0 7 6
0 8 54032
0 9 907
0 10 201
0 11 34
0 12 25
0 13 13
0 14 11
0 15 4
0 16 3
0 17 1
0 18 7
0 20 2
0 21 3
0 22 3
0 23 1
0 29 4
0 30 2
0 31 4
0 32 7
0 33 5
0 34 19
0 35 22
0 36 23
0 37 23
0 38 33
0 39 32
0 40 44
0 41 47
0 42 56
0 43 76
0 44 93
0 45 148
0 46 184
0 47 133
0 48 148
0 49 114
0 50 92
0 51 83
0 52 77
0 53 86
0 54 66
0 55 63
0 56 61
0 57 70
0 58 101
0 59 99
0 60 123
0 61 211
0 62 294
0 63 312
0 64 212
0 65 100
0 66 51
0 67 11
0 68 7
0 69 2
0 70 2
0 71 5
0 72 5
0 73 6
0 74 3
0 75 5
0 76 2
0 77 6
0 79 3
0 82 1
0 85 1
0 87 1
0 88 2
0 89 1
IRQ usS count
1 5 39
1 6 2
1 7 2
1 18 1
1 20 1
1 25 1
IRQ usS count
4 5 557
4 6 156
4 7 74
4 8 122
4 9 73
4 10 45
4 11 39
4 12 79
4 13 97
4 14 174
4 15 180
4 16 132
4 17 162
4 18 154
4 19 121
4 20 171
4 21 104
4 22 90
4 23 119
4 24 187
4 25 179
4 26 195
4 27 298
4 28 425
4 29 178
4 30 12
IRQ usS count
12 8 1
12 9 1
12 10 1
[-- Attachment #3: config-rt-100Hz.gz --]
[-- Type: application/x-gzip, Size: 5575 bytes --]
[-- Attachment #4: irq_32.diff --]
[-- Type: application/octet-stream, Size: 2413 bytes --]
--- arch/x86/kernel/irq_32.c.orig 2009-10-22 18:57:56.000000000 -0400
+++ arch/x86/kernel/irq_32.c 2009-12-17 17:35:32.000000000 -0500
@@ -192,6 +192,41 @@
execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; }
#endif
+#define MAX_TRACED_LATENCY 100
+#ifdef MAX_TRACED_LATENCY
+ #define PROC_MHZ 1202
+#endif
+
+// Dec 11, 2009 ulianov - time IRQs
+volatile u32 irq_count[NR_VECTORS] = {0};
+EXPORT_SYMBOL(irq_count);
+
+volatile u64 irq_total_time[NR_VECTORS] = {0};
+EXPORT_SYMBOL(irq_total_time);
+
+volatile u64 irq_max_time[NR_VECTORS] = {0};
+EXPORT_SYMBOL(irq_max_time);
+
+#ifdef MAX_TRACED_LATENCY
+volatile u64 irq_max_time_vec[NR_VECTORS][MAX_TRACED_LATENCY+1];
+EXPORT_SYMBOL(irq_max_time_vec);
+#endif
+
+//#define PULSE_CALLBACK
+#ifdef PULSE_CALLBACK
+static void (*pulse_cbk)(unsigned irq, int state, void* func);
+static DEFINE_ATOMIC_SPINLOCK(pulse_cbk_lock);
+
+void irq_set_pulse_cbk(void (*handler)(unsigned irq, int state, void* func))
+{
+ u32 flags = 0;
+ atomic_spin_lock_irqsave(&pulse_cbk_lock, flags);
+ pulse_cbk = handler;
+ atomic_spin_unlock_irqrestore(&pulse_cbk_lock, flags);
+}
+EXPORT_SYMBOL(irq_set_pulse_cbk);
+#endif // PULSE_CALLBACK
+
bool handle_irq(unsigned irq, struct pt_regs *regs)
{
struct irq_desc *desc;
@@ -206,7 +241,37 @@
if (!execute_on_irq_stack(overflow, desc, irq)) {
if (unlikely(overflow))
print_stack_overflow();
+
+#ifdef PULSE_CALLBACK
+ if(pulse_cbk != NULL) {
+ atomic_spin_lock(&pulse_cbk_lock);
+ if(pulse_cbk != NULL) pulse_cbk(irq, 1, &handle_irq);
+ atomic_spin_unlock(&pulse_cbk_lock);
+ }
+#endif // PULSE_CALLBACK
+ // Dec 11, 2009 ulianov - time IRQs
+ u64 ts = get_cycles();
desc->handle_irq(irq, desc);
+ u64 te = get_cycles();
+#ifdef PULSE_CALLBACK
+ if(pulse_cbk != NULL) {
+ atomic_spin_lock(&pulse_cbk_lock);
+ if(pulse_cbk != NULL) pulse_cbk(irq, 0, &handle_irq);
+ atomic_spin_unlock(&pulse_cbk_lock);
+ }
+#endif // PULSE_CALLBACK
+ if(te >= ts) { // the TSC did not overflow
+ u64 dT = te - ts;
+ irq_count[irq]++;
+ irq_total_time[irq] += dT;
+ if(dT > irq_max_time[irq]) irq_max_time[irq] = dT;
+#ifdef MAX_TRACED_LATENCY
+ do_div(dT, PROC_MHZ);
+ if(dT < MAX_TRACED_LATENCY)
+ irq_max_time_vec[irq][(int)dT]++;
+ else irq_max_time_vec[irq][MAX_TRACED_LATENCY]++;
+#endif
+ }
}
return true;
[-- Attachment #5: irq_time.c --]
[-- Type: application/octet-stream, Size: 3046 bytes --]
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/cpufreq.h>
#include <linux/proc_fs.h> // for the proc filesystem
#define NR_VECTORS 256
// defined in arch/x86/kernel/irq_32.c
extern volatile u32 irq_count[NR_VECTORS];
extern volatile u64 irq_total_time[NR_VECTORS];
extern volatile u64 irq_max_time[NR_VECTORS];
#define MAX_TRACED_LATENCY 100
#ifdef MAX_TRACED_LATENCY
extern volatile u64 irq_max_time_vec[NR_VECTORS][MAX_TRACED_LATENCY+1];
#endif
static u32 processorMegahertz;
static struct proc_dir_entry* procFileEntry;
#define STATIC_BUF_LEN 50000
static int procfile_read(char* buffer_UNUSED, char** start,
off_t offset, int count,
int* eof, void* data)
{
static int len = 0;
static char s_buf[STATIC_BUF_LEN] = {0};
const int buf_cnt = STATIC_BUF_LEN - 1;
char* buf = s_buf;
int irq = 0;
if(offset > 0) {
*start = s_buf + offset;
*eof = (len >= offset) ? 0 : 1;
return eof ? (len - offset) : 0;
}
len = 0;
len += snprintf(buf + len, buf_cnt - len - 1, " IRQ \tcount\tavg usS\tmax uS\n");
for(irq = 0; irq < NR_VECTORS; irq++) {
u64 max_div = irq_max_time[irq];
if(max_div == 0) continue;
u32 cnt = irq_count[irq];
u64 total = irq_total_time[irq];
u64 avg_div = 0;
if(cnt != 0) {
do_div(total, cnt);
avg_div = total;
do_div(avg_div, processorMegahertz); // TRICK!! div is modified, returns reminder
}
do_div(max_div, processorMegahertz); // TRICK!! div is modified, returns reminder
len += snprintf(buf + len, buf_cnt - len - 1, "%3d \t%lu\t%llu\t%llu\n", irq, cnt, avg_div, max_div);
}
#ifdef MAX_TRACED_LATENCY
for(irq = 0; irq < NR_VECTORS; irq++) {
u32 lat = 0;
if(irq_count[irq] == 0) continue;
len += snprintf(buf + len, buf_cnt - len - 1, " IRQ \tusS\tcount\n");
for(lat = 0; lat <= MAX_TRACED_LATENCY; lat++) {
u32 time = irq_max_time_vec[irq][lat];
if(time == 0) continue;
len += snprintf(buf + len, buf_cnt - len - 1, "%3d \t%lu\t%lu\n", irq, lat, time);
}
}
#endif // MAX_TRACED_LATENCY
*start = s_buf + offset;
*eof = (len >= offset) ? 0 : 1;
return eof ? (len - offset) : 0;
}
static int __init irq_time_init(void)
{
unsigned int freq = 0;
#ifdef CONFIG_CPU_FREQ
freq = cpufreq_get(0 /*cpu*/); // in kHz!
#endif
if(freq == 0)
freq = cpu_khz;
freq /= 1000;
processorMegahertz = freq;
if(processorMegahertz <= 0) return -EIO;
procFileEntry = create_proc_read_entry(
"irq_time",
S_IFREG | S_IRUGO,
NULL, /* base entry */
procfile_read, /* read operation */
NULL /* data */
);
if(procFileEntry == NULL) return -ENOMEM;
return 0;
}
module_init(irq_time_init);
static void __exit irq_time_cleanup(void)
{
remove_proc_entry(procFileEntry->name, procFileEntry->subdir);
}
module_exit(irq_time_cleanup);
MODULE_LICENSE("GPL");
^ permalink raw reply [flat|nested] 5+ messages in thread