* [Adeos-main] [RFC][PATCH 2/2] x86: Add support for ipipe_get_irq_regs
@ 2010-06-03 14:22 Jan Kiszka
2010-06-05 17:40 ` Philippe Gerum
0 siblings, 1 reply; 6+ messages in thread
From: Jan Kiszka @ 2010-06-03 14:22 UTC (permalink / raw)
To: Philippe Gerum; +Cc: adeos-main
From: Jan Kiszka <jan.kiszka@domain.hid>
Implement the x86 arch bits for ipipe_get_irq_regs support. This allows
dropping __ipipe_tick_regs and using the new service instead.
Signed-off-by: Jan Kiszka <jan.kiszka@domain.hid>
---
arch/x86/include/asm/ipipe.h | 43 +++++++++++++++++++++++++++++++++++++-
arch/x86/include/asm/ipipe_32.h | 2 +-
arch/x86/include/asm/ipipe_64.h | 2 +-
arch/x86/kernel/ipipe.c | 26 +++--------------------
4 files changed, 47 insertions(+), 26 deletions(-)
diff --git a/arch/x86/include/asm/ipipe.h b/arch/x86/include/asm/ipipe.h
index 4d711dd..971b3f3 100644
--- a/arch/x86/include/asm/ipipe.h
+++ b/arch/x86/include/asm/ipipe.h
@@ -31,8 +31,6 @@
#define IPIPE_PATCH_NUMBER 4
#endif
-DECLARE_PER_CPU(struct pt_regs, __ipipe_tick_regs);
-
static inline unsigned __ipipe_get_irq_vector(int irq)
{
#ifdef CONFIG_X86_IO_APIC
@@ -153,4 +151,45 @@ int __ipipe_check_tickdev(const char *devname);
#define __ipipe_move_root_irq(irq) do { } while (0)
#endif /* !(CONFIG_SMP && CONFIG_IPIPE) */
+/*
+ * Make sure ipipe_get_irq_regs() yields a register frame on this CPU.
+ *
+ * @orig_regs:  out parameter; receives whatever ipipe_get_irq_regs()
+ *              currently returns (NULL when no frame is installed).
+ * @saved_regs: caller-provided scratch frame.
+ *
+ * When a frame is already installed, only its orig_ax is backed up into
+ * @saved_regs, so that __ipipe_cleanup_irq_regs() can put it back.
+ * Otherwise a minimal kernel-mode frame is built in @saved_regs (flags,
+ * cs, ss, plus ip/bp/sp sampled from the current context) and installed
+ * as the per-CPU ipipe_irq_regs. The remaining pt_regs fields are left
+ * untouched.
+ */
+static inline void
+__ipipe_setup_irq_regs(struct pt_regs **orig_regs, struct pt_regs *saved_regs)
+{
+	*orig_regs = ipipe_get_irq_regs();
+
+	if (*orig_regs)
+		saved_regs->orig_ax = (*orig_regs)->orig_ax;
+	else {
+		saved_regs->flags = X86_EFLAGS_IF;
+		saved_regs->cs = __KERNEL_CS;
+		/*
+		 * The asm below uses a numeric local label ("1:" / "1b")
+		 * rather than a named one: this is a static inline header
+		 * function, so its body may be expanded several times in a
+		 * single translation unit, and a named label would then be
+		 * defined more than once at assembly time.
+		 */
+#ifdef CONFIG_X86_32
+		saved_regs->ss = __KERNEL_DS;
+		__asm__ __volatile__ ("1: movl $1b, %0\n\t"
+				      "movl %%ebp, %1\n\t"
+				      "movl %%esp, %2\n\t"
+				      : "=m" (saved_regs->ip),
+					"=m" (saved_regs->bp),
+					"=m" (saved_regs->sp));
+#else /* CONFIG_X86_64 */
+		saved_regs->ss = 0;
+		__asm__ __volatile__ ("1: movq $1b, %0\n\t"
+				      "movq %%rbp, %1\n\t"
+				      "movq %%rsp, %2\n\t"
+				      : "=m" (saved_regs->ip),
+					"=m" (saved_regs->bp),
+					"=m" (saved_regs->sp));
+#endif /* CONFIG_X86_64 */
+		/* Publish the fabricated frame for ipipe_get_irq_regs(). */
+		__ipipe_get_cpu_var(ipipe_irq_regs) = saved_regs;
+	}
+}
+
+/*
+ * Counterpart of __ipipe_setup_irq_regs().
+ *
+ * @orig_regs:  frame pointer handed back by the setup call (may be NULL).
+ * @saved_regs: scratch frame that was passed to the setup call.
+ *
+ * With a NULL @orig_regs the per-CPU ipipe_irq_regs pointer is reset to
+ * NULL; otherwise the orig_ax value held in @saved_regs is written back
+ * into @orig_regs.
+ */
+static inline void
+__ipipe_cleanup_irq_regs(struct pt_regs *orig_regs,
+			 struct pt_regs *saved_regs)
+{
+	if (!orig_regs) {
+		__ipipe_get_cpu_var(ipipe_irq_regs) = NULL;
+		return;
+	}
+
+	orig_regs->orig_ax = saved_regs->orig_ax;
+}
+
#endif /* !__X86_IPIPE_H */
diff --git a/arch/x86/include/asm/ipipe_32.h b/arch/x86/include/asm/ipipe_32.h
index 8d1f4b5..ce3d417 100644
--- a/arch/x86/include/asm/ipipe_32.h
+++ b/arch/x86/include/asm/ipipe_32.h
@@ -65,7 +65,7 @@ void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc);
static inline void __ipipe_call_root_xirq_handler(unsigned irq,
ipipe_irq_handler_t handler)
{
- struct pt_regs *regs = &__raw_get_cpu_var(__ipipe_tick_regs);
+ struct pt_regs *regs = ipipe_get_irq_regs();
regs->orig_ax = ~__ipipe_get_irq_vector(irq);
diff --git a/arch/x86/include/asm/ipipe_64.h b/arch/x86/include/asm/ipipe_64.h
index bc427b8..4452662 100644
--- a/arch/x86/include/asm/ipipe_64.h
+++ b/arch/x86/include/asm/ipipe_64.h
@@ -63,7 +63,7 @@ void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc);
static inline void __ipipe_call_root_xirq_handler(unsigned irq,
void (*handler)(unsigned, void *))
{
- struct pt_regs *regs = &__raw_get_cpu_var(__ipipe_tick_regs);
+ struct pt_regs *regs = ipipe_get_irq_regs();
regs->orig_ax = ~__ipipe_get_irq_vector(irq);
diff --git a/arch/x86/kernel/ipipe.c b/arch/x86/kernel/ipipe.c
index 521ec53..23b6908 100644
--- a/arch/x86/kernel/ipipe.c
+++ b/arch/x86/kernel/ipipe.c
@@ -900,11 +900,14 @@ int __ipipe_syscall_root(struct pt_regs *regs)
*/
int __ipipe_handle_irq(struct pt_regs *regs)
{
+ struct pt_regs *old_regs = __ipipe_get_cpu_var(ipipe_irq_regs);
struct ipipe_domain *this_domain, *next_domain;
unsigned int vector = regs->orig_ax, irq;
struct list_head *head, *pos;
int m_ack;
+ __ipipe_get_cpu_var(ipipe_irq_regs) = regs;
+
if ((long)regs->orig_ax < 0) {
vector = ~vector;
#ifdef CONFIG_X86_LOCAL_APIC
@@ -976,28 +979,7 @@ int __ipipe_handle_irq(struct pt_regs *regs)
__ipipe_walk_pipeline(head);
finalize_nosync:
-
- /*
- * Given our deferred dispatching model for regular IRQs, we
- * only record CPU regs for the last timer interrupt, so that
- * the timer handler charges CPU times properly. It is assumed
- * that other interrupt handlers don't actually care for such
- * information.
- */
-
- if (irq == __ipipe_tick_irq) {
- struct pt_regs *tick_regs = &__raw_get_cpu_var(__ipipe_tick_regs);
- tick_regs->flags = regs->flags;
- tick_regs->cs = regs->cs;
- tick_regs->ip = regs->ip;
- tick_regs->bp = regs->bp;
-#ifdef CONFIG_X86_64
- tick_regs->ss = regs->ss;
- tick_regs->sp = regs->sp;
-#endif
- if (!ipipe_root_domain_p)
- tick_regs->flags &= ~X86_EFLAGS_IF;
- }
+ __ipipe_get_cpu_var(ipipe_irq_regs) = old_regs;
if (!ipipe_root_domain_p ||
test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [Adeos-main] [RFC][PATCH 2/2] x86: Add support for ipipe_get_irq_regs
2010-06-03 14:22 [Adeos-main] [RFC][PATCH 2/2] x86: Add support for ipipe_get_irq_regs Jan Kiszka
@ 2010-06-05 17:40 ` Philippe Gerum
2010-06-05 18:37 ` Jan Kiszka
0 siblings, 1 reply; 6+ messages in thread
From: Philippe Gerum @ 2010-06-05 17:40 UTC (permalink / raw)
To: Jan Kiszka; +Cc: adeos-main
On Thu, 2010-06-03 at 16:22 +0200, Jan Kiszka wrote:
> From: Jan Kiszka <jan.kiszka@domain.hid>
>
> Implement the x86 arch bits for ipipe_get_irq_regs support. This allows
> to drop __ipipe_tick_regs and use the new service instead.
I'm unsure whether this patch would actually replace __ipipe_tick_regs
properly, particularly regarding how the profiling code works.
But I'm now convinced that we are on the wrong track, with trying to
track the IRQ frame, for the purpose we discussed on the Xenomai mailing
list. In fact, Gilles already told us about the best approach; we just did
not pay enough attention:
https://mail.gna.org/public/xenomai-help/2010-06/msg00033.html
Basically, we want to get a hold of the register frame of a task context
before it returns to user-space, so that we can do some fixups.
Therefore, the best option is clearly to add a special event, that the
pipeline would dispatch upon request, when:
- __ipipe_grab/handle_irq is about to return to userland
- __ipipe_syscall_root is about to do the same
In those contexts, we do have the _real_ register frame for the context,
which removes the restriction on faked IRQ frames induced by the
deferred interrupt model. We could then use that feature, not only for
having a more graceful watchdog, but for allowing Xenomai's real-time
signals to preempt syscall-less code as well.
I worked on this lately, and eventually coupled that event to a
generalized support for forcing userland to run a kernel exit. I'll post
more info to the Xenomai list.
>
> Signed-off-by: Jan Kiszka <jan.kiszka@domain.hid>
> ---
> arch/x86/include/asm/ipipe.h | 43 +++++++++++++++++++++++++++++++++++++-
> arch/x86/include/asm/ipipe_32.h | 2 +-
> arch/x86/include/asm/ipipe_64.h | 2 +-
> arch/x86/kernel/ipipe.c | 26 +++--------------------
> 4 files changed, 47 insertions(+), 26 deletions(-)
>
> diff --git a/arch/x86/include/asm/ipipe.h b/arch/x86/include/asm/ipipe.h
> index 4d711dd..971b3f3 100644
> --- a/arch/x86/include/asm/ipipe.h
> +++ b/arch/x86/include/asm/ipipe.h
> @@ -31,8 +31,6 @@
> #define IPIPE_PATCH_NUMBER 4
> #endif
>
> -DECLARE_PER_CPU(struct pt_regs, __ipipe_tick_regs);
> -
> static inline unsigned __ipipe_get_irq_vector(int irq)
> {
> #ifdef CONFIG_X86_IO_APIC
> @@ -153,4 +151,45 @@ int __ipipe_check_tickdev(const char *devname);
> #define __ipipe_move_root_irq(irq) do { } while (0)
> #endif /* !(CONFIG_SMP && CONFIG_IPIPE) */
>
> +static inline void
> +__ipipe_setup_irq_regs(struct pt_regs **orig_regs, struct pt_regs *saved_regs)
> +{
> + *orig_regs = ipipe_get_irq_regs();
> +
> + if (*orig_regs)
> + saved_regs->orig_ax = (*orig_regs)->orig_ax;
> + else {
> + saved_regs->flags = X86_EFLAGS_IF;
> + saved_regs->cs = __KERNEL_CS;
> +#ifdef CONFIG_X86_32
> + saved_regs->ss = __KERNEL_DS;
> + __asm__ __volatile__ ("here: movl $here, %0\n\t"
> + "movl %%ebp, %1\n\t"
> + "movl %%esp, %2\n\t"
> + : "=m" (saved_regs->ip),
> + "=m" (saved_regs->bp),
> + "=m" (saved_regs->sp));
> +#else /* CONFIG_X86_64 */
> + saved_regs->ss = 0;
> + __asm__ __volatile__ ("here: movq $here, %0\n\t"
> + "movq %%rbp, %1\n\t"
> + "movq %%rsp, %2\n\t"
> + : "=m" (saved_regs->ip),
> + "=m" (saved_regs->bp),
> + "=m" (saved_regs->sp));
> +#endif /* CONFIG_X86_64 */
> + __ipipe_get_cpu_var(ipipe_irq_regs) = saved_regs;
> + }
> +}
> +
> +static inline void
> +__ipipe_cleanup_irq_regs(struct pt_regs *orig_regs,
> + struct pt_regs *saved_regs)
> +{
> + if (orig_regs)
> + orig_regs->orig_ax = saved_regs->orig_ax;
> + else
> + __ipipe_get_cpu_var(ipipe_irq_regs) = NULL;
> +}
> +
> #endif /* !__X86_IPIPE_H */
> diff --git a/arch/x86/include/asm/ipipe_32.h b/arch/x86/include/asm/ipipe_32.h
> index 8d1f4b5..ce3d417 100644
> --- a/arch/x86/include/asm/ipipe_32.h
> +++ b/arch/x86/include/asm/ipipe_32.h
> @@ -65,7 +65,7 @@ void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc);
> static inline void __ipipe_call_root_xirq_handler(unsigned irq,
> ipipe_irq_handler_t handler)
> {
> - struct pt_regs *regs = &__raw_get_cpu_var(__ipipe_tick_regs);
> + struct pt_regs *regs = ipipe_get_irq_regs();
>
> regs->orig_ax = ~__ipipe_get_irq_vector(irq);
>
> diff --git a/arch/x86/include/asm/ipipe_64.h b/arch/x86/include/asm/ipipe_64.h
> index bc427b8..4452662 100644
> --- a/arch/x86/include/asm/ipipe_64.h
> +++ b/arch/x86/include/asm/ipipe_64.h
> @@ -63,7 +63,7 @@ void __ipipe_end_edge_irq(unsigned irq, struct irq_desc *desc);
> static inline void __ipipe_call_root_xirq_handler(unsigned irq,
> void (*handler)(unsigned, void *))
> {
> - struct pt_regs *regs = &__raw_get_cpu_var(__ipipe_tick_regs);
> + struct pt_regs *regs = ipipe_get_irq_regs();
>
> regs->orig_ax = ~__ipipe_get_irq_vector(irq);
>
> diff --git a/arch/x86/kernel/ipipe.c b/arch/x86/kernel/ipipe.c
> index 521ec53..23b6908 100644
> --- a/arch/x86/kernel/ipipe.c
> +++ b/arch/x86/kernel/ipipe.c
> @@ -900,11 +900,14 @@ int __ipipe_syscall_root(struct pt_regs *regs)
> */
> int __ipipe_handle_irq(struct pt_regs *regs)
> {
> + struct pt_regs *old_regs = __ipipe_get_cpu_var(ipipe_irq_regs);
> struct ipipe_domain *this_domain, *next_domain;
> unsigned int vector = regs->orig_ax, irq;
> struct list_head *head, *pos;
> int m_ack;
>
> + __ipipe_get_cpu_var(ipipe_irq_regs) = regs;
> +
> if ((long)regs->orig_ax < 0) {
> vector = ~vector;
> #ifdef CONFIG_X86_LOCAL_APIC
> @@ -976,28 +979,7 @@ int __ipipe_handle_irq(struct pt_regs *regs)
> __ipipe_walk_pipeline(head);
>
> finalize_nosync:
> -
> - /*
> - * Given our deferred dispatching model for regular IRQs, we
> - * only record CPU regs for the last timer interrupt, so that
> - * the timer handler charges CPU times properly. It is assumed
> - * that other interrupt handlers don't actually care for such
> - * information.
> - */
> -
> - if (irq == __ipipe_tick_irq) {
> - struct pt_regs *tick_regs = &__raw_get_cpu_var(__ipipe_tick_regs);
> - tick_regs->flags = regs->flags;
> - tick_regs->cs = regs->cs;
> - tick_regs->ip = regs->ip;
> - tick_regs->bp = regs->bp;
> -#ifdef CONFIG_X86_64
> - tick_regs->ss = regs->ss;
> - tick_regs->sp = regs->sp;
> -#endif
> - if (!ipipe_root_domain_p)
> - tick_regs->flags &= ~X86_EFLAGS_IF;
> - }
> + __ipipe_get_cpu_var(ipipe_irq_regs) = old_regs;
>
> if (!ipipe_root_domain_p ||
> test_bit(IPIPE_STALL_FLAG, &ipipe_root_cpudom_var(status)))
--
Philippe.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Adeos-main] [RFC][PATCH 2/2] x86: Add support for ipipe_get_irq_regs
2010-06-05 17:40 ` Philippe Gerum
@ 2010-06-05 18:37 ` Jan Kiszka
2010-06-05 19:23 ` Gilles Chanteperdrix
0 siblings, 1 reply; 6+ messages in thread
From: Jan Kiszka @ 2010-06-05 18:37 UTC (permalink / raw)
To: Philippe Gerum; +Cc: adeos-main
[-- Attachment #1: Type: text/plain, Size: 2160 bytes --]
Philippe Gerum wrote:
> On Thu, 2010-06-03 at 16:22 +0200, Jan Kiszka wrote:
>> From: Jan Kiszka <jan.kiszka@domain.hid>
>>
>> Implement the x86 arch bits for ipipe_get_irq_regs support. This allows
>> to drop __ipipe_tick_regs and use the new service instead.
>
> I'm unsure whether this patch would actually replace __ipipe_tick_regs
> properly, particularly regarding how the profiling code works.
tick_regs are a "workaround"; this approach appears to me to be much closer
to how the native kernel works.
>
> But I'm now convinced that we are on the wrong track, with trying to
> track the IRQ frame, for the purpose we discussed on the Xenomai mailing
> list. In fact, Gilles already told us about the best approach, we did
> not pay attention enough, though:
> https://mail.gna.org/public/xenomai-help/2010-06/msg00033.html
>
> Basically, we want to get a hold of the register frame of a task context
> before it returns to user-space, so that we can do some fixups.
> Therefore, the best option is clearly to add a special event, that the
> pipeline would dispatch upon request, when:
>
> - __ipipe_grab/handle_irq is about to return to userland
> - __ipipe_syscall_root is about to do the same
>
> In those contexts, we do have the _real_ register frame for the context,
> which removes the restriction on faked IRQ frames induced by the
> deferred interrupt model.
The register frame is incomplete, but it is very real. It is what you
get when an interrupt fires right after the kernel re-enabled IRQ delivery.
> We could then use that feature, not only for
> having a more graceful watchdog, but for allowing Xenomai's real-time
> signals to preempt syscall-less code as well.
>
> I worked on this lately, and eventually coupled that event to a
> generalized support for forcing userland to run a kernel exit. I'll post
> more info to the Xenomai list.
That said, I'm not opposed to some kind of "I-pipe user space return
notifiers", specifically as they will include the syscall path. Maybe we
can still save bits of this approach to overcome the tick_regs and
related patches of the profiling code.
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Adeos-main] [RFC][PATCH 2/2] x86: Add support for ipipe_get_irq_regs
2010-06-05 18:37 ` Jan Kiszka
@ 2010-06-05 19:23 ` Gilles Chanteperdrix
2010-06-05 20:48 ` Jan Kiszka
0 siblings, 1 reply; 6+ messages in thread
From: Gilles Chanteperdrix @ 2010-06-05 19:23 UTC (permalink / raw)
To: Jan Kiszka; +Cc: adeos-main, Philippe Gerum
Jan Kiszka wrote:
> Philippe Gerum wrote:
>> On Thu, 2010-06-03 at 16:22 +0200, Jan Kiszka wrote:
>>> From: Jan Kiszka <jan.kiszka@domain.hid>
>>>
>>> Implement the x86 arch bits for ipipe_get_irq_regs support. This allows
>>> to drop __ipipe_tick_regs and use the new service instead.
>> I'm unsure whether this patch would actually replace __ipipe_tick_regs
>> properly, particularly regarding how the profiling code works.
>
> tick_regs are a "workaround", this approach appears to me way closer to
> how native works.
We do not want ipipe_tick_regs to work the way the native kernel works.
We want to fool the kernel by passing it the value of the registers at
the moment of the real timer tick so that its accounting works more or
less reliably. When the kernel used the real registers it got the
accounting wrong.
--
Gilles.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Adeos-main] [RFC][PATCH 2/2] x86: Add support for ipipe_get_irq_regs
2010-06-05 19:23 ` Gilles Chanteperdrix
@ 2010-06-05 20:48 ` Jan Kiszka
2010-06-05 21:09 ` Philippe Gerum
0 siblings, 1 reply; 6+ messages in thread
From: Jan Kiszka @ 2010-06-05 20:48 UTC (permalink / raw)
To: Gilles Chanteperdrix; +Cc: adeos-main, Philippe Gerum
[-- Attachment #1: Type: text/plain, Size: 1283 bytes --]
Gilles Chanteperdrix wrote:
> Jan Kiszka wrote:
>> Philippe Gerum wrote:
>>> On Thu, 2010-06-03 at 16:22 +0200, Jan Kiszka wrote:
>>>> From: Jan Kiszka <jan.kiszka@domain.hid>
>>>>
>>>> Implement the x86 arch bits for ipipe_get_irq_regs support. This allows
>>>> to drop __ipipe_tick_regs and use the new service instead.
>>> I'm unsure whether this patch would actually replace __ipipe_tick_regs
>>> properly, particularly regarding how the profiling code works.
>> tick_regs are a "workaround", this approach appears to me way closer to
>> how native works.
>
> We do not want ipipe_tick_regs to work the way the native kernel works.
> We want to fool the kernel by passing it the value of the registers at
> the moment of the real timer tick so that its accounting works more or
> less reliably. When the kernel used the real registers it got the
> accounting wrong.
Right, though the current tick_regs approach is not accurate either: the
preempted task gets the full time slice accounted, neglecting the actual
schedule inside the preempting domain.
But to get this right, I-pipe would have to update the stats on behalf
of Linux, maybe even supported by Xenomai delivering the current shadow
task. Not sure it would be worth the effort, though.
Jan
[-- Attachment #2: OpenPGP digital signature --]
[-- Type: application/pgp-signature, Size: 257 bytes --]
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [Adeos-main] [RFC][PATCH 2/2] x86: Add support for ipipe_get_irq_regs
2010-06-05 20:48 ` Jan Kiszka
@ 2010-06-05 21:09 ` Philippe Gerum
0 siblings, 0 replies; 6+ messages in thread
From: Philippe Gerum @ 2010-06-05 21:09 UTC (permalink / raw)
To: Jan Kiszka; +Cc: adeos-main
On Sat, 2010-06-05 at 22:48 +0200, Jan Kiszka wrote:
> Gilles Chanteperdrix wrote:
> > Jan Kiszka wrote:
> >> Philippe Gerum wrote:
> >>> On Thu, 2010-06-03 at 16:22 +0200, Jan Kiszka wrote:
> >>>> From: Jan Kiszka <jan.kiszka@domain.hid>
> >>>>
> >>>> Implement the x86 arch bits for ipipe_get_irq_regs support. This allows
> >>>> to drop __ipipe_tick_regs and use the new service instead.
> >>> I'm unsure whether this patch would actually replace __ipipe_tick_regs
> >>> properly, particularly regarding how the profiling code works.
> >> tick_regs are a "workaround", this approach appears to me way closer to
> >> how native works.
> >
> > We do not want ipipe_tick_regs to work the way the native kernel works.
> > We want to fool the kernel by passing it the value of the registers at
> > the moment of the real timer tick so that its accounting works more or
> > less reliably. When the kernel used the real registers it got the
> > accounting wrong.
>
> Right, though current tick_regs approach is not accurate either: the
> preempted task gets the full time slice accounted, neglecting the actual
> schedule inside the preempting domain.
>
Indeed. This trade-off was made back in 2002.
>
> Jan
>
> _______________________________________________
> Adeos-main mailing list
> Adeos-main@domain.hid
> https://mail.gna.org/listinfo/adeos-main
--
Philippe.
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2010-06-05 21:09 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-06-03 14:22 [Adeos-main] [RFC][PATCH 2/2] x86: Add support for ipipe_get_irq_regs Jan Kiszka
2010-06-05 17:40 ` Philippe Gerum
2010-06-05 18:37 ` Jan Kiszka
2010-06-05 19:23 ` Gilles Chanteperdrix
2010-06-05 20:48 ` Jan Kiszka
2010-06-05 21:09 ` Philippe Gerum
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.